From 1ad6bfd8aaa4f11e7e2307ac5a48a3de16552139 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Thu, 5 Mar 2020 18:53:30 -0800 Subject: [PATCH 1/2] [SYCL] Move LowerWGScope pass to LLVM project LowerWGScope pass is an llvm pass that performs SYCL specific transformations in LLVM IR right after frontend. LLVM passes are supposed to be in llvm project and not in clang project. Signed-off-by: Artur Gainullin --- clang/lib/CodeGen/BackendUtil.cpp | 1 - clang/lib/CodeGen/CMakeLists.txt | 4 +- clang/lib/CodeGen/CodeGenAction.cpp | 4 +- clang/lib/CodeGen/SYCLLowerIR/CMakeLists.txt | 18 -------- clang/test/CodeGenSYCL/hier_par.cpp | 43 ------------------- llvm/include/llvm/InitializePasses.h | 1 + llvm/include/llvm/LinkAllPasses.h | 2 + .../include/llvm}/SYCLLowerIR/LowerWGScope.h | 1 - llvm/lib/CMakeLists.txt | 1 + llvm/lib/LLVMBuild.txt | 1 + llvm/lib/SYCLLowerIR/CMakeLists.txt | 9 ++++ llvm/lib/SYCLLowerIR/LLVMBuild.txt | 20 +++++++++ .../lib}/SYCLLowerIR/LowerWGScope.cpp | 6 +-- .../lib}/SYCLLowerIR/README.txt | 0 llvm/tools/bugpoint/CMakeLists.txt | 1 + llvm/tools/opt/CMakeLists.txt | 1 + llvm/tools/opt/opt.cpp | 1 + sycl/test/hier_par/hier_par_wgscope.cpp | 2 +- 18 files changed, 43 insertions(+), 73 deletions(-) delete mode 100644 clang/lib/CodeGen/SYCLLowerIR/CMakeLists.txt delete mode 100644 clang/test/CodeGenSYCL/hier_par.cpp rename {clang/lib/CodeGen => llvm/include/llvm}/SYCLLowerIR/LowerWGScope.h (94%) create mode 100644 llvm/lib/SYCLLowerIR/CMakeLists.txt create mode 100644 llvm/lib/SYCLLowerIR/LLVMBuild.txt rename {clang/lib/CodeGen => llvm/lib}/SYCLLowerIR/LowerWGScope.cpp (99%) rename {clang/lib/CodeGen => llvm/lib}/SYCLLowerIR/README.txt (100%) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 0401aa26dcbc6..707bea4092d05 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "clang/CodeGen/BackendUtil.h" -#include "SYCLLowerIR/LowerWGScope.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/LangOptions.h" diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt index a06fef5195bc3..5af0ac7f57d40 100644 --- a/clang/lib/CodeGen/CMakeLists.txt +++ b/clang/lib/CodeGen/CMakeLists.txt @@ -1,5 +1,3 @@ -add_subdirectory(SYCLLowerIR) - set(LLVM_LINK_COMPONENTS Analysis BitReader @@ -23,6 +21,7 @@ set(LLVM_LINK_COMPONENTS Remarks ScalarOpts Support + SYCLLowerIR Target TransformUtils ) @@ -112,5 +111,4 @@ add_clang_library(clangCodeGen clangFrontend clangLex clangSerialization - clangSYCLLowerIR ) diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index de9a9385f2f9a..4e1fe1308119f 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -10,7 +10,6 @@ #include "CodeGenModule.h" #include "CoverageMappingGen.h" #include "MacroPPCallbacks.h" -#include "SYCLLowerIR/LowerWGScope.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclCXX.h" @@ -39,6 +38,7 @@ #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" #include "llvm/Pass.h" +#include "llvm/SYCLLowerIR/LowerWGScope.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TimeProfiler.h" @@ -335,7 +335,7 @@ namespace clang { if (LangOpts.SYCLIsDevice) { PrettyStackTraceString CrashInfo("Pre-linking SYCL passes"); legacy::PassManager PreLinkingSyclPasses; - PreLinkingSyclPasses.add(createSYCLLowerWGScopePass()); + PreLinkingSyclPasses.add(llvm::createSYCLLowerWGScopePass()); PreLinkingSyclPasses.run(*getModule()); } diff --git a/clang/lib/CodeGen/SYCLLowerIR/CMakeLists.txt b/clang/lib/CodeGen/SYCLLowerIR/CMakeLists.txt deleted file mode 100644 index f9d744d74e676..0000000000000 --- a/clang/lib/CodeGen/SYCLLowerIR/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -set(LLVM_LINK_COMPONENTS - Core - Support - ) - -if(NOT CLANG_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - -add_clang_library(clangSYCLLowerIR - LowerWGScope.cpp - - DEPENDS - ${tablegen_deps} - - LINK_LIBS - clangBasic - ) diff --git a/clang/test/CodeGenSYCL/hier_par.cpp b/clang/test/CodeGenSYCL/hier_par.cpp deleted file mode 100644 index 6967600c2761b..0000000000000 --- a/clang/test/CodeGenSYCL/hier_par.cpp +++ /dev/null @@ -1,43 +0,0 @@ -//==- hier_par.cpp --- hierarchical parallelism regression tests -----------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// RUN: %clangxx -O2 -I %S/Inputs -fsycl -fsycl-device-only -c -Xclang -emit-llvm -o %t.ll %s -// RUN: cat %t.ll | FileCheck %s - -// This test checks for bug fix regressions related to hierarchical parallelism. -// - bug1: private var's (cl::sycl::group argument) address shared locally -// the test checks that a "shadow" local variable is generated for the group -// argument -// -// This is compile-only test for now. -// -// XFAIL:* -#include "sycl.hpp" - -using namespace cl::sycl; - -void foo() { - int *ptr = nullptr; - - queue myQueue; - buffer buf(ptr, range<1>(1)); - - myQueue.submit([&](handler &cgh) { - auto dev_ptr = buf.get_access(cgh); - - cgh.parallel_for_work_group( - range<1>(1), range<1>(1), [=](group<1> g) { -// CHECK: @[[SHADOW:[a-zA-Z0-9]+]] = internal unnamed_addr addrspace(3) global %[[GROUP_CLASS:"[^"]+"]] undef, align [[ALIGN:[0-9]+]] -// CHECK: define {{.*}} spir_func void @{{"[^"]+"}}({{[^,]+}}, %[[GROUP_CLASS]]* byval(%[[GROUP_CLASS]]) align {{[0-9]+}} %[[GROUP_OBJ:[A-Za-z_0-9]+]]) {{.*}}!work_group_scope{{.*}} { -// CHECK-NOT: {{^[ \t]*define}} -// CHECK: %[[TMP:[A-Za-z_0-9]+]] = bitcast %[[GROUP_CLASS]] addrspace(3)* @[[SHADOW]] to i8 addrspace(3)* -// CHECK: %[[OBJ:[A-Za-z_0-9]+]] = bitcast %[[GROUP_CLASS]]* %[[GROUP_OBJ]] to i8* -// CHECK: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align [[ALIGN]] %[[TMP]], {{[^,]+}} %[[OBJ]], {{[^)]+}}) - }); - }); -} diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 0499422e1b4b7..6de57384fa3e4 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -404,6 +404,7 @@ void initializeStripNonDebugSymbolsPass(PassRegistry&); void initializeStripNonLineTableDebugInfoPass(PassRegistry&); void initializeStripSymbolsPass(PassRegistry&); void initializeStructurizeCFGPass(PassRegistry&); +void initializeSYCLLowerWGScopeLegacyPassPass(PassRegistry &); void initializeTailCallElimPass(PassRegistry&); void initializeTailDuplicatePass(PassRegistry&); void initializeTargetLibraryInfoWrapperPassPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 51d89c4b16019..616dd7825f3bb 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -37,6 +37,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/SYCLLowerIR/LowerWGScope.h" #include "llvm/Support/Valgrind.h" #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" #include "llvm/Transforms/IPO.h" @@ -199,6 +200,7 @@ namespace { (void) llvm::createMergeFunctionsPass(); (void) llvm::createMergeICmpsLegacyPass(); (void) llvm::createExpandMemCmpPass(); + (void)llvm::createSYCLLowerWGScopePass(); std::string buf; llvm::raw_string_ostream os(buf); (void) llvm::createPrintModulePass(os); diff --git a/clang/lib/CodeGen/SYCLLowerIR/LowerWGScope.h b/llvm/include/llvm/SYCLLowerIR/LowerWGScope.h similarity index 94% rename from clang/lib/CodeGen/SYCLLowerIR/LowerWGScope.h rename to llvm/include/llvm/SYCLLowerIR/LowerWGScope.h index bd705c0d88af6..c3b537ebb923c 100644 --- a/clang/lib/CodeGen/SYCLLowerIR/LowerWGScope.h +++ b/llvm/include/llvm/SYCLLowerIR/LowerWGScope.h @@ -25,7 +25,6 @@ class SYCLLowerWGScopePass : public PassInfoMixin { }; FunctionPass *createSYCLLowerWGScopePass(); -void initializeSYCLLowerWGScopeLegacyPassPass(PassRegistry &); } // namespace llvm diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt index 8f8d417124c87..4865f595429f7 100644 --- a/llvm/lib/CMakeLists.txt +++ b/llvm/lib/CMakeLists.txt @@ -27,6 +27,7 @@ add_subdirectory(AsmParser) add_subdirectory(LineEditor) add_subdirectory(ProfileData) add_subdirectory(Passes) +add_subdirectory(SYCLLowerIR) add_subdirectory(TextAPI) add_subdirectory(ToolDrivers) add_subdirectory(XRay) diff --git a/llvm/lib/LLVMBuild.txt b/llvm/lib/LLVMBuild.txt index 1ae59791cd6c1..961ad89e40c56 100644 --- a/llvm/lib/LLVMBuild.txt +++ b/llvm/lib/LLVMBuild.txt @@ -42,6 +42,7 @@ subdirectories = Passes ProfileData Support + SYCLLowerIR TableGen TextAPI Target diff --git a/llvm/lib/SYCLLowerIR/CMakeLists.txt b/llvm/lib/SYCLLowerIR/CMakeLists.txt new file mode 100644 index 0000000000000..7a327d7657b69 --- /dev/null +++ b/llvm/lib/SYCLLowerIR/CMakeLists.txt @@ -0,0 +1,9 @@ +add_llvm_component_library(LLVMSYCLLowerIR + LowerWGScope.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR + + DEPENDS + intrinsics_gen + ) diff --git a/llvm/lib/SYCLLowerIR/LLVMBuild.txt b/llvm/lib/SYCLLowerIR/LLVMBuild.txt new file mode 100644 index 0000000000000..19fd5a3f5d667 --- /dev/null +++ b/llvm/lib/SYCLLowerIR/LLVMBuild.txt @@ -0,0 +1,20 @@ +;===- ./lib/SYCLLowerIR/LLVMBuild.txt -----------------------------*- Conf -*--===; +; +; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Group +name = SYCLLowerIR +parent = Libraries diff --git a/clang/lib/CodeGen/SYCLLowerIR/LowerWGScope.cpp b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp similarity index 99% rename from clang/lib/CodeGen/SYCLLowerIR/LowerWGScope.cpp rename to llvm/lib/SYCLLowerIR/LowerWGScope.cpp index 4e13eb2df9ca5..f8bb128a435e8 100644 --- a/clang/lib/CodeGen/SYCLLowerIR/LowerWGScope.cpp +++ b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp @@ -73,10 +73,7 @@ // et. al. //===----------------------------------------------------------------------===// -#include "LowerWGScope.h" - -#include "clang/Basic/AddressSpaces.h" - +#include "llvm/SYCLLowerIR/LowerWGScope.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -85,6 +82,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" diff --git a/clang/lib/CodeGen/SYCLLowerIR/README.txt b/llvm/lib/SYCLLowerIR/README.txt similarity index 100% rename from clang/lib/CodeGen/SYCLLowerIR/README.txt rename to llvm/lib/SYCLLowerIR/README.txt diff --git a/llvm/tools/bugpoint/CMakeLists.txt b/llvm/tools/bugpoint/CMakeLists.txt index 0b5998e181ebb..421889cfedb7f 100644 --- a/llvm/tools/bugpoint/CMakeLists.txt +++ b/llvm/tools/bugpoint/CMakeLists.txt @@ -16,6 +16,7 @@ set(LLVM_LINK_COMPONENTS ObjCARCOpts ScalarOpts Support + SYCLLowerIR Target TransformUtils Vectorize diff --git a/llvm/tools/opt/CMakeLists.txt b/llvm/tools/opt/CMakeLists.txt index 79613c836c533..ad9e20bd0b439 100644 --- a/llvm/tools/opt/CMakeLists.txt +++ b/llvm/tools/opt/CMakeLists.txt @@ -18,6 +18,7 @@ set(LLVM_LINK_COMPONENTS Remarks ScalarOpts Support + SYCLLowerIR Target TransformUtils Vectorize diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index fe2500ad4ac32..46bdfd6fe0cc5 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -563,6 +563,7 @@ int main(int argc, char **argv) { initializeWriteBitcodePassPass(Registry); initializeHardwareLoopsPass(Registry); initializeTypePromotionPass(Registry); + initializeSYCLLowerWGScopeLegacyPassPass(Registry); #ifdef BUILD_EXAMPLES initializeExampleIRTransforms(Registry); diff --git a/sycl/test/hier_par/hier_par_wgscope.cpp b/sycl/test/hier_par/hier_par_wgscope.cpp index ae346a1789547..d624e6ba82d5f 100644 --- a/sycl/test/hier_par/hier_par_wgscope.cpp +++ b/sycl/test/hier_par/hier_par_wgscope.cpp @@ -12,7 +12,7 @@ // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -// RUN: %clangxx -O0 -fsycl %s -o %t.out +// RUN: %clangxx -O0 -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out From 094338dda4a3ebfa53e9b331e5b0217e31a43c52 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Mon, 9 Mar 2020 11:22:25 -0700 Subject: [PATCH 2/2] [SYCL] Add lit tests for SYCL specific CodeGen and LowerWGScope pass Signed-off-by: Artur Gainullin --- llvm/test/SYCLLowerIR/byval_arg.ll | 27 +++++++++ llvm/test/SYCLLowerIR/pfwg_and_pfwi.ll | 79 ++++++++++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 llvm/test/SYCLLowerIR/byval_arg.ll create mode 100644 llvm/test/SYCLLowerIR/pfwg_and_pfwi.ll diff --git a/llvm/test/SYCLLowerIR/byval_arg.ll b/llvm/test/SYCLLowerIR/byval_arg.ll new file mode 100644 index 0000000000000..5d65bdd982f8d --- /dev/null +++ b/llvm/test/SYCLLowerIR/byval_arg.ll @@ -0,0 +1,27 @@ +; RUN: opt < %s -LowerWGScope -S | FileCheck %s + +; Check that argument of the function marked with !work_group_scope +; attribute passed as byval is shared by leader work item via local +; memory to all work items + +%struct.baz = type { i64 } + +; CHECK: @[[SHADOW:[a-zA-Z0-9]+]] = internal unnamed_addr addrspace(3) global %struct.baz undef + +define internal spir_func void @wibble(%struct.baz* byval(%struct.baz) %arg1) !work_group_scope !0 { +; CHECK-LABEL: @wibble( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex +; CHECK-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[CMPZ]], label [[LEADER:%.*]], label [[MERGE:%.*]] +; CHECK: leader: +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.baz* [[ARG1:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 8 bitcast (%struct.baz addrspace(3)* @[[SHADOW]] to i8 addrspace(3)*), i8* [[TMP2]], i64 8, i1 false) +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: call void @__spirv_ControlBarrier(i32 2, i32 2, i32 272) +; CHECK-NEXT: ret void +; + ret void +} + +!0 = !{} diff --git a/llvm/test/SYCLLowerIR/pfwg_and_pfwi.ll b/llvm/test/SYCLLowerIR/pfwg_and_pfwi.ll new file mode 100644 index 0000000000000..3eac5462bd296 --- /dev/null +++ b/llvm/test/SYCLLowerIR/pfwg_and_pfwi.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -LowerWGScope -S | FileCheck %s + +; Check that allocas which correspond to PFWI lambda object and a local copy of the PFWG lambda object +; are properly handled by LowerWGScope pass. Check that WG-shared local "shadow" variables are created +; and before each PFWI invocation leader WI stores its private copy of the variable into the shadow, +; then all WIs load the shadow value into their private copies ("materialize" the private copy). + +%struct.bar = type { i8 } +%struct.zot = type { %struct.widget, %struct.widget, %struct.widget, %struct.foo } +%struct.widget = type { %struct.barney } +%struct.barney = type { [3 x i64] } +%struct.foo = type { %struct.barney } +%struct.foo.0 = type { i8 } + +; CHECK: @[[PFWG_SHADOW:.*]] = internal unnamed_addr addrspace(3) global %struct.bar addrspace(4)* +; CHECK: @[[PFWI_SHADOW:.*]] = internal unnamed_addr addrspace(3) global %struct.foo.0 +; CHECK: @[[GROUP_SHADOW:.*]] = internal unnamed_addr addrspace(3) global %struct.zot + +define internal spir_func void @wibble(%struct.bar addrspace(4)* %arg, %struct.zot* byval(%struct.zot) align 8 %arg1) align 2 !work_group_scope !0 { +; CHECK-LABEL: @wibble( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex +; CHECK-NEXT: [[CMPZ3:%.*]] = icmp eq i64 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[CMPZ3]], label [[LEADER:%.*]], label [[MERGE:%.*]] +; CHECK: leader: +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.zot* [[ARG1:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 16 bitcast (%struct.zot addrspace(3)* @[[GROUP_SHADOW]] to i8 addrspace(3)*), i8* align 8 [[TMP1]], i64 96, i1 false) +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: call void @__spirv_ControlBarrier(i32 2, i32 2, i32 272) +; CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_BAR:%.*]] addrspace(4)*, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_FOO_0:%.*]], align 1 +; CHECK-NEXT: [[ID:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex +; CHECK-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[ID]], 0 +; CHECK-NEXT: br i1 [[CMPZ]], label [[WG_LEADER:%.*]], label [[WG_CF:%.*]] +; CHECK: wg_leader: +; CHECK-NEXT: store [[STRUCT_BAR]] addrspace(4)* [[ARG:%.*]], [[STRUCT_BAR]] addrspace(4)** [[TMP]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_BAR]] addrspace(4)*, [[STRUCT_BAR]] addrspace(4)** [[TMP]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast [[STRUCT_ZOT:%.*]] addrspace(3)* @[[GROUP_SHADOW]] to [[STRUCT_ZOT]] addrspace(4)* +; CHECK-NEXT: store [[STRUCT_ZOT]] addrspace(4)* [[TMP4]], [[STRUCT_ZOT]] addrspace(4)* addrspace(3)* @wibbleWG_tmp4 +; CHECK-NEXT: br label [[WG_CF]] +; CHECK: wg_cf: +; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex +; CHECK-NEXT: [[CMPZ2:%.*]] = icmp eq i64 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[CMPZ2]], label [[TESTMAT:%.*]], label [[LEADERMAT:%.*]] +; CHECK: TestMat: +; CHECK-NEXT: [[TMP4:%.*]] = bitcast %struct.foo.0* [[TMP2]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 8 getelementptr inbounds (%struct.foo.0, [[STRUCT_FOO_0]] addrspace(3)* @[[PFWI_SHADOW]], i32 0, i32 0), i8* align 1 [[TMP4]], i64 1, i1 false) +; CHECK-NEXT: [[MAT_LD:%.*]] = load [[STRUCT_BAR]] addrspace(4)*, [[STRUCT_BAR]] addrspace(4)** [[TMP]] +; CHECK-NEXT: store [[STRUCT_BAR]] addrspace(4)* [[MAT_LD]], [[STRUCT_BAR]] addrspace(4)* addrspace(3)* @[[PFWG_SHADOW]] +; CHECK-NEXT: br label [[LEADERMAT]] +; CHECK: LeaderMat: +; CHECK-NEXT: call void @__spirv_ControlBarrier(i32 2, i32 2, i32 272) +; CHECK-NEXT: [[MAT_LD1:%.*]] = load [[STRUCT_BAR]] addrspace(4)*, [[STRUCT_BAR]] addrspace(4)* addrspace(3)* @[[PFWG_SHADOW]] +; CHECK-NEXT: store [[STRUCT_BAR]] addrspace(4)* [[MAT_LD1]], [[STRUCT_BAR]] addrspace(4)** [[TMP]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.foo.0* [[TMP2]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 1 [[TMP5]], i8 addrspace(3)* align 8 getelementptr inbounds (%struct.foo.0, [[STRUCT_FOO_0]] addrspace(3)* @[[PFWI_SHADOW]], i32 0, i32 0), i64 1, i1 false) +; CHECK-NEXT: call void @__spirv_ControlBarrier(i32 2, i32 2, i32 272) +; CHECK-NEXT: [[WG_VAL_TMP4:%.*]] = load [[STRUCT_ZOT]] addrspace(4)*, [[STRUCT_ZOT]] addrspace(4)* addrspace(3)* @wibbleWG_tmp4 +; CHECK-NEXT: call spir_func void @bar(%struct.zot addrspace(4)* [[WG_VAL_TMP4]], %struct.foo.0* byval(%struct.foo.0) align 1 [[TMP2]]) +; CHECK-NEXT: ret void +; +bb: + %tmp = alloca %struct.bar addrspace(4)*, align 8 + %tmp2 = alloca %struct.foo.0, align 1 + store %struct.bar addrspace(4)* %arg, %struct.bar addrspace(4)** %tmp, align 8 + %tmp3 = load %struct.bar addrspace(4)*, %struct.bar addrspace(4)** %tmp, align 8 + %tmp4 = addrspacecast %struct.zot* %arg1 to %struct.zot addrspace(4)* + call spir_func void @bar(%struct.zot addrspace(4)* %tmp4, %struct.foo.0* byval(%struct.foo.0) align 1 %tmp2) + ret void +} + +define internal spir_func void @bar(%struct.zot addrspace(4)* %arg, %struct.foo.0* byval(%struct.foo.0) align 1 %arg1) align 2 !work_item_scope !0 !parallel_for_work_item !0 { +bb: + ret void +} + +!0 = !{}