From 39c3cd67c53fbf1692463f407bac1c1fcd599af3 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 28 Sep 2022 18:40:31 +0100 Subject: [PATCH 1/6] [SYCL] Always inline kernel lambda operator in entry point This patch marks the `operator()` of the kernel lambda as `always_inline` so that it gets inlined into the kernel entry point. Kernel entry point are functions that take the captured variables as parameters, create a lambda object from that, setup the index structs and then call `operator()` on the lambda. Inlining the operator into the entry point should be beneficial in most cases as it allows the compiler to optimize out the lambda creation, which can be very important for kernels capturing a lot of variables. In a lot of cases the inliner will already do it, but when it doesn't it can lead to very confusing performance implications since the kernel entry point isn't directly visible to users. --- clang/lib/Sema/SemaSYCL.cpp | 7 +++ .../check-direct-attribute-propagation.cpp | 5 +-- .../CodeGenSYCL/debug-info-srcpos-kernel.cpp | 5 --- clang/test/CodeGenSYCL/device-functions.cpp | 1 - clang/test/CodeGenSYCL/device-variables.cpp | 9 ++-- clang/test/CodeGenSYCL/esimd_metadata2.cpp | 1 - clang/test/CodeGenSYCL/kernel-handler.cpp | 3 -- clang/test/CodeGenSYCL/max-concurrency.cpp | 38 ---------------- .../no_opaque_basic-kernel-wrapper.cpp | 3 -- ...que_check-direct-attribute-propagation.cpp | 5 +-- .../no_opaque_device-functions.cpp | 1 - .../no_opaque_device-variables.cpp | 7 ++- .../CodeGenSYCL/no_opaque_kernel-handler.cpp | 3 -- .../CodeGenSYCL/no_opaque_max-concurrency.cpp | 44 ------------------- clang/test/CodeGenSYCL/no_opaque_sampler.cpp | 4 +- .../no_opaque_spir-calling-conv.cpp | 8 ++-- .../test/CodeGenSYCL/no_opaque_spir-enum.cpp | 1 - .../no_opaque_stall_enable_device.cpp | 4 -- .../no_opaque_union-kernel-param.cpp | 1 - clang/test/CodeGenSYCL/sampler.cpp | 4 +- clang/test/CodeGenSYCL/spir-calling-conv.cpp | 8 ++-- clang/test/CodeGenSYCL/spir-enum.cpp | 1 - .../test/CodeGenSYCL/stall_enable_device.cpp | 4 -- .../CodeGenSYCL/sycl-device-static-init.cpp | 1 - clang/test/CodeGenSYCL/union-kernel-param.cpp | 1 - .../unique_stable_name_windows_diff.cpp | 9 +--- 26 files changed, 33 insertions(+), 145 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index ec2803b37159b..af664de3da4e7 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -837,6 +837,13 @@ class SingleDeviceFunctionTracker { CallGraphNode *KernelNode = Parent.getNodeForKernel(SYCLKernel); llvm::SmallVector CallStack; VisitCallNode(KernelNode, GetFDFromNode(KernelNode), CallStack); + + // Always inline the KernelBody in the kernel entry point. + if (KernelBody) { + KernelBody->addAttr(AlwaysInlineAttr::CreateImplicit( + KernelBody->getASTContext(), {}, AttributeCommonInfo::AS_Keyword, + AlwaysInlineAttr::Keyword_forceinline)); + } } public: diff --git a/clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp b/clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp index 327caa1db18ef..8d0989ba3327c 100644 --- a/clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp +++ b/clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp @@ -304,7 +304,6 @@ int main() { // Test attribute is not propagated. // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name32() #0 !kernel_arg_buffer_location ![[NUM]] - // CHECK: define {{.*}}spir_func void @{{.*}}Functor10{{.*}}(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 // CHECK-NOT: noalias // CHECK-SAME: { // CHECK: define dso_local spir_func void @_Z4foo8v() @@ -312,12 +311,12 @@ int main() { h.single_task(f10); // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name33() #0 !kernel_arg_buffer_location ![[NUM]] - // CHECK: define {{.*}}spir_func void @{{.*}}Foo8{{.*}}(ptr addrspace(4) noalias noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 + // CHECK: store ptr addrspace(4) %Foo8{{.*}} !noalias Foo8 boo8; h.single_task(boo8); // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name34() #0 !kernel_arg_buffer_location ![[NUM]] - // CHECK: define {{.*}}spir_func void @{{.*}}(ptr addrspace(4) noalias noundef align 1 dereferenceable_or_null(1) %this) #4 align 2 + // CHECK: store ptr addrspace(4){{.*}} !noalias h.single_task( []() [[intel::kernel_args_restrict]]{}); }); diff --git a/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp b/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp index d66a69002dace..a610c98aa4889 100644 --- a/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp +++ b/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp @@ -27,14 +27,9 @@ int main() { // CHECK: define{{.*}} spir_kernel {{.*}}19use_kernel_for_test({{.*}}){{.*}} !dbg [[KERNEL:![0-9]+]] {{.*}}{ // CHECK: getelementptr inbounds %class.anon, {{.*}}, i32 0, i32 0, !dbg [[LINE_A0:![0-9]+]] // CHECK: call spir_func void {{.*}}6__init{{.*}} !dbg [[LINE_A0]] -// CHECK: call spir_func void @_ZZ4mainENKUlvE_clEv{{.*}} !dbg [[LINE_B0:![0-9]+]] -// CHECK: ret void, !dbg [[LINE_C0:![0-9]+]] // CHECK: [[KERNEL]] = {{.*}}!DISubprogram(name: "{{.*}}19use_kernel_for_test" // CHECK-SAME: scope: [[FILE:![0-9]+]], // CHECK-SAME: file: [[FILE]], // CHECK-SAME: flags: DIFlagArtificial | DIFlagPrototyped // CHECK: [[FILE]] = !DIFile(filename: "{{.*}}debug-info-srcpos-kernel.cpp"{{.*}}) // CHECK: [[LINE_A0]] = !DILocation(line: 15,{{.*}}scope: [[KERNEL]] -// CHECK: [[LINE_B0]] = !DILocation(line: 16,{{.*}}scope: [[BLOCK:![0-9]+]] -// CHECK: [[BLOCK]] = distinct !DILexicalBlock(scope: [[KERNEL]] -// CHECK: [[LINE_C0]] = !DILocation(line: 17,{{.*}}scope: [[KERNEL]] diff --git a/clang/test/CodeGenSYCL/device-functions.cpp b/clang/test/CodeGenSYCL/device-functions.cpp index e19f29b1b3cda..0346cc87bda8c 100644 --- a/clang/test/CodeGenSYCL/device-functions.cpp +++ b/clang/test/CodeGenSYCL/device-functions.cpp @@ -22,6 +22,5 @@ int main() { return 0; } // CHECK: define {{.*}}spir_kernel void @_ZTSZ4mainE11fake_kernel() -// CHECK: define internal spir_func void @_ZZ4mainENKUlvE_clEv(ptr addrspace(4) {{[^,]*}} %this) // CHECK: define {{.*}}spir_func void @_Z3foov() // CHECK: define linkonce_odr spir_func noundef i32 @_Z3barIiET_S0_(i32 noundef %arg) diff --git a/clang/test/CodeGenSYCL/device-variables.cpp b/clang/test/CodeGenSYCL/device-variables.cpp index 6559a34984def..3c8b575acc2b9 100644 --- a/clang/test/CodeGenSYCL/device-variables.cpp +++ b/clang/test/CodeGenSYCL/device-variables.cpp @@ -23,19 +23,18 @@ int main() { kernel([=]() { // Global variables used directly foo(global_value); + // CHECK: [[LOAD:%[a-z0-9.]+]] = load ptr addrspace(4){{.*}} // CHECK: call spir_func void @{{.*}}foo{{.*}}(ptr addrspace(4) noundef align 4 dereferenceable(4) addrspacecast (ptr addrspace(1) @{{.*}}global_value to ptr addrspace(4))) int a = my_array[0]; - // CHECK: [[LOAD:%[0-9]+]] = load i32, ptr addrspace(4) - // CHECK: store i32 [[LOAD]], ptr addrspace(4) %a + // CHECK: store i32 42, ptr addrspace(4) %a int b = some_const; // Constant used directly // CHECK: store i32 1, ptr addrspace(4) %b foo(local_value); // Local variables and constexprs captured by lambda - // CHECK: [[GEP:%[a-z_]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) %{{.*}}, i32 0, i32 0 - // CHECK: call spir_func void @{{.*}}foo{{.*}}(ptr addrspace(4) noundef align 4 dereferenceable(4) [[GEP]]) + // CHECK: call spir_func void @{{.*}}foo{{.*}}(ptr addrspace(4) noundef align 4 dereferenceable(4) [[LOAD]]) int some_device_local_var = some_local_var; - // CHECK: [[GEP1:%[a-z_]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) %{{.*}}, i32 0, i32 1 + // CHECK: [[GEP1:%[a-z_.]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) %{{.*}}, i32 0, i32 1 // CHECK: [[LOAD1:%[0-9]+]] = load i32, ptr addrspace(4) [[GEP1]] // CHECK: store i32 [[LOAD1]], ptr addrspace(4) %some_device_local_var }); diff --git a/clang/test/CodeGenSYCL/esimd_metadata2.cpp b/clang/test/CodeGenSYCL/esimd_metadata2.cpp index 897aa54f3b433..458c028a126d1 100644 --- a/clang/test/CodeGenSYCL/esimd_metadata2.cpp +++ b/clang/test/CodeGenSYCL/esimd_metadata2.cpp @@ -10,7 +10,6 @@ __attribute__((sycl_device)) __attribute__((sycl_explicit_simd)) void esimd_func // CHECK-ESIMD-DAG: define {{.*}}spir_kernel void @{{.*}}kernel_cm() #{{[0-9]+}} !sycl_explicit_simd !{{[0-9]+}} !intel_reqd_sub_group_size ![[SGSIZE1:[0-9]+]] {{.*}}{ // CHECK-ESIMD-DAG: define {{.*}}spir_func void @{{.*}}esimd_funcv() #{{[0-9]+}} !sycl_explicit_simd !{{[0-9]+}} !intel_reqd_sub_group_size ![[SGSIZE1]] { // CHECK-ESIMD-DAG: define {{.*}}spir_func void @{{.*}}shared_funcv() #{{[0-9]+}} { -// CHECK-ESIMD-DAG: define linkonce_odr spir_func void @_ZN12ESIMDFunctorclEv({{.*}}) #{{[0-9]+}} {{.*}} !sycl_explicit_simd !{{[0-9]+}} { class ESIMDFunctor { public: diff --git a/clang/test/CodeGenSYCL/kernel-handler.cpp b/clang/test/CodeGenSYCL/kernel-handler.cpp index e0b0349864d05..8eaa33608188f 100644 --- a/clang/test/CodeGenSYCL/kernel-handler.cpp +++ b/clang/test/CodeGenSYCL/kernel-handler.cpp @@ -33,6 +33,3 @@ void test(int val) { // NATIVESUPPORT-NOT: load ptr addrspace(1), ptr addrspace(1) %_arg__specialization_constants_buffer.addr, align 8 // NATIVESUPPORT-NOT: addrspacecast ptr addrspace(1) %{{[0-9]+}} to ptr // NATIVESUPPORT-NOT: call void @{{.*}}__init_specialization_constants_buffer{{.*}}(ptr noundef align 4 nonnull align 1 dereferenceable(1) %kh, ptr noundef align 4 %{{[0-9]+}}) - -// ALL: call{{ spir_func | }}void @{{[a-zA-Z0-9_$]+}}kernel_handler{{[a-zA-Z0-9_$]+}} -// ALL-SAME: noundef byval(%"class.sycl::_V1::kernel_handler") diff --git a/clang/test/CodeGenSYCL/max-concurrency.cpp b/clang/test/CodeGenSYCL/max-concurrency.cpp index 333c75f4c170d..77c00f1fc7b15 100644 --- a/clang/test/CodeGenSYCL/max-concurrency.cpp +++ b/clang/test/CodeGenSYCL/max-concurrency.cpp @@ -20,48 +20,10 @@ // CHECK: ret void // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name1() [[ATTR0:#.*]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]] -// CHECK: entry: -// CHECK: [[F1:%.*]] = alloca [[CLASS_F1:%.*]], align 1 -// CHECK: [[F1_ASCAST:%.*]] = addrspacecast ptr [[F1]] to ptr addrspace(4) -// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[F1]]) -// CHECK: call spir_func void @_ZNK8Functor1clEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[F1_ASCAST]]) -// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[F1]]) -// CHECK: ret void // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name4() [[ATTR0]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]] -// CHECK: entry -// CHECK: [[F3:%.*]] = alloca [[CLASS_F3:%.*]], align 1 -// CHECK: [[F3_ASCAST:%.*]] = addrspacecast ptr [[F3]] to ptr addrspace(4) -// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[F3]]) -// CHECK: call spir_func void @_ZNK8Functor3ILi4EEclEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[F3_ASCAST]]) -// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[F3]] -// CHECK: ret void - -// CHECK: define linkonce_odr spir_func void @_ZNK8Functor3ILi4EEclEv -// CHECK: entry: -// CHECK: [[ADDR_1:%.*]] = alloca ptr addrspace(4), align 8 -// CHECK: [[ADDR1_CAST:%.*]] = addrspacecast ptr [[ADDR_1]] to ptr addrspace(4) -// CHECK: store ptr addrspace(4) %this, ptr addrspace(4) [[ADDR1_CAST]], align 8 -// CHECK: %this1 = load ptr addrspace(4), ptr addrspace(4) [[ADDR1_CAST]], align 8 -// CHECK: ret void // CHECK: define dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E12kernel_name5() -// CHECK: entry: -// CHECK: [[H1:%.*]] = alloca [[H:%.*]], align 1 -// CHECK: [[H2:%.*]] = addrspacecast ptr [[H1]] to ptr addrspace(4) -// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[H1]]) -// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[H2]]) -// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[H1]]) -// CHECK: ret void - -// CHECK: define {{.*}}spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv -// CHECK: entry: -// CHECK: [[ADDR_1:%.*]] = alloca ptr addrspace(4), align 8 -// CHECK: [[ADDR1_CAST:%.*]] = addrspacecast ptr [[ADDR_1]] to ptr addrspace(4) -// CHECK: store ptr addrspace(4) %this, ptr addrspace(4) [[ADDR1_CAST]], align 8 -// CHECK: %this1 = load ptr addrspace(4), ptr addrspace(4) [[ADDR1_CAST]], align 8 -// CHECK: call spir_func void @_Z4funcILi2EEvv() -// CHECK: ret void template void max_concurrency() { diff --git a/clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp b/clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp index d207aebfe86b8..766fa44818acd 100644 --- a/clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp @@ -58,6 +58,3 @@ int main() { // CHECK-SAME: %"struct.sycl::_V1::range"* noundef byval({{.*}}) align 4 [[ARANGE]], // CHECK-SAME: %"struct.sycl::_V1::range"* noundef byval({{.*}}) align 4 [[MRANGE]], // CHECK-SAME: %"struct.sycl::_V1::id"* noundef byval({{.*}}) align 4 [[OID]]) - -// Check lambda "()" operator call -// CHECK: call spir_func void @{{.*}}(%class.anon addrspace(4)* {{[^,]*}}) diff --git a/clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp b/clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp index 5790ff5f7a9f2..9923f75f6c402 100644 --- a/clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp @@ -304,7 +304,6 @@ int main() { // Test attribute is not propagated. // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name32() #0 !kernel_arg_buffer_location ![[NUM]] - // CHECK: define {{.*}}spir_func void @{{.*}}Functor10{{.*}}(%class.Functor10 addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 // CHECK-NOT: noalias // CHECK-SAME: { // CHECK: define dso_local spir_func void @_Z4foo8v() @@ -312,12 +311,12 @@ int main() { h.single_task(f10); // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name33() #0 !kernel_arg_buffer_location ![[NUM]] - // CHECK: define {{.*}}spir_func void @{{.*}}Foo8{{.*}}(%class.Foo8 addrspace(4)* noalias noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 + // CHECK: store %class.Foo8{{.*}} !noalias Foo8 boo8; h.single_task(boo8); // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name34() #0 !kernel_arg_buffer_location ![[NUM]] - // CHECK: define {{.*}}spir_func void @{{.*}}(%class.anon{{.*}} addrspace(4)* noalias noundef align 1 dereferenceable_or_null(1) %this) #4 align 2 + // CHECK: store %class.anon{{.*}} !noalias h.single_task( []() [[intel::kernel_args_restrict]]{}); }); diff --git a/clang/test/CodeGenSYCL/no_opaque_device-functions.cpp b/clang/test/CodeGenSYCL/no_opaque_device-functions.cpp index 555dfadc3b14a..9dbf48e8460e0 100644 --- a/clang/test/CodeGenSYCL/no_opaque_device-functions.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_device-functions.cpp @@ -22,6 +22,5 @@ int main() { return 0; } // CHECK: define {{.*}}spir_kernel void @_ZTSZ4mainE11fake_kernel() -// CHECK: define internal spir_func void @_ZZ4mainENKUlvE_clEv(%class.anon addrspace(4)* {{[^,]*}} %this) // CHECK: define {{.*}}spir_func void @_Z3foov() // CHECK: define linkonce_odr spir_func noundef i32 @_Z3barIiET_S0_(i32 noundef %arg) diff --git a/clang/test/CodeGenSYCL/no_opaque_device-variables.cpp b/clang/test/CodeGenSYCL/no_opaque_device-variables.cpp index c5a19a068dea7..991a49afd8e63 100644 --- a/clang/test/CodeGenSYCL/no_opaque_device-variables.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_device-variables.cpp @@ -25,17 +25,16 @@ int main() { foo(global_value); // CHECK: call spir_func void @{{.*}}foo{{.*}}(i32 addrspace(4)* noundef align 4 dereferenceable(4) addrspacecast (i32 addrspace(1)* @{{.*}}global_value to i32 addrspace(4)*)) int a = my_array[0]; - // CHECK: [[LOAD:%[0-9]+]] = load i32, i32 addrspace(4)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(4)* addrspacecast ([1 x i32] addrspace(1)* @{{.*}}my_array to [1 x i32] addrspace(4)*), i64 0, i64 0) - // CHECK: store i32 [[LOAD]], i32 addrspace(4)* %a + // CHECK: store i32 42, i32 addrspace(4)* %a int b = some_const; // Constant used directly // CHECK: store i32 1, i32 addrspace(4)* %b foo(local_value); // Local variables and constexprs captured by lambda - // CHECK: [[GEP:%[a-z_]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{.*}}, i32 0, i32 0 + // CHECK: [[GEP:%[a-z_.]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{.*}}, i32 0, i32 0 // CHECK: call spir_func void @{{.*}}foo{{.*}}(i32 addrspace(4)* noundef align 4 dereferenceable(4) [[GEP]]) int some_device_local_var = some_local_var; - // CHECK: [[GEP1:%[a-z_]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{.*}}, i32 0, i32 1 + // CHECK: [[GEP1:%[a-z_.]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{.*}}, i32 0, i32 1 // CHECK: [[LOAD1:%[0-9]+]] = load i32, i32 addrspace(4)* [[GEP1]] // CHECK: store i32 [[LOAD1]], i32 addrspace(4)* %some_device_local_var }); diff --git a/clang/test/CodeGenSYCL/no_opaque_kernel-handler.cpp b/clang/test/CodeGenSYCL/no_opaque_kernel-handler.cpp index 97828a7d8e2fc..b4130fc321da2 100644 --- a/clang/test/CodeGenSYCL/no_opaque_kernel-handler.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_kernel-handler.cpp @@ -33,6 +33,3 @@ void test(int val) { // NATIVESUPPORT-NOT: load i8 addrspace(1)*, i8 addrspace(1)** %_arg__specialization_constants_buffer.addr, align 8 // NATIVESUPPORT-NOT: addrspacecast i8 addrspace(1)* %{{[0-9]+}} to i8* // NATIVESUPPORT-NOT: call void @{{.*}}__init_specialization_constants_buffer{{.*}}(%"class.sycl::_V1::kernel_handler"* noundef align 4 nonnull align 1 dereferenceable(1) %kh, i8* noundef align 4 %{{[0-9]+}}) - -// ALL: call{{ spir_func | }}void @{{[a-zA-Z0-9_$]+}}kernel_handler{{[a-zA-Z0-9_$]+}} -// ALL-SAME: noundef byval(%"class.sycl::_V1::kernel_handler") diff --git a/clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp b/clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp index b6e22eb3fdc39..43f7e9dcf6c14 100644 --- a/clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp @@ -19,54 +19,10 @@ // CHECK: ret void // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name1() [[ATTR0:#.*]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]] -// CHECK: entry: -// CHECK: [[F1:%.*]] = alloca [[CLASS_F1:%.*]], align 1 -// CHECK: [[F1_ASCAST:%.*]] = addrspacecast [[CLASS_F1]]* [[F1]] to [[CLASS_F1]] addrspace(4)* -// CHECK: [[TMP0:%.*]] = bitcast [[CLASS_F1]]* [[F1]] to i8* -// CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[TMP0]]) -// CHECK: call spir_func void @_ZNK8Functor1clEv([[CLASS_F1]] addrspace(4)* noundef align 1 dereferenceable_or_null(1) [[F1_ASCAST]]) -// CHECK: [[TMP1:%.*]] = bitcast [[CLASS_F1]]* [[F1]] to i8* -// CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[TMP1]]) -// CHECK: ret void // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name4() [[ATTR0]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]] -// CHECK: entry -// CHECK: [[F3:%.*]] = alloca [[CLASS_F3:%.*]], align 1 -// CHECK: [[F3_ASCAST:%.*]] = addrspacecast [[CLASS_F3]]* [[F3]] to [[CLASS_F3]] addrspace(4)* -// CHECK: [[TMP2:%.*]] = bitcast [[CLASS_F3]]* [[F3]] to i8* -// CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[TMP2]]) -// CHECK: call spir_func void @_ZNK8Functor3ILi4EEclEv([[CLASS_F3]] addrspace(4)* noundef align 1 dereferenceable_or_null(1) [[F3_ASCAST]]) -// CHECK: [[TMP3:%.*]] = bitcast [[CLASS_F3]]* [[F3]] to i8* -// CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[TMP3]] -// CHECK: ret void - -// CHECK: define linkonce_odr spir_func void @_ZNK8Functor3ILi4EEclEv -// CHECK: entry: -// CHECK: [[ADDR_1:%.*]] = alloca [[CLASS_F3:%.*]] addrspace(4)*, align 8 -// CHECK: [[ADDR1_CAST:%.*]] = addrspacecast [[CLASS_F3]] addrspace(4)** [[ADDR_1]] to [[CLASS_F3]] addrspace(4)* addrspace(4)* -// CHECK: store [[CLASS_F3]] addrspace(4)* %this, [[CLASS_F3]] addrspace(4)* addrspace(4)* [[ADDR1_CAST]], align 8 -// CHECK: %this1 = load [[CLASS_F3]] addrspace(4)*, [[CLASS_F3]] addrspace(4)* addrspace(4)* [[ADDR1_CAST]], align 8 -// CHECK: ret void // CHECK: define dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E12kernel_name5() -// CHECK: entry: -// CHECK: [[H1:%.*]] = alloca [[H:%.*]], align 1 -// CHECK: [[H2:%.*]] = addrspacecast [[H]]* [[H1]] to [[H]] addrspace(4)* -// CHECK: [[H3:%.*]] = bitcast [[H]]* [[H1]] to i8* -// CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[H3]]) -// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv([[H]] addrspace(4)* noundef align 1 dereferenceable_or_null(1) [[H2]]) -// CHECK: [[TMP4:%.*]] = bitcast [[H]]* [[H1]] to i8* -// CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[TMP4]]) -// CHECK: ret void - -// CHECK: define {{.*}}spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv -// CHECK: entry: -// CHECK: [[ADDR_1:%.*]] = alloca [[HH:%.*]] addrspace(4)*, align 8 -// CHECK: [[ADDR1_CAST:%.*]] = addrspacecast [[HH]] addrspace(4)** [[ADDR_1]] to [[HH]] addrspace(4)* addrspace(4)* -// CHECK: store [[HH]] addrspace(4)* %this, [[HH]] addrspace(4)* addrspace(4)* [[ADDR1_CAST]], align 8 -// CHECK: %this1 = load [[HH]] addrspace(4)*, [[HH]] addrspace(4)* addrspace(4)* [[ADDR1_CAST]], align 8 -// CHECK: call spir_func void @_Z4funcILi2EEvv() -// CHECK: ret void template void max_concurrency() { diff --git a/clang/test/CodeGenSYCL/no_opaque_sampler.cpp b/clang/test/CodeGenSYCL/no_opaque_sampler.cpp index 0b99141677fdf..0e24a89a38c14 100644 --- a/clang/test/CodeGenSYCL/no_opaque_sampler.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_sampler.cpp @@ -1,12 +1,12 @@ // RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck --enable-var-scope %s // CHECK: define {{.*}}spir_kernel void @{{[a-zA-Z0-9_]+}}(%opencl.sampler_t addrspace(2)* [[SAMPLER_ARG:%[a-zA-Z0-9_]+]]) // CHECK-NEXT: entry: -// CHECK-NEXT: [[SAMPLER_ARG]].addr = alloca %opencl.sampler_t addrspace(2)*, align 8 +// CHECK: [[SAMPLER_ARG]].addr = alloca %opencl.sampler_t addrspace(2)*, align 8 // CHECK: [[ANON:%[a-zA-Z0-9_]+]] = alloca %class.anon, align 8 // CHECK: [[ANONCAST:%[a-zA-Z0-9_.]+]] = addrspacecast %class.anon* [[ANON]] to %class.anon addrspace(4)* // CHECK: store %opencl.sampler_t addrspace(2)* [[SAMPLER_ARG]], %opencl.sampler_t addrspace(2)* addrspace(4)* [[SAMPLER_ARG]].addr.ascast, align 8 // CHECK-NEXT: [[BITCAST:%[0-9]+]] = bitcast %class.anon* [[ANON]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[BITCAST]]) #4 +// CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[BITCAST]]) // CHECK-NEXT: [[GEP:%[a-zA-z0-9]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* [[ANONCAST]], i32 0, i32 0 // CHECK-NEXT: [[LOAD_SAMPLER_ARG:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)* addrspace(4)* [[SAMPLER_ARG]].addr.ascast, align 8 // CHECK-NEXT: call spir_func void @{{[a-zA-Z0-9_]+}}(%"class.sycl::_V1::sampler" addrspace(4)* {{[^,]*}} [[GEP]], %opencl.sampler_t addrspace(2)* [[LOAD_SAMPLER_ARG]]) diff --git a/clang/test/CodeGenSYCL/no_opaque_spir-calling-conv.cpp b/clang/test/CodeGenSYCL/no_opaque_spir-calling-conv.cpp index bcdff2a97613f..2dac44fba090e 100644 --- a/clang/test/CodeGenSYCL/no_opaque_spir-calling-conv.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_spir-calling-conv.cpp @@ -5,14 +5,16 @@ __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { kernelFunc(); } +void myFunc() { } + int main() { // CHECK: define {{.*}}spir_kernel void @_ZTSZ4mainE15kernel_function() - // CHECK: call spir_func void @_ZZ4mainENKUlvE_clEv(%class.anon addrspace(4)* {{[^,]*}} %{{.+}}) + // CHECK: call spir_func void @_Z6myFuncv() - // CHECK: define internal spir_func void @_ZZ4mainENKUlvE_clEv(%class.{{.*}}anon addrspace(4)* {{[^,]*}} %this) + // CHECK: define {{.*}}spir_func void @_Z6myFuncv() - kernel_single_task([]() {}); + kernel_single_task([]() { myFunc(); }); return 0; } diff --git a/clang/test/CodeGenSYCL/no_opaque_spir-enum.cpp b/clang/test/CodeGenSYCL/no_opaque_spir-enum.cpp index c251c2e829488..2ed2f79fe930c 100644 --- a/clang/test/CodeGenSYCL/no_opaque_spir-enum.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_spir-enum.cpp @@ -23,7 +23,6 @@ int main() { // CHECK: define {{.*}}spir_kernel void @_ZTSZ4test9enum_typeE15kernel_function(i32 noundef %_arg_val) // CHECK: getelementptr inbounds %class.anon, %class.anon addrspace(4)* - // CHECK: call spir_func void @_ZZ4test9enum_typeENKUlvE_clEv(%class.anon addrspace(4)* {{[^,]*}} %{{.+}}) test( enum_type::B ); return 0; diff --git a/clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp b/clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp index c9d531cdf00f0..bddccaaf10dcf 100644 --- a/clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp @@ -26,12 +26,10 @@ class Foo { int main() { q.submit([&](handler &h) { // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel1() {{.*}} !stall_enable ![[NUM4:[0-9]+]] - // CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(%struct.{{.*}}FuncObj addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM4]] h.single_task( FuncObj()); // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel2() {{.*}} !stall_enable ![[NUM4]] - // CHECK define {{.*}}spir_func void @{{.*}}FooclEv(%class._ZTS3Foo.Foo addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM4]] Foo f; h.single_task(f); @@ -39,7 +37,6 @@ int main() { // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel3() // CHECK-NOT: !stall_enable // CHECK-SAME: { - // CHECK: define {{.*}}spir_func void @{{.*}}func{{.*}} !stall_enable ![[NUM4]] h.single_task( []() { func(); }); @@ -47,7 +44,6 @@ int main() { // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel4() // CHECK-NOT: !stall_enable // CHECK-SAME: { - // CHECK: define {{.*}}spir_func void @{{.*}}func1{{.*}}(%class.anon{{.*}} addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #4 align 2 !stall_enable ![[NUM4]] h.single_task( []() { func1(); }); diff --git a/clang/test/CodeGenSYCL/no_opaque_union-kernel-param.cpp b/clang/test/CodeGenSYCL/no_opaque_union-kernel-param.cpp index 772e3fce4a4bc..0dbcf0129c24e 100644 --- a/clang/test/CodeGenSYCL/no_opaque_union-kernel-param.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_union-kernel-param.cpp @@ -39,4 +39,3 @@ int main() { // CHECK: [[MEMCPY_DST:%[0-9a-zA-Z_]+]] = bitcast %union.{{.*}}MyUnion addrspace(4)* [[L_STRUCT_ADDR]] to i8 addrspace(4)* // CHECK: [[MEMCPY_SRC:%[0-9a-zA-Z_]+]] = bitcast %union.{{.*}}MyUnion addrspace(4)* [[MEM_ARGAS]] to i8 addrspace(4)* // CHECK: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 [[MEMCPY_DST]], i8 addrspace(4)* align 4 [[MEMCPY_SRC]], i64 12, i1 false) -// CHECK: call spir_func void @{{.*}}(%class.anon addrspace(4)* {{[^,]*}} [[LOCAL_OBJECTAS]]) diff --git a/clang/test/CodeGenSYCL/sampler.cpp b/clang/test/CodeGenSYCL/sampler.cpp index 600269324580a..455b939b49d3d 100644 --- a/clang/test/CodeGenSYCL/sampler.cpp +++ b/clang/test/CodeGenSYCL/sampler.cpp @@ -1,11 +1,11 @@ // RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck --enable-var-scope %s // CHECK: define {{.*}}spir_kernel void @{{[a-zA-Z0-9_]+}}(ptr addrspace(2) [[SAMPLER_ARG:%[a-zA-Z0-9_]+]]) // CHECK-NEXT: entry: -// CHECK-NEXT: [[SAMPLER_ARG]].addr = alloca ptr addrspace(2), align 8 +// CHECK: [[SAMPLER_ARG]].addr = alloca ptr addrspace(2), align 8 // CHECK: [[ANON:%[a-zA-Z0-9_]+]] = alloca %class.anon, align 8 // CHECK: [[ANONCAST:%[a-zA-Z0-9_.]+]] = addrspacecast ptr [[ANON]] to ptr addrspace(4) // CHECK: store ptr addrspace(2) [[SAMPLER_ARG]], ptr addrspace(4) [[SAMPLER_ARG]].addr.ascast, align 8 -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[ANON]]) #4 +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[ANON]]) // CHECK-NEXT: [[GEP:%[a-zA-z0-9]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) [[ANONCAST]], i32 0, i32 0 // CHECK-NEXT: [[LOAD_SAMPLER_ARG:%[0-9]+]] = load ptr addrspace(2), ptr addrspace(4) [[SAMPLER_ARG]].addr.ascast, align 8 // CHECK-NEXT: call spir_func void @{{[a-zA-Z0-9_]+}}(ptr addrspace(4) {{[^,]*}} [[GEP]], ptr addrspace(2) [[LOAD_SAMPLER_ARG]]) diff --git a/clang/test/CodeGenSYCL/spir-calling-conv.cpp b/clang/test/CodeGenSYCL/spir-calling-conv.cpp index c345ee03be46c..9164773a91ccc 100644 --- a/clang/test/CodeGenSYCL/spir-calling-conv.cpp +++ b/clang/test/CodeGenSYCL/spir-calling-conv.cpp @@ -5,14 +5,16 @@ __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { kernelFunc(); } +void myFunc() { } + int main() { // CHECK: define {{.*}}spir_kernel void @_ZTSZ4mainE15kernel_function() - // CHECK: call spir_func void @_ZZ4mainENKUlvE_clEv(ptr addrspace(4) {{[^,]*}} %{{.+}}) + // CHECK: call spir_func void @_Z6myFuncv() - // CHECK: define internal spir_func void @_ZZ4mainENKUlvE_clEv(ptr addrspace(4) {{[^,]*}} %this) + // CHECK: define {{.*}}spir_func void @_Z6myFuncv() - kernel_single_task([]() {}); + kernel_single_task([]() { myFunc(); }); return 0; } diff --git a/clang/test/CodeGenSYCL/spir-enum.cpp b/clang/test/CodeGenSYCL/spir-enum.cpp index cc0978a844478..754d85e8d4272 100644 --- a/clang/test/CodeGenSYCL/spir-enum.cpp +++ b/clang/test/CodeGenSYCL/spir-enum.cpp @@ -23,7 +23,6 @@ int main() { // CHECK: define {{.*}}spir_kernel void @_ZTSZ4test9enum_typeE15kernel_function(i32 noundef %_arg_val) // CHECK: getelementptr inbounds %class.anon, ptr addrspace(4) - // CHECK: call spir_func void @_ZZ4test9enum_typeENKUlvE_clEv(ptr addrspace(4) {{[^,]*}} %{{.+}}) test( enum_type::B ); return 0; diff --git a/clang/test/CodeGenSYCL/stall_enable_device.cpp b/clang/test/CodeGenSYCL/stall_enable_device.cpp index 7e6183b397ba9..490591ce3c04d 100644 --- a/clang/test/CodeGenSYCL/stall_enable_device.cpp +++ b/clang/test/CodeGenSYCL/stall_enable_device.cpp @@ -26,12 +26,10 @@ class Foo { int main() { q.submit([&](handler &h) { // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel1() {{.*}} !stall_enable ![[NUM4:[0-9]+]] - // CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM4]] h.single_task( FuncObj()); // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel2() {{.*}} !stall_enable ![[NUM4]] - // CHECK define {{.*}}spir_func void @{{.*}}FooclEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM4]] Foo f; h.single_task(f); @@ -39,7 +37,6 @@ int main() { // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel3() // CHECK-NOT: !stall_enable // CHECK-SAME: { - // CHECK: define {{.*}}spir_func void @{{.*}}func{{.*}} !stall_enable ![[NUM4]] h.single_task( []() { func(); }); @@ -47,7 +44,6 @@ int main() { // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel4() // CHECK-NOT: !stall_enable // CHECK-SAME: { - // CHECK: define {{.*}}spir_func void @{{.*}}func1{{.*}}(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %this) #4 align 2 !stall_enable ![[NUM4]] h.single_task( []() { func1(); }); diff --git a/clang/test/CodeGenSYCL/sycl-device-static-init.cpp b/clang/test/CodeGenSYCL/sycl-device-static-init.cpp index e9a1d7f58f653..eeb391248193f 100644 --- a/clang/test/CodeGenSYCL/sycl-device-static-init.cpp +++ b/clang/test/CodeGenSYCL/sycl-device-static-init.cpp @@ -7,7 +7,6 @@ // CHECK-NOT: @_ZN8BaseInitI12TestBaseTypeE15s_regbase_ncsdmE = weak_odr addrspace(1) global %struct._ZTS16RegisterBaseInit.RegisterBaseInit zeroinitializer, comdat, align 1 // CHECK-NOT: @_ZGVN8BaseInitI12TestBaseTypeE15s_regbase_ncsdmE = weak_odr global i64 0, comdat($_ZN8BaseInitI12TestBaseTypeE9s_regbaseE), align 8 // CHECK: define {{.*}}spir_kernel void @_ZTSZ4mainE11fake_kernel() -// CHECK: call spir_func void @_ZZ4mainENKUlvE_clEv struct TestBaseType {}; struct RegisterBaseInit { diff --git a/clang/test/CodeGenSYCL/union-kernel-param.cpp b/clang/test/CodeGenSYCL/union-kernel-param.cpp index d34dec879992c..14a6e0e440232 100644 --- a/clang/test/CodeGenSYCL/union-kernel-param.cpp +++ b/clang/test/CodeGenSYCL/union-kernel-param.cpp @@ -33,4 +33,3 @@ int main() { // CHECK: [[MEM_ARGAS:%.*]] = addrspacecast ptr [[MEM_ARG]] to ptr addrspace(4) // CHECK: [[L_STRUCT_ADDR:%[a-zA-Z0-9_]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) [[LOCAL_OBJECTAS]], i32 0, i32 0 // CHECK: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 [[L_STRUCT_ADDR]], ptr addrspace(4) align 4 [[MEM_ARGAS]], i64 12, i1 false) -// CHECK: call spir_func void @{{.*}}(ptr addrspace(4) {{[^,]*}} [[LOCAL_OBJECTAS]]) diff --git a/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp b/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp index fb02c1b876106..6ea572b0f0922 100644 --- a/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp +++ b/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -triple spir64-unknown-unknown -aux-triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s --check-prefixes=WIN,CHECK +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s template __attribute__((sycl_kernel)) void kernel(Func F){ @@ -57,11 +57,4 @@ int main() { // CHECK: private unnamed_addr constant [17 x i8] c"_ZTSZ4mainEUlvE_\00" // CHECK: private unnamed_addr constant [17 x i8] c"_ZTSZ4mainEUliE_\00" // CHECK: private unnamed_addr constant [17 x i8] c"_ZTSZ4mainEUldE_\00" - - // On Windows, ensure that we haven't broken the 'lambda numbering' for thex - // lambda itself. - // WIN: define internal void @"??R Date: Tue, 11 Oct 2022 10:07:58 +0100 Subject: [PATCH 2/6] [SYCL] Introduce flag to disable force inlining of kernel lambda --- clang/include/clang/Basic/LangOptions.def | 1 + clang/include/clang/Driver/Options.td | 6 +++ clang/lib/Sema/SemaSYCL.cpp | 9 +++- .../check-direct-attribute-propagation.cpp | 7 +-- .../CodeGenSYCL/debug-info-srcpos-kernel.cpp | 7 ++- clang/test/CodeGenSYCL/device-functions.cpp | 3 +- clang/test/CodeGenSYCL/device-variables.cpp | 11 +++-- clang/test/CodeGenSYCL/esimd_metadata2.cpp | 1 + clang/test/CodeGenSYCL/kernel-handler.cpp | 7 ++- clang/test/CodeGenSYCL/max-concurrency.cpp | 40 +++++++++++++++- .../no_opaque_basic-kernel-wrapper.cpp | 5 +- ...que_check-direct-attribute-propagation.cpp | 7 +-- .../no_opaque_device-functions.cpp | 3 +- .../no_opaque_device-variables.cpp | 9 ++-- .../CodeGenSYCL/no_opaque_kernel-handler.cpp | 7 ++- .../CodeGenSYCL/no_opaque_max-concurrency.cpp | 46 ++++++++++++++++++- clang/test/CodeGenSYCL/no_opaque_sampler.cpp | 6 +-- .../no_opaque_spir-calling-conv.cpp | 10 ++-- .../test/CodeGenSYCL/no_opaque_spir-enum.cpp | 3 +- .../no_opaque_stall_enable_device.cpp | 6 ++- .../no_opaque_union-kernel-param.cpp | 3 +- clang/test/CodeGenSYCL/sampler.cpp | 6 +-- clang/test/CodeGenSYCL/spir-calling-conv.cpp | 10 ++-- clang/test/CodeGenSYCL/spir-enum.cpp | 3 +- .../test/CodeGenSYCL/stall_enable_device.cpp | 6 ++- .../CodeGenSYCL/sycl-device-static-init.cpp | 3 +- clang/test/CodeGenSYCL/union-kernel-param.cpp | 3 +- .../unique_stable_name_windows_diff.cpp | 11 ++++- .../sycl-force-inline-kernel-lambda.cpp | 30 ++++++++++++ sycl/doc/UsersManual.md | 6 +++ 30 files changed, 221 insertions(+), 54 deletions(-) create mode 100644 clang/test/SemaSYCL/sycl-force-inline-kernel-lambda.cpp diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 4717fae090312..00daa04c13442 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -279,6 +279,7 @@ LANGOPT(IntelFPGA , 1, 0, "Perform ahead-of-time compilation for FPGA") LANGOPT(SYCLAllowFuncPtr , 1, 0, "Allow function pointers in SYCL device code") LANGOPT(SYCLStdLayoutKernelParams, 1, 0, "Enable standard layout requirement for SYCL kernel parameters") LANGOPT(SYCLUnnamedLambda , 1, 0, "Allow unnamed lambda SYCL kernels") +LANGOPT(SYCLForceInlineKernelLambda , 1, 0, "Force inline SYCL kernel lambdas in entry point") LANGOPT(SYCLESIMDForceStatelessMem, 1, 0, "Make accessors use USM memory in ESIMD kernels") ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL standard used") LANGOPT(DeclareSPIRVBuiltins, 1, 0, "Declare SPIR-V builtin functions") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 86aa1027ffda4..67ae72aa19058 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2942,6 +2942,12 @@ defm sycl_unnamed_lambda " >= clang::LangOptions::SYCLMajorVersion::SYCL_2020")>, PosFlag, NegFlag, BothFlags<[CC1Option, CoreOption], " unnamed SYCL lambda kernels">>; +defm sycl_inline_kernel_lambda + : BoolFOption< + "sycl-force-inline-kernel-lambda", LangOpts<"SYCLForceInlineKernelLambda">, + DefaultTrue, + PosFlag, NegFlag, + BothFlags<[CC1Option, CoreOption], " force inline SYCL kernels lambda in entry point">>; def fsycl_help_EQ : Joined<["-"], "fsycl-help=">, Flags<[NoXarchOption, CoreOption]>, HelpText<"Emit help information from the " "related offline compilation tool. Valid values: all, fpga, gen, x86_64.">, diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index af664de3da4e7..81c8867373e19 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -838,8 +838,13 @@ class SingleDeviceFunctionTracker { llvm::SmallVector CallStack; VisitCallNode(KernelNode, GetFDFromNode(KernelNode), CallStack); - // Always inline the KernelBody in the kernel entry point. - if (KernelBody) { + // Always inline the KernelBody in the kernel entry point. For ESIMD + // inlining is handled later down the pipeline. + if (KernelBody && + Parent.SemaRef.getLangOpts().SYCLForceInlineKernelLambda && + !KernelBody->hasAttr() && + !KernelBody->hasAttr() && + !KernelBody->hasAttr()) { KernelBody->addAttr(AlwaysInlineAttr::CreateImplicit( KernelBody->getASTContext(), {}, AttributeCommonInfo::AS_Keyword, AlwaysInlineAttr::Keyword_forceinline)); diff --git a/clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp b/clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp index 8d0989ba3327c..7e4973c60f044 100644 --- a/clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp +++ b/clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s // Tests for IR of [[intel::scheduler_target_fmax_mhz()]], [[intel::num_simd_work_items()]], // [[intel::no_global_work_offset()]], [[intel::max_global_work_dim()]], [[sycl::reqd_sub_group_size()]], @@ -304,6 +304,7 @@ int main() { // Test attribute is not propagated. // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name32() #0 !kernel_arg_buffer_location ![[NUM]] + // CHECK: define {{.*}}spir_func void @{{.*}}Functor10{{.*}}(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 // CHECK-NOT: noalias // CHECK-SAME: { // CHECK: define dso_local spir_func void @_Z4foo8v() @@ -311,12 +312,12 @@ int main() { h.single_task(f10); // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name33() #0 !kernel_arg_buffer_location ![[NUM]] - // CHECK: store ptr addrspace(4) %Foo8{{.*}} !noalias + // CHECK: define {{.*}}spir_func void @{{.*}}Foo8{{.*}}(ptr addrspace(4) noalias noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 Foo8 boo8; h.single_task(boo8); // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name34() #0 !kernel_arg_buffer_location ![[NUM]] - // CHECK: store ptr addrspace(4){{.*}} !noalias + // CHECK: define {{.*}}spir_func void @{{.*}}(ptr addrspace(4) noalias noundef align 1 dereferenceable_or_null(1) %this) #4 align 2 h.single_task( []() [[intel::kernel_args_restrict]]{}); }); diff --git a/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp b/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp index a610c98aa4889..ba9c97a6edc79 100644 --- a/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp +++ b/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp @@ -1,4 +1,4 @@ -// RUN: %clang -fsycl-device-only %s -S -emit-llvm -O0 -g -o - | FileCheck %s +// RUN: %clang -Xclang -fno-sycl-force-inline-kernel-lambda -fsycl-device-only %s -S -emit-llvm -O0 -g -o - | FileCheck %s // // Verify the SYCL kernel routine is marked artificial and has the // expected source correlation. @@ -27,9 +27,14 @@ int main() { // CHECK: define{{.*}} spir_kernel {{.*}}19use_kernel_for_test({{.*}}){{.*}} !dbg [[KERNEL:![0-9]+]] {{.*}}{ // CHECK: getelementptr inbounds %class.anon, {{.*}}, i32 0, i32 0, !dbg [[LINE_A0:![0-9]+]] // CHECK: call spir_func void {{.*}}6__init{{.*}} !dbg [[LINE_A0]] +// CHECK: call spir_func void @_ZZ4mainENKUlvE_clEv{{.*}} !dbg [[LINE_B0:![0-9]+]] +// CHECK: ret void, !dbg [[LINE_C0:![0-9]+]] // CHECK: [[KERNEL]] = {{.*}}!DISubprogram(name: "{{.*}}19use_kernel_for_test" // CHECK-SAME: scope: [[FILE:![0-9]+]], // CHECK-SAME: file: [[FILE]], // CHECK-SAME: flags: DIFlagArtificial | DIFlagPrototyped // CHECK: [[FILE]] = !DIFile(filename: "{{.*}}debug-info-srcpos-kernel.cpp"{{.*}}) // CHECK: [[LINE_A0]] = !DILocation(line: 15,{{.*}}scope: [[KERNEL]] +// CHECK: [[LINE_B0]] = !DILocation(line: 16,{{.*}}scope: [[BLOCK:![0-9]+]] +// CHECK: [[BLOCK]] = distinct !DILexicalBlock(scope: [[KERNEL]] +// CHECK: [[LINE_C0]] = !DILocation(line: 17,{{.*}}scope: [[KERNEL]] diff --git a/clang/test/CodeGenSYCL/device-functions.cpp b/clang/test/CodeGenSYCL/device-functions.cpp index 0346cc87bda8c..6415da9e8ef56 100644 --- a/clang/test/CodeGenSYCL/device-functions.cpp +++ b/clang/test/CodeGenSYCL/device-functions.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s template T bar(T arg); @@ -22,5 +22,6 @@ int main() { return 0; } // CHECK: define {{.*}}spir_kernel void @_ZTSZ4mainE11fake_kernel() +// CHECK: define internal spir_func void @_ZZ4mainENKUlvE_clEv(ptr addrspace(4) {{[^,]*}} %this) // CHECK: define {{.*}}spir_func void @_Z3foov() // CHECK: define linkonce_odr spir_func noundef i32 @_Z3barIiET_S0_(i32 noundef %arg) diff --git a/clang/test/CodeGenSYCL/device-variables.cpp b/clang/test/CodeGenSYCL/device-variables.cpp index 3c8b575acc2b9..f8c572a0e982d 100644 --- a/clang/test/CodeGenSYCL/device-variables.cpp +++ b/clang/test/CodeGenSYCL/device-variables.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s enum class test_type { value1, value2, value3 }; @@ -23,18 +23,19 @@ int main() { kernel([=]() { // Global variables used directly foo(global_value); - // CHECK: [[LOAD:%[a-z0-9.]+]] = load ptr addrspace(4){{.*}} // CHECK: call spir_func void @{{.*}}foo{{.*}}(ptr addrspace(4) noundef align 4 dereferenceable(4) addrspacecast (ptr addrspace(1) @{{.*}}global_value to ptr addrspace(4))) int a = my_array[0]; - // CHECK: store i32 42, ptr addrspace(4) %a + // CHECK: [[LOAD:%[0-9]+]] = load i32, ptr addrspace(4) + // CHECK: store i32 [[LOAD]], ptr addrspace(4) %a int b = some_const; // Constant used directly // CHECK: store i32 1, ptr addrspace(4) %b foo(local_value); // Local variables and constexprs captured by lambda - // CHECK: call spir_func void @{{.*}}foo{{.*}}(ptr addrspace(4) noundef align 4 dereferenceable(4) [[LOAD]]) + // CHECK: [[GEP:%[a-z_]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) %{{.*}}, i32 0, i32 0 + // CHECK: call spir_func void @{{.*}}foo{{.*}}(ptr addrspace(4) noundef align 4 dereferenceable(4) [[GEP]]) int some_device_local_var = some_local_var; - // CHECK: [[GEP1:%[a-z_.]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) %{{.*}}, i32 0, i32 1 + // CHECK: [[GEP1:%[a-z_]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) %{{.*}}, i32 0, i32 1 // CHECK: [[LOAD1:%[0-9]+]] = load i32, ptr addrspace(4) [[GEP1]] // CHECK: store i32 [[LOAD1]], ptr addrspace(4) %some_device_local_var }); diff --git a/clang/test/CodeGenSYCL/esimd_metadata2.cpp b/clang/test/CodeGenSYCL/esimd_metadata2.cpp index 458c028a126d1..897aa54f3b433 100644 --- a/clang/test/CodeGenSYCL/esimd_metadata2.cpp +++ b/clang/test/CodeGenSYCL/esimd_metadata2.cpp @@ -10,6 +10,7 @@ __attribute__((sycl_device)) __attribute__((sycl_explicit_simd)) void esimd_func // CHECK-ESIMD-DAG: define {{.*}}spir_kernel void @{{.*}}kernel_cm() #{{[0-9]+}} !sycl_explicit_simd !{{[0-9]+}} !intel_reqd_sub_group_size ![[SGSIZE1:[0-9]+]] {{.*}}{ // CHECK-ESIMD-DAG: define {{.*}}spir_func void @{{.*}}esimd_funcv() #{{[0-9]+}} !sycl_explicit_simd !{{[0-9]+}} !intel_reqd_sub_group_size ![[SGSIZE1]] { // CHECK-ESIMD-DAG: define {{.*}}spir_func void @{{.*}}shared_funcv() #{{[0-9]+}} { +// CHECK-ESIMD-DAG: define linkonce_odr spir_func void @_ZN12ESIMDFunctorclEv({{.*}}) #{{[0-9]+}} {{.*}} !sycl_explicit_simd !{{[0-9]+}} { class ESIMDFunctor { public: diff --git a/clang/test/CodeGenSYCL/kernel-handler.cpp b/clang/test/CodeGenSYCL/kernel-handler.cpp index 8eaa33608188f..f94d416e6b260 100644 --- a/clang/test/CodeGenSYCL/kernel-handler.cpp +++ b/clang/test/CodeGenSYCL/kernel-handler.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT // This test checks IR generated when kernel_handler argument // (used to handle SYCL 2020 specialization constants) is passed @@ -33,3 +33,6 @@ void test(int val) { // NATIVESUPPORT-NOT: load ptr addrspace(1), ptr addrspace(1) %_arg__specialization_constants_buffer.addr, align 8 // NATIVESUPPORT-NOT: addrspacecast ptr addrspace(1) %{{[0-9]+}} to ptr // NATIVESUPPORT-NOT: call void @{{.*}}__init_specialization_constants_buffer{{.*}}(ptr noundef align 4 nonnull align 1 dereferenceable(1) %kh, ptr noundef align 4 %{{[0-9]+}}) + +// ALL: call{{ spir_func | }}void @{{[a-zA-Z0-9_$]+}}kernel_handler{{[a-zA-Z0-9_$]+}} +// ALL-SAME: noundef byval(%"class.sycl::_V1::kernel_handler") diff --git a/clang/test/CodeGenSYCL/max-concurrency.cpp b/clang/test/CodeGenSYCL/max-concurrency.cpp index 77c00f1fc7b15..82465ebafae52 100644 --- a/clang/test/CodeGenSYCL/max-concurrency.cpp +++ b/clang/test/CodeGenSYCL/max-concurrency.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s #include "sycl.hpp" @@ -20,10 +20,48 @@ // CHECK: ret void // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name1() [[ATTR0:#.*]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]] +// CHECK: entry: +// CHECK: [[F1:%.*]] = alloca [[CLASS_F1:%.*]], align 1 +// CHECK: [[F1_ASCAST:%.*]] = addrspacecast ptr [[F1]] to ptr addrspace(4) +// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[F1]]) +// CHECK: call spir_func void @_ZNK8Functor1clEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[F1_ASCAST]]) +// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[F1]]) +// CHECK: ret void // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name4() [[ATTR0]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]] +// CHECK: entry +// CHECK: [[F3:%.*]] = alloca [[CLASS_F3:%.*]], align 1 +// CHECK: [[F3_ASCAST:%.*]] = addrspacecast ptr [[F3]] to ptr addrspace(4) +// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[F3]]) +// CHECK: call spir_func void @_ZNK8Functor3ILi4EEclEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[F3_ASCAST]]) +// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[F3]] +// CHECK: ret void + +// CHECK: define linkonce_odr spir_func void @_ZNK8Functor3ILi4EEclEv +// CHECK: entry: +// CHECK: [[ADDR_1:%.*]] = alloca ptr addrspace(4), align 8 +// CHECK: [[ADDR1_CAST:%.*]] = addrspacecast ptr [[ADDR_1]] to ptr addrspace(4) +// CHECK: store ptr addrspace(4) %this, ptr addrspace(4) [[ADDR1_CAST]], align 8 +// CHECK: %this1 = load ptr addrspace(4), ptr addrspace(4) [[ADDR1_CAST]], align 8 +// CHECK: ret void // CHECK: define dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E12kernel_name5() +// CHECK: entry: +// CHECK: [[H1:%.*]] = alloca [[H:%.*]], align 1 +// CHECK: [[H2:%.*]] = addrspacecast ptr [[H1]] to ptr addrspace(4) +// CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr [[H1]]) +// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) [[H2]]) +// CHECK: call void @llvm.lifetime.end.p0(i64 1, ptr [[H1]]) +// CHECK: ret void + +// CHECK: define {{.*}}spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv +// CHECK: entry: +// CHECK: [[ADDR_1:%.*]] = alloca ptr addrspace(4), align 8 +// CHECK: [[ADDR1_CAST:%.*]] = addrspacecast ptr [[ADDR_1]] to ptr addrspace(4) +// CHECK: store ptr addrspace(4) %this, ptr addrspace(4) [[ADDR1_CAST]], align 8 +// CHECK: %this1 = load ptr addrspace(4), ptr addrspace(4) [[ADDR1_CAST]], align 8 +// CHECK: call spir_func void @_Z4funcILi2EEvv() +// CHECK: ret void template void max_concurrency() { diff --git a/clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp b/clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp index 766fa44818acd..ac5ee2ba871ab 100644 --- a/clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s // This test checks that compiler generates correct kernel wrapper for basic // case. @@ -58,3 +58,6 @@ int main() { // CHECK-SAME: %"struct.sycl::_V1::range"* noundef byval({{.*}}) align 4 [[ARANGE]], // CHECK-SAME: %"struct.sycl::_V1::range"* noundef byval({{.*}}) align 4 [[MRANGE]], // CHECK-SAME: %"struct.sycl::_V1::id"* noundef byval({{.*}}) align 4 [[OID]]) + +// Check lambda "()" operator call +// CHECK: call spir_func void @{{.*}}(%class.anon addrspace(4)* {{[^,]*}}) diff --git a/clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp b/clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp index 9923f75f6c402..24cb44bfe8cdd 100644 --- a/clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s // Tests for IR of [[intel::scheduler_target_fmax_mhz()]], [[intel::num_simd_work_items()]], // [[intel::no_global_work_offset()]], [[intel::max_global_work_dim()]], [[sycl::reqd_sub_group_size()]], @@ -304,6 +304,7 @@ int main() { // Test attribute is not propagated. // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name32() #0 !kernel_arg_buffer_location ![[NUM]] + // CHECK: define {{.*}}spir_func void @{{.*}}Functor10{{.*}}(%class.Functor10 addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 // CHECK-NOT: noalias // CHECK-SAME: { // CHECK: define dso_local spir_func void @_Z4foo8v() @@ -311,12 +312,12 @@ int main() { h.single_task(f10); // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name33() #0 !kernel_arg_buffer_location ![[NUM]] - // CHECK: store %class.Foo8{{.*}} !noalias + // CHECK: define {{.*}}spir_func void @{{.*}}Foo8{{.*}}(%class.Foo8 addrspace(4)* noalias noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 Foo8 boo8; h.single_task(boo8); // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name34() #0 !kernel_arg_buffer_location ![[NUM]] - // CHECK: store %class.anon{{.*}} !noalias + // CHECK: define {{.*}}spir_func void @{{.*}}(%class.anon{{.*}} addrspace(4)* noalias noundef align 1 dereferenceable_or_null(1) %this) #4 align 2 h.single_task( []() [[intel::kernel_args_restrict]]{}); }); diff --git a/clang/test/CodeGenSYCL/no_opaque_device-functions.cpp b/clang/test/CodeGenSYCL/no_opaque_device-functions.cpp index 9dbf48e8460e0..2ef3295daecd7 100644 --- a/clang/test/CodeGenSYCL/no_opaque_device-functions.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_device-functions.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s template T bar(T arg); @@ -22,5 +22,6 @@ int main() { return 0; } // CHECK: define {{.*}}spir_kernel void @_ZTSZ4mainE11fake_kernel() +// CHECK: define internal spir_func void @_ZZ4mainENKUlvE_clEv(%class.anon addrspace(4)* {{[^,]*}} %this) // CHECK: define {{.*}}spir_func void @_Z3foov() // CHECK: define linkonce_odr spir_func noundef i32 @_Z3barIiET_S0_(i32 noundef %arg) diff --git a/clang/test/CodeGenSYCL/no_opaque_device-variables.cpp b/clang/test/CodeGenSYCL/no_opaque_device-variables.cpp index 991a49afd8e63..f6d5fad56cf52 100644 --- a/clang/test/CodeGenSYCL/no_opaque_device-variables.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_device-variables.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s enum class test_type { value1, value2, value3 }; @@ -25,16 +25,17 @@ int main() { foo(global_value); // CHECK: call spir_func void @{{.*}}foo{{.*}}(i32 addrspace(4)* noundef align 4 dereferenceable(4) addrspacecast (i32 addrspace(1)* @{{.*}}global_value to i32 addrspace(4)*)) int a = my_array[0]; - // CHECK: store i32 42, i32 addrspace(4)* %a + // CHECK: [[LOAD:%[0-9]+]] = load i32, i32 addrspace(4)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(4)* addrspacecast ([1 x i32] addrspace(1)* @{{.*}}my_array to [1 x i32] addrspace(4)*), i64 0, i64 0) + // CHECK: store i32 [[LOAD]], i32 addrspace(4)* %a int b = some_const; // Constant used directly // CHECK: store i32 1, i32 addrspace(4)* %b foo(local_value); // Local variables and constexprs captured by lambda - // CHECK: [[GEP:%[a-z_.]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{.*}}, i32 0, i32 0 + // CHECK: [[GEP:%[a-z_]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{.*}}, i32 0, i32 0 // CHECK: call spir_func void @{{.*}}foo{{.*}}(i32 addrspace(4)* noundef align 4 dereferenceable(4) [[GEP]]) int some_device_local_var = some_local_var; - // CHECK: [[GEP1:%[a-z_.]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{.*}}, i32 0, i32 1 + // CHECK: [[GEP1:%[a-z_]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %{{.*}}, i32 0, i32 1 // CHECK: [[LOAD1:%[0-9]+]] = load i32, i32 addrspace(4)* [[GEP1]] // CHECK: store i32 [[LOAD1]], i32 addrspace(4)* %some_device_local_var }); diff --git a/clang/test/CodeGenSYCL/no_opaque_kernel-handler.cpp b/clang/test/CodeGenSYCL/no_opaque_kernel-handler.cpp index b4130fc321da2..cf80fcdb15580 100644 --- a/clang/test/CodeGenSYCL/no_opaque_kernel-handler.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_kernel-handler.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT // This test checks IR generated when kernel_handler argument // (used to handle SYCL 2020 specialization constants) is passed @@ -33,3 +33,6 @@ void test(int val) { // NATIVESUPPORT-NOT: load i8 addrspace(1)*, i8 addrspace(1)** %_arg__specialization_constants_buffer.addr, align 8 // NATIVESUPPORT-NOT: addrspacecast i8 addrspace(1)* %{{[0-9]+}} to i8* // NATIVESUPPORT-NOT: call void @{{.*}}__init_specialization_constants_buffer{{.*}}(%"class.sycl::_V1::kernel_handler"* noundef align 4 nonnull align 1 dereferenceable(1) %kh, i8* noundef align 4 %{{[0-9]+}}) + +// ALL: call{{ spir_func | }}void @{{[a-zA-Z0-9_$]+}}kernel_handler{{[a-zA-Z0-9_$]+}} +// ALL-SAME: noundef byval(%"class.sycl::_V1::kernel_handler") diff --git a/clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp b/clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp index 43f7e9dcf6c14..b5d1c8dde8f34 100644 --- a/clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s #include "sycl.hpp" @@ -19,10 +19,54 @@ // CHECK: ret void // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name1() [[ATTR0:#.*]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]] +// CHECK: entry: +// CHECK: [[F1:%.*]] = alloca [[CLASS_F1:%.*]], align 1 +// CHECK: [[F1_ASCAST:%.*]] = addrspacecast [[CLASS_F1]]* [[F1]] to [[CLASS_F1]] addrspace(4)* +// CHECK: [[TMP0:%.*]] = bitcast [[CLASS_F1]]* [[F1]] to i8* +// CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[TMP0]]) +// CHECK: call spir_func void @_ZNK8Functor1clEv([[CLASS_F1]] addrspace(4)* noundef align 1 dereferenceable_or_null(1) [[F1_ASCAST]]) +// CHECK: [[TMP1:%.*]] = bitcast [[CLASS_F1]]* [[F1]] to i8* +// CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[TMP1]]) +// CHECK: ret void // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name4() [[ATTR0]] {{.*}} !max_concurrency ![[NUM1:[0-9]+]] +// CHECK: entry +// CHECK: [[F3:%.*]] = alloca [[CLASS_F3:%.*]], align 1 +// CHECK: [[F3_ASCAST:%.*]] = addrspacecast [[CLASS_F3]]* [[F3]] to [[CLASS_F3]] addrspace(4)* +// CHECK: [[TMP2:%.*]] = bitcast [[CLASS_F3]]* [[F3]] to i8* +// CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[TMP2]]) +// CHECK: call spir_func void @_ZNK8Functor3ILi4EEclEv([[CLASS_F3]] addrspace(4)* noundef align 1 dereferenceable_or_null(1) [[F3_ASCAST]]) +// CHECK: [[TMP3:%.*]] = bitcast [[CLASS_F3]]* [[F3]] to i8* +// CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[TMP3]] +// CHECK: ret void + +// CHECK: define linkonce_odr spir_func void @_ZNK8Functor3ILi4EEclEv +// CHECK: entry: +// CHECK: [[ADDR_1:%.*]] = alloca [[CLASS_F3:%.*]] addrspace(4)*, align 8 +// CHECK: [[ADDR1_CAST:%.*]] = addrspacecast [[CLASS_F3]] addrspace(4)** [[ADDR_1]] to [[CLASS_F3]] addrspace(4)* addrspace(4)* +// CHECK: store [[CLASS_F3]] addrspace(4)* %this, [[CLASS_F3]] addrspace(4)* addrspace(4)* [[ADDR1_CAST]], align 8 +// CHECK: %this1 = load [[CLASS_F3]] addrspace(4)*, [[CLASS_F3]] addrspace(4)* addrspace(4)* [[ADDR1_CAST]], align 8 +// CHECK: ret void // CHECK: define dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E12kernel_name5() +// CHECK: entry: +// CHECK: [[H1:%.*]] = alloca [[H:%.*]], align 1 +// CHECK: [[H2:%.*]] = addrspacecast [[H]]* [[H1]] to [[H]] addrspace(4)* +// CHECK: [[H3:%.*]] = bitcast [[H]]* [[H1]] to i8* +// CHECK: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[H3]]) +// CHECK: call spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv([[H]] addrspace(4)* noundef align 1 dereferenceable_or_null(1) [[H2]]) +// CHECK: [[TMP4:%.*]] = bitcast [[H]]* [[H1]] to i8* +// CHECK: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[TMP4]]) +// CHECK: ret void + +// CHECK: define {{.*}}spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv +// CHECK: entry: +// CHECK: [[ADDR_1:%.*]] = alloca [[HH:%.*]] addrspace(4)*, align 8 +// CHECK: [[ADDR1_CAST:%.*]] = addrspacecast [[HH]] addrspace(4)** [[ADDR_1]] to [[HH]] addrspace(4)* addrspace(4)* +// CHECK: store [[HH]] addrspace(4)* %this, [[HH]] addrspace(4)* addrspace(4)* [[ADDR1_CAST]], align 8 +// CHECK: %this1 = load [[HH]] addrspace(4)*, [[HH]] addrspace(4)* addrspace(4)* [[ADDR1_CAST]], align 8 +// CHECK: call spir_func void @_Z4funcILi2EEvv() +// CHECK: ret void template void max_concurrency() { diff --git a/clang/test/CodeGenSYCL/no_opaque_sampler.cpp b/clang/test/CodeGenSYCL/no_opaque_sampler.cpp index 0e24a89a38c14..6165a54593cd6 100644 --- a/clang/test/CodeGenSYCL/no_opaque_sampler.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_sampler.cpp @@ -1,12 +1,12 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck --enable-var-scope %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck --enable-var-scope %s // CHECK: define {{.*}}spir_kernel void @{{[a-zA-Z0-9_]+}}(%opencl.sampler_t addrspace(2)* [[SAMPLER_ARG:%[a-zA-Z0-9_]+]]) // CHECK-NEXT: entry: -// CHECK: [[SAMPLER_ARG]].addr = alloca %opencl.sampler_t addrspace(2)*, align 8 +// CHECK-NEXT: [[SAMPLER_ARG]].addr = alloca %opencl.sampler_t addrspace(2)*, align 8 // CHECK: [[ANON:%[a-zA-Z0-9_]+]] = alloca %class.anon, align 8 // CHECK: [[ANONCAST:%[a-zA-Z0-9_.]+]] = addrspacecast %class.anon* [[ANON]] to %class.anon addrspace(4)* // CHECK: store %opencl.sampler_t addrspace(2)* [[SAMPLER_ARG]], %opencl.sampler_t addrspace(2)* addrspace(4)* [[SAMPLER_ARG]].addr.ascast, align 8 // CHECK-NEXT: [[BITCAST:%[0-9]+]] = bitcast %class.anon* [[ANON]] to i8* -// CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[BITCAST]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[BITCAST]]) #4 // CHECK-NEXT: [[GEP:%[a-zA-z0-9]+]] = getelementptr inbounds %class.anon, %class.anon addrspace(4)* [[ANONCAST]], i32 0, i32 0 // CHECK-NEXT: [[LOAD_SAMPLER_ARG:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)* addrspace(4)* [[SAMPLER_ARG]].addr.ascast, align 8 // CHECK-NEXT: call spir_func void @{{[a-zA-Z0-9_]+}}(%"class.sycl::_V1::sampler" addrspace(4)* {{[^,]*}} [[GEP]], %opencl.sampler_t addrspace(2)* [[LOAD_SAMPLER_ARG]]) diff --git a/clang/test/CodeGenSYCL/no_opaque_spir-calling-conv.cpp b/clang/test/CodeGenSYCL/no_opaque_spir-calling-conv.cpp index 2dac44fba090e..8c914f8919b69 100644 --- a/clang/test/CodeGenSYCL/no_opaque_spir-calling-conv.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_spir-calling-conv.cpp @@ -1,20 +1,18 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { kernelFunc(); } -void myFunc() { } - int main() { // CHECK: define {{.*}}spir_kernel void @_ZTSZ4mainE15kernel_function() - // CHECK: call spir_func void @_Z6myFuncv() + // CHECK: call spir_func void @_ZZ4mainENKUlvE_clEv(%class.anon addrspace(4)* {{[^,]*}} %{{.+}}) - // CHECK: define {{.*}}spir_func void @_Z6myFuncv() + // CHECK: define internal spir_func void @_ZZ4mainENKUlvE_clEv(%class.{{.*}}anon addrspace(4)* {{[^,]*}} %this) - kernel_single_task([]() { myFunc(); }); + kernel_single_task([]() {}); return 0; } diff --git a/clang/test/CodeGenSYCL/no_opaque_spir-enum.cpp b/clang/test/CodeGenSYCL/no_opaque_spir-enum.cpp index 2ed2f79fe930c..456e558cb18b0 100644 --- a/clang/test/CodeGenSYCL/no_opaque_spir-enum.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_spir-enum.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { @@ -23,6 +23,7 @@ int main() { // CHECK: define {{.*}}spir_kernel void @_ZTSZ4test9enum_typeE15kernel_function(i32 noundef %_arg_val) // CHECK: getelementptr inbounds %class.anon, %class.anon addrspace(4)* + // CHECK: call spir_func void @_ZZ4test9enum_typeENKUlvE_clEv(%class.anon addrspace(4)* {{[^,]*}} %{{.+}}) test( enum_type::B ); return 0; diff --git a/clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp b/clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp index bddccaaf10dcf..0084e4296fe33 100644 --- a/clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s // Tests for IR of Intel FPGA [[intel::use_stall_enable_clusters]] function attribute on Device. @@ -26,10 +26,12 @@ class Foo { int main() { q.submit([&](handler &h) { // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel1() {{.*}} !stall_enable ![[NUM4:[0-9]+]] + // CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(%struct.{{.*}}FuncObj addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM4]] h.single_task( FuncObj()); // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel2() {{.*}} !stall_enable ![[NUM4]] + // CHECK define {{.*}}spir_func void @{{.*}}FooclEv(%class._ZTS3Foo.Foo addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM4]] Foo f; h.single_task(f); @@ -37,6 +39,7 @@ int main() { // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel3() // CHECK-NOT: !stall_enable // CHECK-SAME: { + // CHECK: define {{.*}}spir_func void @{{.*}}func{{.*}} !stall_enable ![[NUM4]] h.single_task( []() { func(); }); @@ -44,6 +47,7 @@ int main() { // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel4() // CHECK-NOT: !stall_enable // CHECK-SAME: { + // CHECK: define {{.*}}spir_func void @{{.*}}func1{{.*}}(%class.anon{{.*}} addrspace(4)* noundef align 1 dereferenceable_or_null(1) %this) #4 align 2 !stall_enable ![[NUM4]] h.single_task( []() { func1(); }); diff --git a/clang/test/CodeGenSYCL/no_opaque_union-kernel-param.cpp b/clang/test/CodeGenSYCL/no_opaque_union-kernel-param.cpp index 0dbcf0129c24e..a9092aae39e91 100644 --- a/clang/test/CodeGenSYCL/no_opaque_union-kernel-param.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_union-kernel-param.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s // This test checks a kernel argument that is union with both array and non-array fields. @@ -39,3 +39,4 @@ int main() { // CHECK: [[MEMCPY_DST:%[0-9a-zA-Z_]+]] = bitcast %union.{{.*}}MyUnion addrspace(4)* [[L_STRUCT_ADDR]] to i8 addrspace(4)* // CHECK: [[MEMCPY_SRC:%[0-9a-zA-Z_]+]] = bitcast %union.{{.*}}MyUnion addrspace(4)* [[MEM_ARGAS]] to i8 addrspace(4)* // CHECK: call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 [[MEMCPY_DST]], i8 addrspace(4)* align 4 [[MEMCPY_SRC]], i64 12, i1 false) +// CHECK: call spir_func void @{{.*}}(%class.anon addrspace(4)* {{[^,]*}} [[LOCAL_OBJECTAS]]) diff --git a/clang/test/CodeGenSYCL/sampler.cpp b/clang/test/CodeGenSYCL/sampler.cpp index 455b939b49d3d..9fe2336fbcdd2 100644 --- a/clang/test/CodeGenSYCL/sampler.cpp +++ b/clang/test/CodeGenSYCL/sampler.cpp @@ -1,11 +1,11 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck --enable-var-scope %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck --enable-var-scope %s // CHECK: define {{.*}}spir_kernel void @{{[a-zA-Z0-9_]+}}(ptr addrspace(2) [[SAMPLER_ARG:%[a-zA-Z0-9_]+]]) // CHECK-NEXT: entry: -// CHECK: [[SAMPLER_ARG]].addr = alloca ptr addrspace(2), align 8 +// CHECK-NEXT: [[SAMPLER_ARG]].addr = alloca ptr addrspace(2), align 8 // CHECK: [[ANON:%[a-zA-Z0-9_]+]] = alloca %class.anon, align 8 // CHECK: [[ANONCAST:%[a-zA-Z0-9_.]+]] = addrspacecast ptr [[ANON]] to ptr addrspace(4) // CHECK: store ptr addrspace(2) [[SAMPLER_ARG]], ptr addrspace(4) [[SAMPLER_ARG]].addr.ascast, align 8 -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[ANON]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[ANON]]) #4 // CHECK-NEXT: [[GEP:%[a-zA-z0-9]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) [[ANONCAST]], i32 0, i32 0 // CHECK-NEXT: [[LOAD_SAMPLER_ARG:%[0-9]+]] = load ptr addrspace(2), ptr addrspace(4) [[SAMPLER_ARG]].addr.ascast, align 8 // CHECK-NEXT: call spir_func void @{{[a-zA-Z0-9_]+}}(ptr addrspace(4) {{[^,]*}} [[GEP]], ptr addrspace(2) [[LOAD_SAMPLER_ARG]]) diff --git a/clang/test/CodeGenSYCL/spir-calling-conv.cpp b/clang/test/CodeGenSYCL/spir-calling-conv.cpp index 9164773a91ccc..268699da6375c 100644 --- a/clang/test/CodeGenSYCL/spir-calling-conv.cpp +++ b/clang/test/CodeGenSYCL/spir-calling-conv.cpp @@ -1,20 +1,18 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { kernelFunc(); } -void myFunc() { } - int main() { // CHECK: define {{.*}}spir_kernel void @_ZTSZ4mainE15kernel_function() - // CHECK: call spir_func void @_Z6myFuncv() + // CHECK: call spir_func void @_ZZ4mainENKUlvE_clEv(ptr addrspace(4) {{[^,]*}} %{{.+}}) - // CHECK: define {{.*}}spir_func void @_Z6myFuncv() + // CHECK: define internal spir_func void @_ZZ4mainENKUlvE_clEv(ptr addrspace(4) {{[^,]*}} %this) - kernel_single_task([]() { myFunc(); }); + kernel_single_task([]() {}); return 0; } diff --git a/clang/test/CodeGenSYCL/spir-enum.cpp b/clang/test/CodeGenSYCL/spir-enum.cpp index 754d85e8d4272..1fce8030280cf 100644 --- a/clang/test/CodeGenSYCL/spir-enum.cpp +++ b/clang/test/CodeGenSYCL/spir-enum.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { @@ -23,6 +23,7 @@ int main() { // CHECK: define {{.*}}spir_kernel void @_ZTSZ4test9enum_typeE15kernel_function(i32 noundef %_arg_val) // CHECK: getelementptr inbounds %class.anon, ptr addrspace(4) + // CHECK: call spir_func void @_ZZ4test9enum_typeENKUlvE_clEv(ptr addrspace(4) {{[^,]*}} %{{.+}}) test( enum_type::B ); return 0; diff --git a/clang/test/CodeGenSYCL/stall_enable_device.cpp b/clang/test/CodeGenSYCL/stall_enable_device.cpp index 490591ce3c04d..334131423d0ec 100644 --- a/clang/test/CodeGenSYCL/stall_enable_device.cpp +++ b/clang/test/CodeGenSYCL/stall_enable_device.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s // Tests for IR of Intel FPGA [[intel::use_stall_enable_clusters]] function attribute on Device. @@ -26,10 +26,12 @@ class Foo { int main() { q.submit([&](handler &h) { // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel1() {{.*}} !stall_enable ![[NUM4:[0-9]+]] + // CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM4]] h.single_task( FuncObj()); // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel2() {{.*}} !stall_enable ![[NUM4]] + // CHECK define {{.*}}spir_func void @{{.*}}FooclEv(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM4]] Foo f; h.single_task(f); @@ -37,6 +39,7 @@ int main() { // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel3() // CHECK-NOT: !stall_enable // CHECK-SAME: { + // CHECK: define {{.*}}spir_func void @{{.*}}func{{.*}} !stall_enable ![[NUM4]] h.single_task( []() { func(); }); @@ -44,6 +47,7 @@ int main() { // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel4() // CHECK-NOT: !stall_enable // CHECK-SAME: { + // CHECK: define {{.*}}spir_func void @{{.*}}func1{{.*}}(ptr addrspace(4) noundef align 1 dereferenceable_or_null(1) %this) #4 align 2 !stall_enable ![[NUM4]] h.single_task( []() { func1(); }); diff --git a/clang/test/CodeGenSYCL/sycl-device-static-init.cpp b/clang/test/CodeGenSYCL/sycl-device-static-init.cpp index eeb391248193f..d0161758e5ef9 100644 --- a/clang/test/CodeGenSYCL/sycl-device-static-init.cpp +++ b/clang/test/CodeGenSYCL/sycl-device-static-init.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s // Test that static initializers do not force the emission of globals on sycl device // CHECK-NOT: $_ZN8BaseInitI12TestBaseTypeE15s_regbase_ncsdmE = comdat any @@ -7,6 +7,7 @@ // CHECK-NOT: @_ZN8BaseInitI12TestBaseTypeE15s_regbase_ncsdmE = weak_odr addrspace(1) global %struct._ZTS16RegisterBaseInit.RegisterBaseInit zeroinitializer, comdat, align 1 // CHECK-NOT: @_ZGVN8BaseInitI12TestBaseTypeE15s_regbase_ncsdmE = weak_odr global i64 0, comdat($_ZN8BaseInitI12TestBaseTypeE9s_regbaseE), align 8 // CHECK: define {{.*}}spir_kernel void @_ZTSZ4mainE11fake_kernel() +// CHECK: call spir_func void @_ZZ4mainENKUlvE_clEv struct TestBaseType {}; struct RegisterBaseInit { diff --git a/clang/test/CodeGenSYCL/union-kernel-param.cpp b/clang/test/CodeGenSYCL/union-kernel-param.cpp index 14a6e0e440232..0cd22146f5f9d 100644 --- a/clang/test/CodeGenSYCL/union-kernel-param.cpp +++ b/clang/test/CodeGenSYCL/union-kernel-param.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s // This test checks a kernel argument that is union with both array and non-array fields. @@ -33,3 +33,4 @@ int main() { // CHECK: [[MEM_ARGAS:%.*]] = addrspacecast ptr [[MEM_ARG]] to ptr addrspace(4) // CHECK: [[L_STRUCT_ADDR:%[a-zA-Z0-9_]+]] = getelementptr inbounds %class.anon, ptr addrspace(4) [[LOCAL_OBJECTAS]], i32 0, i32 0 // CHECK: call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 [[L_STRUCT_ADDR]], ptr addrspace(4) align 4 [[MEM_ARGAS]], i64 12, i1 false) +// CHECK: call spir_func void @{{.*}}(ptr addrspace(4) {{[^,]*}} [[LOCAL_OBJECTAS]]) diff --git a/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp b/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp index 6ea572b0f0922..dc0935195b439 100644 --- a/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp +++ b/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple spir64-unknown-unknown -aux-triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -triple spir64-unknown-unknown -aux-triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s --check-prefixes=WIN,CHECK template __attribute__((sycl_kernel)) void kernel(Func F){ @@ -57,4 +57,11 @@ int main() { // CHECK: private unnamed_addr constant [17 x i8] c"_ZTSZ4mainEUlvE_\00" // CHECK: private unnamed_addr constant [17 x i8] c"_ZTSZ4mainEUliE_\00" // CHECK: private unnamed_addr constant [17 x i8] c"_ZTSZ4mainEUldE_\00" + + // On Windows, ensure that we haven't broken the 'lambda numbering' for thex + // lambda itself. + // WIN: define internal void @"??R([] {}); }); + + + // CHECK: define {{.*}} spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E16KernelNameInline() + // CHECK-NOT: call void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_ENKUlvE_clEv + q.submit([&](sycl::handler &h) { h.parallel_for([]() __attribute__((always_inline)) {}); }); + + // CHECK: define {{.*}} spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE1_clES2_E18KernelNameNoInline() + // CHECK: call void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE1_clES2_ENKUlvE_clEv + q.submit([&](sycl::handler &h) { h.parallel_for([]() __attribute__((noinline)) {}); }); + + /// The flag is ignored for ESIMD kernels + // CHECK: define {{.*}} spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE2_clES2_E15KernelNameESIMD() + // CHECK: call void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE2_clES2_ENKUlvE_clEv + q.submit([&](sycl::handler &h) { h.parallel_for([]() __attribute__((sycl_explicit_simd)) {}); }); + + return 0; +} diff --git a/sycl/doc/UsersManual.md b/sycl/doc/UsersManual.md index 891566525105b..e1c0c78bccf02 100644 --- a/sycl/doc/UsersManual.md +++ b/sycl/doc/UsersManual.md @@ -107,6 +107,12 @@ and not recommended to use in production environment. * nd_item class get_global_id()/get_global_linear_id() member functions Enabled by default. +**`-f[no]sycl-force-inline-kernel-lambda`** + + Enables/Disables inlining of the kernel lambda operator into the compiler + generated generated entry point function. This flag does not apply to ESIMD + kernels. + Enabled by default. **`-fgpu-inline-threshold=`** From e987074fadb50b7507985fefd6910c3fc7e4aedf Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 12 Oct 2022 09:48:23 +0100 Subject: [PATCH 3/6] [SYCL] Accept kernel lambda as clang argument --- clang/include/clang/Driver/Options.td | 2 +- clang/lib/Driver/ToolChains/Clang.cpp | 4 ++++ clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp | 2 +- sycl/doc/UsersManual.md | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 67ae72aa19058..cb9af9f7ffd89 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2942,7 +2942,7 @@ defm sycl_unnamed_lambda " >= clang::LangOptions::SYCLMajorVersion::SYCL_2020")>, PosFlag, NegFlag, BothFlags<[CC1Option, CoreOption], " unnamed SYCL lambda kernels">>; -defm sycl_inline_kernel_lambda +defm sycl_force_inline_kernel_lambda : BoolFOption< "sycl-force-inline-kernel-lambda", LangOpts<"SYCLForceInlineKernelLambda">, DefaultTrue, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 558debe95999b..73c2193a2fb46 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5123,6 +5123,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-sycl-std=2020"); } + if (!Args.hasFlag(options::OPT_fsycl_force_inline_kernel_lambda, + options::OPT_fno_sycl_force_inline_kernel_lambda, true)) + CmdArgs.push_back("-fno-sycl-force-inline-kernel-lambda"); + if (!Args.hasFlag(options::OPT_fsycl_unnamed_lambda, options::OPT_fno_sycl_unnamed_lambda, true)) CmdArgs.push_back("-fno-sycl-unnamed-lambda"); diff --git a/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp b/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp index ba9c97a6edc79..bff837b71b874 100644 --- a/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp +++ b/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp @@ -1,4 +1,4 @@ -// RUN: %clang -Xclang -fno-sycl-force-inline-kernel-lambda -fsycl-device-only %s -S -emit-llvm -O0 -g -o - | FileCheck %s +// RUN: %clang -fno-sycl-force-inline-kernel-lambda -fsycl-device-only %s -S -emit-llvm -O0 -g -o - | FileCheck %s // // Verify the SYCL kernel routine is marked artificial and has the // expected source correlation. diff --git a/sycl/doc/UsersManual.md b/sycl/doc/UsersManual.md index e1c0c78bccf02..9fa1f8f9da282 100644 --- a/sycl/doc/UsersManual.md +++ b/sycl/doc/UsersManual.md @@ -107,7 +107,7 @@ and not recommended to use in production environment. * nd_item class get_global_id()/get_global_linear_id() member functions Enabled by default. -**`-f[no]sycl-force-inline-kernel-lambda`** +**`-f[no-]sycl-force-inline-kernel-lambda`** Enables/Disables inlining of the kernel lambda operator into the compiler generated generated entry point function. This flag does not apply to ESIMD From bc64500995fde72ed37585d2be437facf1b66b9e Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 12 Oct 2022 09:59:18 +0100 Subject: [PATCH 4/6] [SYCL] Fix new failing tests with lambda inlining --- clang/test/CodeGenSYCL/kernel_binding_decls.cpp | 2 +- clang/test/CodeGenSYCL/no-opaque-ptr-kernel_binding_decls.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGenSYCL/kernel_binding_decls.cpp b/clang/test/CodeGenSYCL/kernel_binding_decls.cpp index f6434057f98ec..21354f9348804 100644 --- a/clang/test/CodeGenSYCL/kernel_binding_decls.cpp +++ b/clang/test/CodeGenSYCL/kernel_binding_decls.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -std=c++20 -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -internal-isystem %S/Inputs -fsycl-is-device -std=c++20 -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s #include "sycl.hpp" diff --git a/clang/test/CodeGenSYCL/no-opaque-ptr-kernel_binding_decls.cpp b/clang/test/CodeGenSYCL/no-opaque-ptr-kernel_binding_decls.cpp index d82f7caf54657..a7d82d65a8d9e 100644 --- a/clang/test/CodeGenSYCL/no-opaque-ptr-kernel_binding_decls.cpp +++ b/clang/test/CodeGenSYCL/no-opaque-ptr-kernel_binding_decls.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -std=c++20 -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -internal-isystem %S/Inputs -fsycl-is-device -std=c++20 -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s #include "sycl.hpp" From 0d3ff383752941839fd462a1598c6b974b495cca Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 12 Oct 2022 10:38:46 +0100 Subject: [PATCH 5/6] Update sycl/doc/UsersManual.md Co-authored-by: Steffen Larsen --- sycl/doc/UsersManual.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/UsersManual.md b/sycl/doc/UsersManual.md index 9fa1f8f9da282..608e8a5192102 100644 --- a/sycl/doc/UsersManual.md +++ b/sycl/doc/UsersManual.md @@ -110,7 +110,7 @@ and not recommended to use in production environment. **`-f[no-]sycl-force-inline-kernel-lambda`** Enables/Disables inlining of the kernel lambda operator into the compiler - generated generated entry point function. This flag does not apply to ESIMD + generated entry point function. This flag does not apply to ESIMD kernels. Enabled by default. From b611501ddc599d6ef1dd4e4421bef37198cbdda2 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 12 Oct 2022 15:32:51 +0100 Subject: [PATCH 6/6] [SYCL] Add driver test for -fno-sycl-force-inline-kernel-lambda --- clang/test/Driver/sycl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clang/test/Driver/sycl.c b/clang/test/Driver/sycl.c index fcdcf6e74beda..de43d054c857d 100644 --- a/clang/test/Driver/sycl.c +++ b/clang/test/Driver/sycl.c @@ -72,6 +72,11 @@ // RUN: %clang_cl -### -fsycl-device-only -fno-sycl-unnamed-lambda %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOT-LAMBDA // CHECK-NOT-LAMBDA: "-fno-sycl-unnamed-lambda" +// -fsycl-force-inline-kernel-lambda +// RUN: %clangxx -### -fsycl-device-only -fno-sycl-force-inline-kernel-lambda %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOT-INLINE +// RUN: %clang_cl -### -fsycl-device-only -fno-sycl-force-inline-kernel-lambda %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOT-INLINE +// CHECK-NOT-INLINE: "-fno-sycl-force-inline-kernel-lambda" + /// -fsycl-device-only triple checks // RUN: %clang -fsycl-device-only -target x86_64-unknown-linux-gnu -### %s 2>&1 \ // RUN: | FileCheck --check-prefix=DEVICE-64 %s