diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 4717fae090312..00daa04c13442 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -279,6 +279,7 @@ LANGOPT(IntelFPGA , 1, 0, "Perform ahead-of-time compilation for FPGA") LANGOPT(SYCLAllowFuncPtr , 1, 0, "Allow function pointers in SYCL device code") LANGOPT(SYCLStdLayoutKernelParams, 1, 0, "Enable standard layout requirement for SYCL kernel parameters") LANGOPT(SYCLUnnamedLambda , 1, 0, "Allow unnamed lambda SYCL kernels") +LANGOPT(SYCLForceInlineKernelLambda , 1, 0, "Force inline SYCL kernel lambdas in entry point") LANGOPT(SYCLESIMDForceStatelessMem, 1, 0, "Make accessors use USM memory in ESIMD kernels") ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL standard used") LANGOPT(DeclareSPIRVBuiltins, 1, 0, "Declare SPIR-V builtin functions") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 86aa1027ffda4..cb9af9f7ffd89 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2942,6 +2942,12 @@ defm sycl_unnamed_lambda " >= clang::LangOptions::SYCLMajorVersion::SYCL_2020")>, PosFlag, NegFlag, BothFlags<[CC1Option, CoreOption], " unnamed SYCL lambda kernels">>; +defm sycl_force_inline_kernel_lambda + : BoolFOption< + "sycl-force-inline-kernel-lambda", LangOpts<"SYCLForceInlineKernelLambda">, + DefaultTrue, + PosFlag, NegFlag, + BothFlags<[CC1Option, CoreOption], " force inline SYCL kernels lambda in entry point">>; def fsycl_help_EQ : Joined<["-"], "fsycl-help=">, Flags<[NoXarchOption, CoreOption]>, HelpText<"Emit help information from the " "related offline compilation tool. Valid values: all, fpga, gen, x86_64.">, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 558debe95999b..73c2193a2fb46 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5123,6 +5123,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-sycl-std=2020"); } + if (!Args.hasFlag(options::OPT_fsycl_force_inline_kernel_lambda, + options::OPT_fno_sycl_force_inline_kernel_lambda, true)) + CmdArgs.push_back("-fno-sycl-force-inline-kernel-lambda"); + if (!Args.hasFlag(options::OPT_fsycl_unnamed_lambda, options::OPT_fno_sycl_unnamed_lambda, true)) CmdArgs.push_back("-fno-sycl-unnamed-lambda"); diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index ec2803b37159b..81c8867373e19 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -837,6 +837,18 @@ class SingleDeviceFunctionTracker { CallGraphNode *KernelNode = Parent.getNodeForKernel(SYCLKernel); llvm::SmallVector CallStack; VisitCallNode(KernelNode, GetFDFromNode(KernelNode), CallStack); + + // Always inline the KernelBody in the kernel entry point. For ESIMD + // inlining is handled later down the pipeline. + if (KernelBody && + Parent.SemaRef.getLangOpts().SYCLForceInlineKernelLambda && + !KernelBody->hasAttr() && + !KernelBody->hasAttr() && + !KernelBody->hasAttr()) { + KernelBody->addAttr(AlwaysInlineAttr::CreateImplicit( + KernelBody->getASTContext(), {}, AttributeCommonInfo::AS_Keyword, + AlwaysInlineAttr::Keyword_forceinline)); + } } public: diff --git a/clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp b/clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp index 327caa1db18ef..7e4973c60f044 100644 --- a/clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp +++ b/clang/test/CodeGenSYCL/check-direct-attribute-propagation.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s // Tests for IR of [[intel::scheduler_target_fmax_mhz()]], [[intel::num_simd_work_items()]], // [[intel::no_global_work_offset()]], [[intel::max_global_work_dim()]], [[sycl::reqd_sub_group_size()]], diff --git a/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp b/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp index d66a69002dace..bff837b71b874 100644 --- a/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp +++ b/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp @@ -1,4 +1,4 @@ -// RUN: %clang -fsycl-device-only %s -S -emit-llvm -O0 -g -o - | FileCheck %s +// RUN: %clang -fno-sycl-force-inline-kernel-lambda -fsycl-device-only %s -S -emit-llvm -O0 -g -o - | FileCheck %s // // Verify the SYCL kernel routine is marked artificial and has the // expected source correlation. diff --git a/clang/test/CodeGenSYCL/device-functions.cpp b/clang/test/CodeGenSYCL/device-functions.cpp index e19f29b1b3cda..6415da9e8ef56 100644 --- a/clang/test/CodeGenSYCL/device-functions.cpp +++ b/clang/test/CodeGenSYCL/device-functions.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s template T bar(T arg); diff --git a/clang/test/CodeGenSYCL/device-variables.cpp b/clang/test/CodeGenSYCL/device-variables.cpp index 6559a34984def..f8c572a0e982d 100644 --- a/clang/test/CodeGenSYCL/device-variables.cpp +++ b/clang/test/CodeGenSYCL/device-variables.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s enum class test_type { value1, value2, value3 }; diff --git a/clang/test/CodeGenSYCL/kernel-handler.cpp b/clang/test/CodeGenSYCL/kernel-handler.cpp index e0b0349864d05..f94d416e6b260 100644 --- a/clang/test/CodeGenSYCL/kernel-handler.cpp +++ b/clang/test/CodeGenSYCL/kernel-handler.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT // This test checks IR generated when kernel_handler argument // (used to handle SYCL 2020 specialization constants) is passed diff --git a/clang/test/CodeGenSYCL/kernel_binding_decls.cpp b/clang/test/CodeGenSYCL/kernel_binding_decls.cpp index f6434057f98ec..21354f9348804 100644 --- a/clang/test/CodeGenSYCL/kernel_binding_decls.cpp +++ b/clang/test/CodeGenSYCL/kernel_binding_decls.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -std=c++20 -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -internal-isystem %S/Inputs -fsycl-is-device -std=c++20 -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s #include "sycl.hpp" diff --git a/clang/test/CodeGenSYCL/max-concurrency.cpp b/clang/test/CodeGenSYCL/max-concurrency.cpp index 333c75f4c170d..82465ebafae52 100644 --- a/clang/test/CodeGenSYCL/max-concurrency.cpp +++ b/clang/test/CodeGenSYCL/max-concurrency.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -opaque-pointers -emit-llvm -o - %s | FileCheck %s #include "sycl.hpp" diff --git a/clang/test/CodeGenSYCL/no-opaque-ptr-kernel_binding_decls.cpp b/clang/test/CodeGenSYCL/no-opaque-ptr-kernel_binding_decls.cpp index d82f7caf54657..a7d82d65a8d9e 100644 --- a/clang/test/CodeGenSYCL/no-opaque-ptr-kernel_binding_decls.cpp +++ b/clang/test/CodeGenSYCL/no-opaque-ptr-kernel_binding_decls.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -std=c++20 -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -internal-isystem %S/Inputs -fsycl-is-device -std=c++20 -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s #include "sycl.hpp" diff --git a/clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp b/clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp index d207aebfe86b8..ac5ee2ba871ab 100644 --- a/clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_basic-kernel-wrapper.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s // This test checks that compiler generates correct kernel wrapper for basic // case. diff --git a/clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp b/clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp index 5790ff5f7a9f2..24cb44bfe8cdd 100644 --- a/clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_check-direct-attribute-propagation.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s // Tests for IR of [[intel::scheduler_target_fmax_mhz()]], [[intel::num_simd_work_items()]], // [[intel::no_global_work_offset()]], [[intel::max_global_work_dim()]], [[sycl::reqd_sub_group_size()]], diff --git a/clang/test/CodeGenSYCL/no_opaque_device-functions.cpp b/clang/test/CodeGenSYCL/no_opaque_device-functions.cpp index 555dfadc3b14a..2ef3295daecd7 100644 --- a/clang/test/CodeGenSYCL/no_opaque_device-functions.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_device-functions.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s template T bar(T arg); diff --git a/clang/test/CodeGenSYCL/no_opaque_device-variables.cpp b/clang/test/CodeGenSYCL/no_opaque_device-variables.cpp index c5a19a068dea7..f6d5fad56cf52 100644 --- a/clang/test/CodeGenSYCL/no_opaque_device-variables.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_device-variables.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s enum class test_type { value1, value2, value3 }; diff --git a/clang/test/CodeGenSYCL/no_opaque_kernel-handler.cpp b/clang/test/CodeGenSYCL/no_opaque_kernel-handler.cpp index 97828a7d8e2fc..cf80fcdb15580 100644 --- a/clang/test/CodeGenSYCL/no_opaque_kernel-handler.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_kernel-handler.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NONATIVESUPPORT +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,NATIVESUPPORT // This test checks IR generated when kernel_handler argument // (used to handle SYCL 2020 specialization constants) is passed diff --git a/clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp b/clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp index b6e22eb3fdc39..b5d1c8dde8f34 100644 --- a/clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_max-concurrency.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown -sycl-std=2020 -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s #include "sycl.hpp" diff --git a/clang/test/CodeGenSYCL/no_opaque_sampler.cpp b/clang/test/CodeGenSYCL/no_opaque_sampler.cpp index 0b99141677fdf..6165a54593cd6 100644 --- a/clang/test/CodeGenSYCL/no_opaque_sampler.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_sampler.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck --enable-var-scope %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck --enable-var-scope %s // CHECK: define {{.*}}spir_kernel void @{{[a-zA-Z0-9_]+}}(%opencl.sampler_t addrspace(2)* [[SAMPLER_ARG:%[a-zA-Z0-9_]+]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[SAMPLER_ARG]].addr = alloca %opencl.sampler_t addrspace(2)*, align 8 diff --git a/clang/test/CodeGenSYCL/no_opaque_spir-calling-conv.cpp b/clang/test/CodeGenSYCL/no_opaque_spir-calling-conv.cpp index bcdff2a97613f..8c914f8919b69 100644 --- a/clang/test/CodeGenSYCL/no_opaque_spir-calling-conv.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_spir-calling-conv.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { diff --git a/clang/test/CodeGenSYCL/no_opaque_spir-enum.cpp b/clang/test/CodeGenSYCL/no_opaque_spir-enum.cpp index c251c2e829488..456e558cb18b0 100644 --- a/clang/test/CodeGenSYCL/no_opaque_spir-enum.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_spir-enum.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { diff --git a/clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp b/clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp index c9d531cdf00f0..0084e4296fe33 100644 --- a/clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_stall_enable_device.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm -o - %s | FileCheck %s // Tests for IR of Intel FPGA [[intel::use_stall_enable_clusters]] function attribute on Device. diff --git a/clang/test/CodeGenSYCL/no_opaque_union-kernel-param.cpp b/clang/test/CodeGenSYCL/no_opaque_union-kernel-param.cpp index 772e3fce4a4bc..a9092aae39e91 100644 --- a/clang/test/CodeGenSYCL/no_opaque_union-kernel-param.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_union-kernel-param.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s // This test checks a kernel argument that is union with both array and non-array fields. diff --git a/clang/test/CodeGenSYCL/sampler.cpp b/clang/test/CodeGenSYCL/sampler.cpp index 600269324580a..9fe2336fbcdd2 100644 --- a/clang/test/CodeGenSYCL/sampler.cpp +++ b/clang/test/CodeGenSYCL/sampler.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck --enable-var-scope %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck --enable-var-scope %s // CHECK: define {{.*}}spir_kernel void @{{[a-zA-Z0-9_]+}}(ptr addrspace(2) [[SAMPLER_ARG:%[a-zA-Z0-9_]+]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[SAMPLER_ARG]].addr = alloca ptr addrspace(2), align 8 diff --git a/clang/test/CodeGenSYCL/spir-calling-conv.cpp b/clang/test/CodeGenSYCL/spir-calling-conv.cpp index c345ee03be46c..268699da6375c 100644 --- a/clang/test/CodeGenSYCL/spir-calling-conv.cpp +++ b/clang/test/CodeGenSYCL/spir-calling-conv.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { diff --git a/clang/test/CodeGenSYCL/spir-enum.cpp b/clang/test/CodeGenSYCL/spir-enum.cpp index cc0978a844478..1fce8030280cf 100644 --- a/clang/test/CodeGenSYCL/spir-enum.cpp +++ b/clang/test/CodeGenSYCL/spir-enum.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { diff --git a/clang/test/CodeGenSYCL/stall_enable_device.cpp b/clang/test/CodeGenSYCL/stall_enable_device.cpp index 7e6183b397ba9..334131423d0ec 100644 --- a/clang/test/CodeGenSYCL/stall_enable_device.cpp +++ b/clang/test/CodeGenSYCL/stall_enable_device.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm -o - %s | FileCheck %s // Tests for IR of Intel FPGA [[intel::use_stall_enable_clusters]] function attribute on Device. diff --git a/clang/test/CodeGenSYCL/sycl-device-static-init.cpp b/clang/test/CodeGenSYCL/sycl-device-static-init.cpp index e9a1d7f58f653..d0161758e5ef9 100644 --- a/clang/test/CodeGenSYCL/sycl-device-static-init.cpp +++ b/clang/test/CodeGenSYCL/sycl-device-static-init.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s // Test that static initializers do not force the emission of globals on sycl device // CHECK-NOT: $_ZN8BaseInitI12TestBaseTypeE15s_regbase_ncsdmE = comdat any diff --git a/clang/test/CodeGenSYCL/union-kernel-param.cpp b/clang/test/CodeGenSYCL/union-kernel-param.cpp index d34dec879992c..0cd22146f5f9d 100644 --- a/clang/test/CodeGenSYCL/union-kernel-param.cpp +++ b/clang/test/CodeGenSYCL/union-kernel-param.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -opaque-pointers -emit-llvm %s -o - | FileCheck %s // This test checks a kernel argument that is union with both array and non-array fields. diff --git a/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp b/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp index fb02c1b876106..dc0935195b439 100644 --- a/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp +++ b/clang/test/CodeGenSYCL/unique_stable_name_windows_diff.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple spir64-unknown-unknown -aux-triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s --check-prefixes=WIN,CHECK +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -triple spir64-unknown-unknown -aux-triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -triple x86_64-pc-windows-msvc -fsycl-is-device -disable-llvm-passes -fsycl-is-device -emit-llvm %s -o - | FileCheck %s --check-prefixes=WIN,CHECK template __attribute__((sycl_kernel)) void kernel(Func F){ diff --git a/clang/test/Driver/sycl.c b/clang/test/Driver/sycl.c index fcdcf6e74beda..de43d054c857d 100644 --- a/clang/test/Driver/sycl.c +++ b/clang/test/Driver/sycl.c @@ -72,6 +72,11 @@ // RUN: %clang_cl -### -fsycl-device-only -fno-sycl-unnamed-lambda %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOT-LAMBDA // CHECK-NOT-LAMBDA: "-fno-sycl-unnamed-lambda" +// -fsycl-force-inline-kernel-lambda +// RUN: %clangxx -### -fsycl-device-only -fno-sycl-force-inline-kernel-lambda %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOT-INLINE +// RUN: %clang_cl -### -fsycl-device-only -fno-sycl-force-inline-kernel-lambda %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOT-INLINE +// CHECK-NOT-INLINE: "-fno-sycl-force-inline-kernel-lambda" + /// -fsycl-device-only triple checks // RUN: %clang -fsycl-device-only -target x86_64-unknown-linux-gnu -### %s 2>&1 \ // RUN: | FileCheck --check-prefix=DEVICE-64 %s diff --git a/clang/test/SemaSYCL/sycl-force-inline-kernel-lambda.cpp b/clang/test/SemaSYCL/sycl-force-inline-kernel-lambda.cpp new file mode 100644 index 0000000000000..8e8a3d3b3606f --- /dev/null +++ b/clang/test/SemaSYCL/sycl-force-inline-kernel-lambda.cpp @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-INLINE +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-INLINE + +#include "sycl.hpp" + +int main() { + sycl::queue q; + + // CHECK: define {{.*}} spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E10KernelName() + // + // CHECK-NO-INLINE: call void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv + // CHECK-INLINE-NOT: call void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv + q.submit([&](sycl::handler &h) { h.parallel_for([] {}); }); + + + // CHECK: define {{.*}} spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_E16KernelNameInline() + // CHECK-NOT: call void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE0_clES2_ENKUlvE_clEv + q.submit([&](sycl::handler &h) { h.parallel_for([]() __attribute__((always_inline)) {}); }); + + // CHECK: define {{.*}} spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE1_clES2_E18KernelNameNoInline() + // CHECK: call void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE1_clES2_ENKUlvE_clEv + q.submit([&](sycl::handler &h) { h.parallel_for([]() __attribute__((noinline)) {}); }); + + /// The flag is ignored for ESIMD kernels + // CHECK: define {{.*}} spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE2_clES2_E15KernelNameESIMD() + // CHECK: call void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE2_clES2_ENKUlvE_clEv + q.submit([&](sycl::handler &h) { h.parallel_for([]() __attribute__((sycl_explicit_simd)) {}); }); + + return 0; +} diff --git a/sycl/doc/UsersManual.md b/sycl/doc/UsersManual.md index 891566525105b..608e8a5192102 100644 --- a/sycl/doc/UsersManual.md +++ b/sycl/doc/UsersManual.md @@ -107,6 +107,12 @@ and not recommended to use in production environment. * nd_item class get_global_id()/get_global_linear_id() member functions Enabled by default. +**`-f[no-]sycl-force-inline-kernel-lambda`** + + Enables/Disables inlining of the kernel lambda operator into the compiler + generated entry point function. This flag does not apply to ESIMD + kernels. + Enabled by default. **`-fgpu-inline-threshold=`**