diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index e1199329506ce..391d067ee289b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3551,7 +3551,12 @@ def fsycl_esimd : Flag<["-"], "fsycl-explicit-simd">, Group, Flags<[ def fno_sycl_esimd : Flag<["-"], "fno-sycl-explicit-simd">, Group, HelpText<"Disable SYCL explicit SIMD extension">, Flags<[NoArgumentUnused, CoreOption]>; defm sycl_early_optimizations : OptOutFFlag<"sycl-early-optimizations", "Enable", "Disable", " standard optimization pipeline for SYCL device compiler", [CoreOption]>; - +def fsycl_dead_args_optimization : Flag<["-"], "fsycl-dead-args-optimization">, + Group, Flags<[NoArgumentUnused, CoreOption]>, HelpText<"Enables " + "elimination of DPC++ dead kernel arguments">; +def fno_sycl_dead_args_optimization : Flag<["-"], "fno-sycl-dead-args-optimization">, + Group, Flags<[NoArgumentUnused, CoreOption]>, HelpText<"Disables " + "elimination of DPC++ dead kernel arguments">; //===----------------------------------------------------------------------===// // CC1 Options //===----------------------------------------------------------------------===// diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index ff2d3e5065d16..43372ea755cbd 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3514,6 +3514,9 @@ class OffloadingActionBuilder final { /// Flag to signal if the user requested device code split. bool DeviceCodeSplit = false; + /// Flag to signal if DAE optimization is turned on. + bool EnableDAE = false; + /// The SYCL actions for the current input. ActionList SYCLDeviceActions; @@ -3951,7 +3954,7 @@ class OffloadingActionBuilder final { ActionList WrapperInputs; // post link is not optional - even if not splitting, always need to // process specialization constants - bool MultiFileActionDeps = !isSpirvAOT || DeviceCodeSplit; + bool MultiFileActionDeps = !isSpirvAOT || DeviceCodeSplit || EnableDAE; types::ID PostLinkOutType = isNVPTX || !MultiFileActionDeps ? types::TY_LLVM_BC : types::TY_Tempfiletable; @@ -4108,6 +4111,9 @@ class OffloadingActionBuilder final { WrapDeviceOnlyBinary = Args.hasArg(options::OPT_fsycl_link_EQ); auto *DeviceCodeSplitArg = Args.getLastArg(options::OPT_fsycl_device_code_split_EQ); + EnableDAE = + Args.hasFlag(options::OPT_fsycl_dead_args_optimization, + options::OPT_fno_sycl_dead_args_optimization, false); // -fsycl-device-code-split is an alias to // -fsycl-device-code-split=per_source DeviceCodeSplit = DeviceCodeSplitArg && diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index cfd2e992061c3..8d1d5f1d62641 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4123,6 +4123,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-sycl-opt"); } + // Turn on Dead Parameter Elimination Optimization with early optimizations + if (!RawTriple.isNVPTX() && + Args.hasFlag(options::OPT_fsycl_dead_args_optimization, + options::OPT_fno_sycl_dead_args_optimization, false)) + CmdArgs.push_back("-fenable-sycl-dae"); // Pass the triple of host when doing SYCL auto AuxT = llvm::Triple(llvm::sys::getProcessTriple()); @@ -7807,6 +7812,11 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA, // OPT_fsycl_device_code_split is not checked as it is an alias to // -fsycl-device-code-split=per_source + // Turn on Dead Parameter Elimination Optimization with early optimizations + if (!getToolChain().getTriple().isNVPTX() && + TCArgs.hasFlag(options::OPT_fsycl_dead_args_optimization, + options::OPT_fno_sycl_dead_args_optimization, false)) + addArgs(CmdArgs, TCArgs, {"-emit-param-info"}); if (JA.getType() == types::TY_LLVM_BC) { // single file output requested - this means only perform necessary IR // transformations (like specialization constant intrinsic lowering) and diff --git a/clang/test/Driver/sycl-device-optimizations.cpp b/clang/test/Driver/sycl-device-optimizations.cpp index 7080bcedbd065..71e1f345df652 100644 --- a/clang/test/Driver/sycl-device-optimizations.cpp +++ b/clang/test/Driver/sycl-device-optimizations.cpp @@ -28,3 +28,11 @@ // RUN: %clang_cl -### -fsycl -fintelfpga %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-NO-SYCL-EARLY-OPTS %s // CHECK-NO-SYCL-EARLY-OPTS: "-fno-sycl-early-optimizations" + +/// Check that Dead Parameter Elimination Optimization is enabled +// RUN: %clang -### -fsycl -fsycl-dead-args-optimization %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-DAE %s +// RUN: %clang_cl -### -fsycl -fsycl-dead-args-optimization %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-DAE %s +// CHECK-DAE: clang{{.*}} "-fenable-sycl-dae" +// CHECK-DAE: sycl-post-link{{.*}} "-emit-param-info" diff --git a/sycl/test/basic_tests/sampler/sampler.cpp b/sycl/test/basic_tests/sampler/sampler.cpp index cf87d58475b4f..fc8158dae9cef 100644 --- a/sycl/test/basic_tests/sampler/sampler.cpp +++ b/sycl/test/basic_tests/sampler/sampler.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization %s -o %t.out -L %opencl_libs_dir -lOpenCL // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/sycl/test/kernel_from_file/hw.cpp b/sycl/test/kernel_from_file/hw.cpp index b7c4e573be65c..d073be5ff40c4 100644 --- a/sycl/test/kernel_from_file/hw.cpp +++ b/sycl/test/kernel_from_file/hw.cpp @@ -2,7 +2,9 @@ // CUDA does not support SPIR-V. //-fsycl-targets=%sycl_triple -// RUN: %clangxx -fsycl-device-only -fno-sycl-use-bitcode -Xclang -fsycl-int-header=%t.h -c %s -o %t.spv -I %sycl_include -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict +// Runtime assumes that the image passed with SYCL_USE_KERNEL_SPV has no +// eliminated arguments, compile without early optimizations. +// RUN: %clangxx -fsycl-device-only -fno-sycl-early-optimizations -fno-sycl-use-bitcode -Xclang -fsycl-int-header=%t.h -c %s -o %t.spv -I %sycl_include -Xclang -verify-ignore-unexpected=note,warning -Wno-sycl-strict // RUN: %clangxx -include %t.h -g %s -o %t.out -lsycl -I %sycl_include -Xclang -verify-ignore-unexpected=note,warning // RUN: env SYCL_BE=%sycl_be SYCL_USE_KERNEL_SPV=%t.spv %t.out | FileCheck %s // CHECK: Passed diff --git a/sycl/test/multi_ptr/multi_ptr.cpp b/sycl/test/multi_ptr/multi_ptr.cpp index 57f2f1c0d9a2d..3d80ded385303 100644 --- a/sycl/test/multi_ptr/multi_ptr.cpp +++ b/sycl/test/multi_ptr/multi_ptr.cpp @@ -1,9 +1,9 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -// RUN: %clangxx -DRESTRICT_WRITE_ACCESS_TO_CONSTANT_PTR -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out +// RUN: %clangxx -DRESTRICT_WRITE_ACCESS_TO_CONSTANT_PTR -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization %s -o %t1.out // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out diff --git a/sycl/test/scheduler/HandleException.cpp b/sycl/test/scheduler/HandleException.cpp index a9fbd3cc9d8d9..365a84ee9411e 100644 --- a/sycl/test/scheduler/HandleException.cpp +++ b/sycl/test/scheduler/HandleException.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %sycl_source_dir %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization -I %sycl_source_dir %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out #include #include diff --git a/sycl/test/scheduler/HostAccDestruction.cpp b/sycl/test/scheduler/HostAccDestruction.cpp index fda1c29298c0c..6b3fa8f107fed 100644 --- a/sycl/test/scheduler/HostAccDestruction.cpp +++ b/sycl/test/scheduler/HostAccDestruction.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -I %sycl_source_dir %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-dead-args-optimization -I %sycl_source_dir %s -o %t.out // RUN: env SYCL_PI_TRACE=2 %CPU_RUN_PLACEHOLDER %t.out 2>&1 %CPU_CHECK_PLACEHOLDER //==---------------------- HostAccDestruction.cpp --------------------------==// // diff --git a/sycl/test/scheduler/ReleaseResourcesTest.cpp b/sycl/test/scheduler/ReleaseResourcesTest.cpp index c62353ee91b89..0b8c6b12abfea 100644 --- a/sycl/test/scheduler/ReleaseResourcesTest.cpp +++ b/sycl/test/scheduler/ReleaseResourcesTest.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %sycl_source_dir %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization -I %sycl_source_dir %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: env SYCL_PI_TRACE=2 %CPU_RUN_PLACEHOLDER %t.out 2>&1 %CPU_CHECK_PLACEHOLDER // RUN: env SYCL_PI_TRACE=2 %GPU_RUN_PLACEHOLDER %t.out 2>&1 %GPU_CHECK_PLACEHOLDER diff --git a/sycl/test/separate-compile/test.cpp b/sycl/test/separate-compile/test.cpp index 076de5fc0e9da..60644ae3610df 100644 --- a/sycl/test/separate-compile/test.cpp +++ b/sycl/test/separate-compile/test.cpp @@ -3,13 +3,13 @@ // // >> ---- compile src1 // >> device compilation... -// RUN: %clangxx -fsycl-device-only -Xclang -fsycl-int-header=sycl_ihdr_a.h %s -c -o a_kernel.bc -I %sycl_include -Wno-sycl-strict +// RUN: %clangxx -fsycl-device-only -fno-sycl-early-optimizations -Xclang -fsycl-int-header=sycl_ihdr_a.h %s -c -o a_kernel.bc -I %sycl_include -Wno-sycl-strict // >> host compilation... // RUN: %clangxx -include sycl_ihdr_a.h -g -c %s -o a.o -I %sycl_include -Wno-sycl-strict // // >> ---- compile src2 // >> device compilation... -// RUN: %clangxx -DB_CPP=1 -fsycl-device-only -Xclang -fsycl-int-header=sycl_ihdr_b.h %s -c -o b_kernel.bc -I %sycl_include -Wno-sycl-strict +// RUN: %clangxx -DB_CPP=1 -fsycl-device-only -fno-sycl-early-optimizations -Xclang -fsycl-int-header=sycl_ihdr_b.h %s -c -o b_kernel.bc -I %sycl_include -Wno-sycl-strict // >> host compilation... // RUN: %clangxx -DB_CPP=1 -include sycl_ihdr_b.h -g -c %s -o b.o -I %sycl_include -Wno-sycl-strict // diff --git a/sycl/test/usm/pfor_flatten.cpp b/sycl/test/usm/pfor_flatten.cpp index 4445e267e32e8..e64119c489c26 100644 --- a/sycl/test/usm/pfor_flatten.cpp +++ b/sycl/test/usm/pfor_flatten.cpp @@ -1,7 +1,7 @@ // UNSUPPORTED: cuda // CUDA does not support the unnamed lambda extension. // -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-unnamed-lambda %s -o %t1.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-unnamed-lambda -fsycl-dead-args-optimization %s -o %t1.out // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out