diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 84baae14b5011..6553b83ffde6f 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4204,8 +4204,11 @@ class OffloadingActionBuilder final { // device objects for future host link. Device libraries should // be linked by default to resolve any undefined reference. const auto *TC = ToolChains.front(); - if (TC->getTriple().getSubArch() != - llvm::Triple::SPIRSubArch_fpga) { + llvm::Triple TT(TC->getTriple()); + bool isAOT = TT.getSubArch() == llvm::Triple::SPIRSubArch_fpga || + TT.getSubArch() == llvm::Triple::SPIRSubArch_gen || + TT.getSubArch() == llvm::Triple::SPIRSubArch_x86_64; + if (TT.getSubArch() != llvm::Triple::SPIRSubArch_fpga) { SYCLDeviceLibLinked = addSYCLDeviceLibs(TC, FullSYCLLinkBinaryList, true, C.getDefaultToolChain() @@ -4220,11 +4223,26 @@ class OffloadingActionBuilder final { else FullDeviceLinkAction = DeviceLinkAction; auto *PostLinkAction = C.MakeAction( - FullDeviceLinkAction, types::TY_LLVM_BC, types::TY_LLVM_BC); + FullDeviceLinkAction, types::TY_LLVM_BC, + types::TY_Tempfiletable); + PostLinkAction->setRTSetsSpecConstants(!isAOT); + auto *ExtractIRFilesAction = C.MakeAction( + PostLinkAction, types::TY_Tempfilelist, types::TY_Tempfilelist); + // single column w/o title fits TY_Tempfilelist format + ExtractIRFilesAction->addExtractColumnTform( + FileTableTformJobAction::COL_CODE, false /*drop titles*/); auto *TranslateAction = C.MakeAction( - PostLinkAction, types::TY_Image); + ExtractIRFilesAction, types::TY_Tempfilelist); + + ActionList TformInputs{PostLinkAction, TranslateAction}; + auto *ReplaceFilesAction = C.MakeAction( + TformInputs, types::TY_Tempfiletable, types::TY_Tempfiletable); + ReplaceFilesAction->addReplaceColumnTform( + FileTableTformJobAction::COL_CODE, + FileTableTformJobAction::COL_CODE); + SYCLLinkBinary = C.MakeAction( - TranslateAction, types::TY_Object); + ReplaceFilesAction, types::TY_Object); } else { auto *Link = 
C.MakeAction(SYCLLinkBinaryList, types::TY_Image); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 72f0f961732e2..24a22a2b9772b 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -695,6 +695,29 @@ static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args, } } +/// Simple check to see if the optimization level is at -O2 or higher. +/// For -fsycl (DPC++) -O2 is the default. +static bool isSYCLOptimizationO2orHigher(const ArgList &Args) { + if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { + if (A->getOption().matches(options::OPT_O4) || + A->getOption().matches(options::OPT_Ofast)) + return true; + + if (A->getOption().matches(options::OPT_O0)) + return false; + + assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag"); + + StringRef S(A->getValue()); + unsigned OptLevel = 0; + if (S.getAsInteger(10, OptLevel)) + return false; + return OptLevel > 1; + } + // No -O setting seen, default is -O2 for device. + return true; +} + /// Vectorize at all optimization levels greater than 1 except for -Oz. /// For -Oz the loop vectorizer is disabled, while the slp vectorizer is /// enabled. @@ -4738,9 +4761,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } // Turn on Dead Parameter Elimination Optimization with early optimizations + // TODO: Enable DAE by default without the Optimization level check in the + // driver. The enabling can be done in CodeGenOpt, and we can pass an + // option to explicitly enable/disable here. 
if (!(RawTriple.isAMDGCN()) && Args.hasFlag(options::OPT_fsycl_dead_args_optimization, - options::OPT_fno_sycl_dead_args_optimization, false)) + options::OPT_fno_sycl_dead_args_optimization, + isSYCLOptimizationO2orHigher(Args))) CmdArgs.push_back("-fenable-sycl-dae"); bool IsMSVC = AuxT.isWindowsMSVCEnvironment(); if (IsMSVC) { @@ -9134,7 +9161,8 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA, // Turn on Dead Parameter Elimination Optimization with early optimizations if (!(getToolChain().getTriple().isAMDGCN()) && TCArgs.hasFlag(options::OPT_fsycl_dead_args_optimization, - options::OPT_fno_sycl_dead_args_optimization, false)) + options::OPT_fno_sycl_dead_args_optimization, + isSYCLOptimizationO2orHigher(TCArgs))) addArgs(CmdArgs, TCArgs, {"-emit-param-info"}); // Enable PI program metadata if (getToolChain().getTriple().isNVPTX()) diff --git a/clang/test/Driver/sycl-device-lib.cpp b/clang/test/Driver/sycl-device-lib.cpp index 69ab4dced9891..2bcc5967b751f 100644 --- a/clang/test/Driver/sycl-device-lib.cpp +++ b/clang/test/Driver/sycl-device-lib.cpp @@ -128,7 +128,7 @@ /// ########################################################################### /// test llvm-link behavior for fno-sycl-device-lib -// RUN: %clangxx -fsycl -fno-sycl-device-lib=all %s -### 2>&1 \ +// RUN: %clangxx -fsycl -fno-sycl-dead-args-optimization -fno-sycl-device-lib=all %s -### 2>&1 \ // RUN: | FileCheck %s -check-prefix=SYCL_LLVM_LINK_NO_DEVICE_LIB // SYCL_LLVM_LINK_NO_DEVICE_LIB: clang{{.*}} "-cc1" {{.*}} "-fsycl-is-device" // SYCL_LLVM_LINK_NO_DEVICE_LIB-NOT: llvm-link{{.*}} "-only-needed" diff --git a/clang/test/Driver/sycl-device-optimizations.cpp b/clang/test/Driver/sycl-device-optimizations.cpp index f43b7347f5555..a24bd73fcdb6c 100644 --- a/clang/test/Driver/sycl-device-optimizations.cpp +++ b/clang/test/Driver/sycl-device-optimizations.cpp @@ -30,13 +30,21 @@ // CHECK-NO-SYCL-EARLY-OPTS: "-fno-sycl-early-optimizations" /// Check that Dead Parameter 
Elimination Optimization is enabled -// RUN: %clang -### -fsycl -fsycl-dead-args-optimization %s 2>&1 \ +// RUN: %clang -### -fsycl %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-DAE %s -// RUN: %clang_cl -### -fsycl -fsycl-dead-args-optimization %s 2>&1 \ +// RUN: %clang_cl -### -fsycl %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-DAE %s // CHECK-DAE: clang{{.*}} "-fenable-sycl-dae" // CHECK-DAE: sycl-post-link{{.*}} "-emit-param-info" +/// Check that Dead Parameter Elimination Optimization is disabled +// RUN: %clang -### -fsycl -fno-sycl-dead-args-optimization %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-NO-DAE %s +// RUN: %clang_cl -### -fsycl -fno-sycl-dead-args-optimization %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-NO-DAE %s +// CHECK-NO-DAE-NOT: clang{{.*}} "-fenable-sycl-dae" +// CHECK-NO-DAE-NOT: sycl-post-link{{.*}} "-emit-param-info" + // Check "-fgpu-inline-threshold" is passed to the front-end: // RUN: %clang -### -fsycl -fgpu-inline-threshold=100000 %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-THRESH %s diff --git a/clang/test/Driver/sycl-intelfpga-static-lib.cpp b/clang/test/Driver/sycl-intelfpga-static-lib.cpp index b9f5c12c7ac45..768d4dc783ace 100644 --- a/clang/test/Driver/sycl-intelfpga-static-lib.cpp +++ b/clang/test/Driver/sycl-intelfpga-static-lib.cpp @@ -10,7 +10,7 @@ // RUN: llvm-ar cr %t.a %t1_bundle.o /// Check phases with static lib -// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -fintelfpga %t.a -ccc-print-phases 2>&1 \ +// RUN: %clangxx -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-dead-args-optimization -fno-sycl-device-lib=all -fintelfpga %t.a -ccc-print-phases 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK_PHASES %s // CHECK_PHASES: 0: input, "[[INPUT:.+\.a]]", object, (host-sycl) // CHECK_PHASES: 1: linker, {0}, image, (host-sycl) diff --git a/clang/test/Driver/sycl-offload.c b/clang/test/Driver/sycl-offload.c index 6ea8c915837ae..fee4f71de6153 100644 --- 
a/clang/test/Driver/sycl-offload.c +++ b/clang/test/Driver/sycl-offload.c @@ -471,9 +471,11 @@ // CHK-LINK-UB: 1: clang-offload-unbundler, {0}, object // CHK-LINK-UB: 2: linker, {1}, image, (device-sycl) // CHK-LINK-UB: 3: sycl-post-link, {2}, ir, (device-sycl) -// CHK-LINK-UB: 4: llvm-spirv, {3}, image, (device-sycl) -// CHK-LINK-UB: 5: clang-offload-wrapper, {4}, object, (device-sycl) -// CHK-LINK-UB: 6: offload, "device-sycl (spir64-unknown-unknown)" {5}, object +// CHK-LINK-UB: 4: file-table-tform, {3}, tempfilelist, (device-sycl) +// CHK-LINK-UB: 5: llvm-spirv, {4}, tempfilelist, (device-sycl) +// CHK-LINK-UB: 6: file-table-tform, {3, 5}, tempfiletable, (device-sycl) +// CHK-LINK-UB: 7: clang-offload-wrapper, {6}, object, (device-sycl) +// CHK-LINK-UB: 8: offload, "device-sycl (spir64-unknown-unknown)" {7}, object /// ########################################################################### @@ -487,9 +489,11 @@ // CHK-LINK: 2: compiler, {1}, ir, (device-sycl) // CHK-LINK: 3: linker, {2}, image, (device-sycl) // CHK-LINK: 4: sycl-post-link, {3}, ir, (device-sycl) -// CHK-LINK: 5: llvm-spirv, {4}, image, (device-sycl) -// CHK-LINK: 6: clang-offload-wrapper, {5}, object, (device-sycl) -// CHK-LINK: 7: offload, "device-sycl (spir64-unknown-unknown)" {6}, object +// CHK-LINK: 5: file-table-tform, {4}, tempfilelist, (device-sycl) +// CHK-LINK: 6: llvm-spirv, {5}, tempfilelist, (device-sycl) +// CHK-LINK: 7: file-table-tform, {4, 6}, tempfiletable, (device-sycl) +// CHK-LINK: 8: clang-offload-wrapper, {7}, object, (device-sycl) +// CHK-LINK: 9: offload, "device-sycl (spir64-unknown-unknown)" {8}, object /// ########################################################################### diff --git a/sycl/test/scheduler/ReleaseResourcesTest.cpp b/sycl/test/scheduler/ReleaseResourcesTest.cpp index e0651b201d00b..aca6f1c52bc58 100644 --- a/sycl/test/scheduler/ReleaseResourcesTest.cpp +++ b/sycl/test/scheduler/ReleaseResourcesTest.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx 
-fsycl -fsycl-targets=%sycl_triple -fsycl-dead-args-optimization -I %sycl_source_dir %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %sycl_source_dir %s -o %t.out // RUN: %RUN_ON_HOST %t.out //==------------------- ReleaseResourcesTests.cpp --------------------------==//