diff --git a/clang/include/clang/Driver/Action.h b/clang/include/clang/Driver/Action.h index 15921528c960a..189bc2208c9ff 100644 --- a/clang/include/clang/Driver/Action.h +++ b/clang/include/clang/Driver/Action.h @@ -57,6 +57,7 @@ class Action { InputClass = 0, BindArchClass, OffloadClass, + ForEachWrappingClass, PreprocessJobClass, PrecompileJobClass, HeaderModulePrecompileJobClass, @@ -737,7 +738,15 @@ class SYCLPostLinkJobAction : public JobAction { void anchor() override; public: - SYCLPostLinkJobAction(Action *Input, types::ID OutputType); + // The tempfiletable management relies on shadowing the main file type with + // types::TY_Tempfiletable. The problem with shadowing is that it prevents + // integration with clang tools that rely on the file type to properly set + // args. + // We "trick" the driver by declaring the underlying file type and setting a + // "true output type" which will be used by the SYCLPostLinkJobAction + // to properly set up the job. + SYCLPostLinkJobAction(Action *Input, types::ID ShadowOutputType, + types::ID TrueOutputType); static bool classof(const Action *A) { return A->getKind() == SYCLPostLinkJobClass; @@ -747,8 +756,11 @@ class SYCLPostLinkJobAction : public JobAction { bool getRTSetsSpecConstants() const { return RTSetsSpecConsts; } + types::ID getTrueType() const { return TrueOutputType; } + private: bool RTSetsSpecConsts = true; + types::ID TrueOutputType; }; class BackendCompileJobAction : public JobAction { @@ -771,6 +783,9 @@ class FileTableTformJobAction : public JobAction { void anchor() override; public: + static constexpr const char *COL_CODE = "Code"; + static constexpr const char *COL_ZERO = "0"; + struct Tform { enum Kind { EXTRACT, @@ -791,8 +806,10 @@ class FileTableTformJobAction : public JobAction { SmallVector TheArgs; }; - FileTableTformJobAction(Action *Input, types::ID OutputType); - FileTableTformJobAction(ActionList &Inputs, types::ID OutputType); + FileTableTformJobAction(Action *Input, types::ID 
ShadowOutputType, + types::ID TrueOutputType); + FileTableTformJobAction(ActionList &Inputs, types::ID ShadowOutputType, + types::ID TrueOutputType); // Deletes all columns except the one with given name. void addExtractColumnTform(StringRef ColumnName, bool WithColTitle = true); @@ -820,7 +837,10 @@ class FileTableTformJobAction : public JobAction { const ArrayRef getTforms() const { return Tforms; } + types::ID getTrueType() const { return TrueOutputType; } + private: + types::ID TrueOutputType; SmallVector Tforms; // transformation actions requested // column to copy single file from if requested @@ -849,6 +869,30 @@ class StaticLibJobAction : public JobAction { } }; +/// Wrap all jobs performed between TFormInput (excluded) and Job (included) +/// behind a `llvm-foreach` call. +/// +/// Assumptions: +/// - No change of toolchain, boundarch and offloading kind should occur +/// within the sub-region; +/// - No job should produce multiple outputs; +/// - Results of actions within the sub-region should not be used outside the +/// wrapped region. +/// Note: this doesn't bind to a tool directly and needs special casing +/// anyhow, which is why this is an Action and not a JobAction, even if there +/// is a command behind it. +class ForEachWrappingAction : public Action { +public: + ForEachWrappingAction(JobAction *TFormInput, JobAction *Job); + + JobAction *getTFormInput() const; + JobAction *getJobAction() const; + + static bool classof(const Action *A) { + return A->getKind() == ForEachWrappingClass; + } +}; + } // namespace driver } // namespace clang diff --git a/clang/include/clang/Driver/Job.h b/clang/include/clang/Driver/Job.h index 597e643ead5ca..dd523cea9c340 100644 --- a/clang/include/clang/Driver/Job.h +++ b/clang/include/clang/Driver/Job.h @@ -319,6 +319,8 @@ class JobList { /// Clear the job list. void clear(); + /// Return a mutable list of Jobs for llvm-foreach wrapping. 
+ list_type &getJobsForOverride() { return Jobs; } const list_type &getJobs() const { return Jobs; } bool empty() const { return Jobs.empty(); } diff --git a/clang/lib/Driver/Action.cpp b/clang/lib/Driver/Action.cpp index 4ff09ca4e216b..3a1b5261d6810 100644 --- a/clang/lib/Driver/Action.cpp +++ b/clang/lib/Driver/Action.cpp @@ -59,6 +59,8 @@ const char *Action::getClassName(ActionClass AC) { return "append-footer"; case StaticLibJobClass: return "static-lib-linker"; + case ForEachWrappingClass: + return "foreach"; } llvm_unreachable("invalid class"); @@ -474,8 +476,11 @@ SPIRCheckJobAction::SPIRCheckJobAction(Action *Input, types::ID Type) void SYCLPostLinkJobAction::anchor() {} -SYCLPostLinkJobAction::SYCLPostLinkJobAction(Action *Input, types::ID Type) - : JobAction(SYCLPostLinkJobClass, Input, Type) {} +SYCLPostLinkJobAction::SYCLPostLinkJobAction(Action *Input, + types::ID ShadowOutputType, + types::ID TrueOutputType) + : JobAction(SYCLPostLinkJobClass, Input, ShadowOutputType), + TrueOutputType(TrueOutputType) {} void BackendCompileJobAction::anchor() {} @@ -489,12 +494,17 @@ BackendCompileJobAction::BackendCompileJobAction(Action *Input, void FileTableTformJobAction::anchor() {} -FileTableTformJobAction::FileTableTformJobAction(Action *Input, types::ID Type) - : JobAction(FileTableTformJobClass, Input, Type) {} +FileTableTformJobAction::FileTableTformJobAction(Action *Input, + types::ID ShadowOutputType, + types::ID TrueOutputType) + : JobAction(FileTableTformJobClass, Input, ShadowOutputType), + TrueOutputType(TrueOutputType) {} FileTableTformJobAction::FileTableTformJobAction(ActionList &Inputs, - types::ID Type) - : JobAction(FileTableTformJobClass, Inputs, Type) {} + types::ID ShadowOutputType, + types::ID TrueOutputType) + : JobAction(FileTableTformJobClass, Inputs, ShadowOutputType), + TrueOutputType(TrueOutputType) {} void FileTableTformJobAction::addExtractColumnTform(StringRef ColumnName, bool WithColTitle) { @@ -533,3 +543,15 @@ void 
StaticLibJobAction::anchor() {} StaticLibJobAction::StaticLibJobAction(ActionList &Inputs, types::ID Type) : JobAction(StaticLibJobClass, Inputs, Type) {} + +ForEachWrappingAction::ForEachWrappingAction(JobAction *TFormInput, + JobAction *Job) + : Action(ForEachWrappingClass, {TFormInput, Job}, Job->getType()) {} + +JobAction *ForEachWrappingAction::getTFormInput() const { + return llvm::cast(getInputs()[0]); +} + +JobAction *ForEachWrappingAction::getJobAction() const { + return llvm::cast(getInputs()[1]); +} diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 1e19957ad921f..0d90bf7902e7b 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3896,7 +3896,7 @@ class OffloadingActionBuilder final { SmallVector GpuArchList; /// Build the last steps for CUDA after all BC files have been linked. - Action *finalizeNVPTXDependences(Action *Input, const llvm::Triple &TT) { + JobAction *finalizeNVPTXDependences(Action *Input, const llvm::Triple &TT) { auto *BA = C.getDriver().ConstructPhaseAction( C, Args, phases::Backend, Input, AssociatedOffloadKind); if (TT.getOS() != llvm::Triple::NVCL) { @@ -3906,10 +3906,11 @@ class OffloadingActionBuilder final { return C.MakeAction(DeviceActions, types::TY_CUDA_FATBIN); } - return BA; + return cast(BA); } - Action *finalizeAMDGCNDependences(Action *Input, const llvm::Triple &TT) { + JobAction *finalizeAMDGCNDependences(Action *Input, + const llvm::Triple &TT) { auto *BA = C.getDriver().ConstructPhaseAction( C, Args, phases::Backend, Input, AssociatedOffloadKind); @@ -3919,7 +3920,7 @@ class OffloadingActionBuilder final { ActionList AL = {AA}; Action *LinkAction = C.MakeAction(AL, types::TY_Image); ActionList HIPActions = {LinkAction}; - Action *HIPFatBinary = + JobAction *HIPFatBinary = C.MakeAction(HIPActions, types::TY_HIP_FATBIN); return HIPFatBinary; } @@ -4052,7 +4053,7 @@ class OffloadingActionBuilder final { else FullDeviceLinkAction = DeviceLinkAction; auto *PostLinkAction 
= C.MakeAction( - FullDeviceLinkAction, types::TY_LLVM_BC); + FullDeviceLinkAction, types::TY_LLVM_BC, types::TY_LLVM_BC); auto *TranslateAction = C.MakeAction( PostLinkAction, types::TY_Image); SYCLLinkBinary = C.MakeAction( @@ -4324,6 +4325,7 @@ class OffloadingActionBuilder final { auto TT = SYCLTripleList[I]; auto isNVPTX = (*TC)->getTriple().isNVPTX(); auto isAMDGCN = (*TC)->getTriple().isAMDGCN(); + auto isSPIR = (*TC)->getTriple().isSPIR(); bool isSpirvAOT = TT.getSubArch() == llvm::Triple::SPIRSubArch_fpga || TT.getSubArch() == llvm::Triple::SPIRSubArch_gen || TT.getSubArch() == llvm::Triple::SPIRSubArch_x86_64; @@ -4335,8 +4337,6 @@ class OffloadingActionBuilder final { // directly to the backend compilation step (aocr) or wrapper (aocx) else if (types::isFPGA(Input->getType())) { Action *FPGAAOTAction; - constexpr char COL_CODE[] = "Code"; - constexpr char COL_ZERO[] = "0"; if (Input->getType() == types::TY_FPGA_AOCR || Input->getType() == types::TY_FPGA_AOCR_EMU) // Generate AOCX/AOCR @@ -4347,8 +4347,10 @@ class OffloadingActionBuilder final { else llvm_unreachable("Unexpected FPGA input type."); auto *RenameAction = C.MakeAction( - FPGAAOTAction, types::TY_Tempfilelist); - RenameAction->addRenameColumnTform(COL_ZERO, COL_CODE); + FPGAAOTAction, types::TY_Tempfilelist, types::TY_Tempfilelist); + RenameAction->addRenameColumnTform( + FileTableTformJobAction::COL_ZERO, + FileTableTformJobAction::COL_CODE); auto *DeviceWrappingAction = C.MakeAction( RenameAction, types::TY_Object); DA.add(*DeviceWrappingAction, **TC, /*BoundArch=*/nullptr, @@ -4362,7 +4364,7 @@ class OffloadingActionBuilder final { // The linkage actions subgraph leading to the offload wrapper. // [cond] Means incoming/outgoing dependence is created only when cond // is true. 
A function of: - // n - target is NVPTX + // n - target is NVPTX/AMDGCN // a - SPIRV AOT compilation is requested // s - device code split requested // * - "all other cases" @@ -4379,17 +4381,18 @@ class OffloadingActionBuilder final { // .--------------------------------------. // | PostLink | // .--------------------------------------. - // [+n] [+*] [+] - // | | | - // .----------------. .-----------------. | - // | FileTableTform | | FileTableTform | | - // | (copy "Code") | | (extract "Code")| | - // .----------------. .-----------------. | - // [.] [-] | - // | | | + // [+*] [+] + // | | + // .-----------------. | + // | FileTableTform | | + // | (extract "Code")| | + // .-----------------. | + // [-] | + // --------------------| | // [.] [-*] | // .---------------. .-------------------. | - // | finalizeNVPTX | | SPIRVTranslator | | + // | finalizeNVPTX | | SPIRVTranslator | | + // | finalizeAMDGCN | | | | // .---------------. .-------------------. | // [.] [-as] [-!a] | // | | | | @@ -4399,13 +4402,13 @@ class OffloadingActionBuilder final { // | .----------------. | | // | [-s] | | // | | | | - // [.] [-a] [-!a] [+] - // .------------------------------------. - // | FileTableTform | - // | (replace "Code") | - // .------------------------------------. - // | - // [+] + // | [-a] [-!a] [+] + // | .--------------------. + // -----------[-n]| FileTableTform | + // | (replace "Code") | + // .--------------------. + // | + // [+*] // .--------------------------------------. // | OffloadWrapper | // .--------------------------------------. @@ -4420,7 +4423,7 @@ class OffloadingActionBuilder final { // When spv online link is supported by all backends, the fallback // device libraries are only needed when current toolchain is using // AOT compilation. 
- if (!isNVPTX && !isAMDGCN) { + if (isSPIR) { SYCLDeviceLibLinked = addSYCLDeviceLibs( *TC, FullLinkObjects, true, C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment()); @@ -4432,18 +4435,7 @@ class OffloadingActionBuilder final { C.MakeAction(FullLinkObjects, types::TY_LLVM_BC); else FullDeviceLinkAction = DeviceLinkAction; - // setup some flags upfront - - if ((isNVPTX || isAMDGCN) && DeviceCodeSplit) { - // TODO Temporary limitation, need to support code splitting for PTX - const Driver &D = C.getDriver(); - const std::string &OptName = - D.getOpts() - .getOption(options::OPT_fsycl_device_code_split) - .getPrefixedName(); - D.Diag(diag::err_drv_unsupported_opt_for_target) - << OptName << (*TC)->getTriple().str(); - } + // reflects whether current target is ahead-of-time and can't support // runtime setting of specialization constants bool isAOT = isNVPTX || isAMDGCN || isSpirvAOT; @@ -4452,50 +4444,48 @@ class OffloadingActionBuilder final { ActionList WrapperInputs; // post link is not optional - even if not splitting, always need to // process specialization constants + + types::ID PostLinkOutType = + isSPIR ? types::TY_Tempfiletable : FullDeviceLinkAction->getType(); + // For SPIR-V targets, force TY_Tempfiletable. auto *PostLinkAction = C.MakeAction( - FullDeviceLinkAction, types::TY_Tempfiletable); + FullDeviceLinkAction, PostLinkOutType, types::TY_Tempfiletable); PostLinkAction->setRTSetsSpecConstants(!isAOT); - constexpr char COL_CODE[] = "Code"; + auto *ExtractIRFilesAction = C.MakeAction( + PostLinkAction, + isSPIR ? types::TY_Tempfilelist : PostLinkAction->getType(), + types::TY_Tempfilelist); + // single column w/o title fits TY_Tempfilelist format + ExtractIRFilesAction->addExtractColumnTform( + FileTableTformJobAction::COL_CODE, false /*drop titles*/); if (isNVPTX || isAMDGCN) { - // Make extraction copy the only remaining code file instead of - // creating a new table with a single entry. 
- // TODO: Process all PTX code files in file table to enable code - // splitting for PTX target. - auto *ExtractIRFilesAction = C.MakeAction( - PostLinkAction, types::TY_LLVM_BC); - ExtractIRFilesAction->addCopySingleFileTform(COL_CODE, 0); - - Action *FinAction; - if (isNVPTX) { - FinAction = finalizeNVPTXDependences(ExtractIRFilesAction, - (*TC)->getTriple()); - } else /* isAMDGCN */ { - FinAction = finalizeAMDGCNDependences(ExtractIRFilesAction, + JobAction *FinAction = + isNVPTX ? finalizeNVPTXDependences(ExtractIRFilesAction, + (*TC)->getTriple()) + : finalizeAMDGCNDependences(ExtractIRFilesAction, (*TC)->getTriple()); - } - ActionList TformInputs{PostLinkAction, FinAction}; + auto *ForEachWrapping = C.MakeAction( + ExtractIRFilesAction, FinAction); - // Replace the only code entry in the table, as confirmed by the - // previous transformation. + ActionList TformInputs{PostLinkAction, ForEachWrapping}; auto *ReplaceFilesAction = C.MakeAction( - TformInputs, types::TY_Tempfiletable); - ReplaceFilesAction->addReplaceCellTform(COL_CODE, 0); + TformInputs, types::TY_Tempfiletable, types::TY_Tempfiletable); + ReplaceFilesAction->addReplaceColumnTform( + FileTableTformJobAction::COL_CODE, + FileTableTformJobAction::COL_CODE); + WrapperInputs.push_back(ReplaceFilesAction); } else { // For SPIRV-based targets - translate to SPIRV then optionally // compile ahead-of-time to native architecture - auto *ExtractIRFilesAction = C.MakeAction( - PostLinkAction, types::TY_Tempfilelist); - // single column w/o title fits TY_Tempfilelist format - ExtractIRFilesAction->addExtractColumnTform(COL_CODE, - false /*drop titles*/); - Action *BuildCodeAction = C.MakeAction( - ExtractIRFilesAction, types::TY_Tempfilelist); + Action *BuildCodeAction = + (Action *)C.MakeAction( + ExtractIRFilesAction, types::TY_Tempfilelist); // After the Link, wrap the files before the final host link - if (isSpirvAOT) { + if (isAOT) { types::ID OutType = types::TY_Tempfilelist; if (!DeviceCodeSplit) { 
OutType = (TT.getSubArch() == llvm::Triple::SPIRSubArch_fpga) @@ -4526,8 +4516,10 @@ class OffloadingActionBuilder final { } ActionList TformInputs{PostLinkAction, BuildCodeAction}; auto *ReplaceFilesAction = C.MakeAction( - TformInputs, types::TY_Tempfiletable); - ReplaceFilesAction->addReplaceColumnTform(COL_CODE, COL_CODE); + TformInputs, types::TY_Tempfiletable, types::TY_Tempfiletable); + ReplaceFilesAction->addReplaceColumnTform( + FileTableTformJobAction::COL_CODE, + FileTableTformJobAction::COL_CODE); WrapperInputs.push_back(ReplaceFilesAction); } @@ -6218,6 +6210,26 @@ static std::string GetTriplePlusArchString(const ToolChain *TC, return TriplePlusArch; } +static void CollectForEachInputs( + InputInfoList &InputInfos, const Action *SourceAction, const ToolChain *TC, + StringRef BoundArch, Action::OffloadKind TargetDeviceOffloadKind, + const std::map, InputInfo> + &CachedResults) { + for (const Action *Input : SourceAction->getInputs()) { + // Search for the Input, if not in the cache assume actions were collapsed + // so recurse. + auto Lookup = CachedResults.find( + {Input, + GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}); + if (Lookup != CachedResults.end()) { + InputInfos.push_back(Lookup->second); + } else { + CollectForEachInputs(InputInfos, Input, TC, BoundArch, + TargetDeviceOffloadKind, CachedResults); + } + } +} + InputInfo Driver::BuildJobsForAction( Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, @@ -6333,6 +6345,58 @@ InputInfo Driver::BuildJobsForActionNoCache( TargetDeviceOffloadKind); } + if (const ForEachWrappingAction *FEA = dyn_cast(A)) { + // Check that the main action wasn't already processed. 
+ auto MainActionOutput = CachedResults.find( + {FEA->getJobAction(), + GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}); + if (MainActionOutput != CachedResults.end()) { + // The input was processed on behalf of another foreach. + // Add entry in cache and return. + CachedResults[{FEA, GetTriplePlusArchString(TC, BoundArch, + TargetDeviceOffloadKind)}] = + MainActionOutput->second; + return MainActionOutput->second; + } + + // Build commands for the TFormInput then take any command added after as + // needing a llvm-foreach wrapping. + BuildJobsForAction(C, FEA->getTFormInput(), TC, BoundArch, + /*AtTopLevel=*/false, MultipleArchs, LinkingOutput, + CachedResults, TargetDeviceOffloadKind); + unsigned OffsetIdx = C.getJobs().size(); + BuildJobsForAction(C, FEA->getJobAction(), TC, BoundArch, + /*AtTopLevel=*/false, MultipleArchs, LinkingOutput, + CachedResults, TargetDeviceOffloadKind); + + auto begin = C.getJobs().getJobsForOverride().begin() + OffsetIdx; + auto end = C.getJobs().getJobsForOverride().end(); + + // Steal the commands. 
+ llvm::SmallVector, 4> JobsToWrap( + std::make_move_iterator(begin), std::make_move_iterator(end)); + C.getJobs().getJobsForOverride().erase(begin, end); + + InputInfo ActionResult; + for (std::unique_ptr Cmd : + llvm::make_range(std::make_move_iterator(JobsToWrap.begin()), + std::make_move_iterator(JobsToWrap.end()))) { + const JobAction *SourceAction = cast(&Cmd->getSource()); + + ActionResult = CachedResults.at( + {SourceAction, + GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}); + InputInfoList InputInfos; + CollectForEachInputs(InputInfos, SourceAction, TC, BoundArch, + TargetDeviceOffloadKind, CachedResults); + const Tool *Creator = &Cmd->getCreator(); + + tools::SYCL::constructLLVMForeachCommand( + C, *SourceAction, std::move(Cmd), InputInfos, ActionResult, Creator, + "", types::getTypeTempSuffix(ActionResult.getType())); + } + return ActionResult; + } ActionList Inputs = A->getInputs(); diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index f3cda040c936b..07927a190e977 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -390,6 +390,7 @@ Tool *ToolChain::getTool(Action::ActionClass AC) const { case Action::InputClass: case Action::BindArchClass: case Action::OffloadClass: + case Action::ForEachWrappingClass: case Action::LipoJobClass: case Action::DsymutilJobClass: case Action::VerifyDebugInfoJobClass: diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index dc3c39af9f16c..2eb49d68c0ee1 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8834,8 +8834,10 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const llvm::opt::ArgList &TCArgs, const char *LinkingOutput) const { + const SYCLPostLinkJobAction *SYCLPostLink = + dyn_cast(&JA); // Construct sycl-post-link command. 
- assert(isa(JA) && "Expecting SYCL post link job!"); + assert(SYCLPostLink && "Expecting SYCL post link job!"); ArgStringList CmdArgs; // See if device code splitting is requested @@ -8865,13 +8867,13 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA, // Enable PI program metadata if (getToolChain().getTriple().isNVPTX()) addArgs(CmdArgs, TCArgs, {"-emit-program-metadata"}); - if (JA.getType() == types::TY_LLVM_BC) { + if (SYCLPostLink->getTrueType() == types::TY_LLVM_BC) { // single file output requested - this means only perform necessary IR // transformations (like specialization constant intrinsic lowering) and // output LLVMIR addArgs(CmdArgs, TCArgs, {"-ir-output-only"}); } else { - assert(JA.getType() == types::TY_Tempfiletable); + assert(SYCLPostLink->getTrueType() == types::TY_Tempfiletable); // Symbol file and specialization constant info generation is mandatory - // add options unconditionally addArgs(CmdArgs, TCArgs, {"-symbols"}); @@ -8886,8 +8888,7 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA, addArgs(CmdArgs, TCArgs, {StringRef(getSYCLPostLinkOptimizationLevel(TCArgs))}); // specialization constants processing is mandatory - auto *SYCLPostLink = llvm::dyn_cast(&JA); - if (SYCLPostLink && SYCLPostLink->getRTSetsSpecConstants()) + if (SYCLPostLink->getRTSetsSpecConstants()) addArgs(CmdArgs, TCArgs, {"-spec-const=rt"}); else addArgs(CmdArgs, TCArgs, {"-spec-const=default"}); diff --git a/clang/test/Driver/sycl-offload-amdgcn.cpp b/clang/test/Driver/sycl-offload-amdgcn.cpp index cbfaad9980337..ca7ff0474fda6 100644 --- a/clang/test/Driver/sycl-offload-amdgcn.cpp +++ b/clang/test/Driver/sycl-offload-amdgcn.cpp @@ -28,12 +28,13 @@ // CHK-PHASES-NO-CC: 9: assembler, {8}, object, (host-sycl) // CHK-PHASES-NO-CC: 10: linker, {9}, image, (host-sycl) // CHK-PHASES-NO-CC: 11: linker, {5}, ir, (device-sycl) -// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, tempfiletable, (device-sycl) +// CHK-PHASES-NO-CC: 12: 
sycl-post-link, {11}, ir, (device-sycl) // CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl) // CHK-PHASES-NO-CC: 14: backend, {13}, assembler, (device-sycl) // CHK-PHASES-NO-CC: 15: assembler, {14}, object, (device-sycl) // CHK-PHASES-NO-CC: 16: linker, {15}, image, (device-sycl) // CHK-PHASES-NO-CC: 17: linker, {16}, hip-fatbin, (device-sycl) -// CHK-PHASES-NO-CC: 18: file-table-tform, {12, 17}, tempfiletable, (device-sycl) -// CHK-PHASES-NO-CC: 19: clang-offload-wrapper, {18}, object, (device-sycl) -// CHK-PHASES-NO-CC: 20: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (amdgcn-amd-amdhsa-sycldevice)" {19}, image +// CHK-PHASES-NO-CC: 18: foreach, {13, 17}, hip-fatbin, (device-sycl) +// CHK-PHASES-NO-CC: 19: file-table-tform, {12, 18}, tempfiletable, (device-sycl) +// CHK-PHASES-NO-CC: 20: clang-offload-wrapper, {19}, object, (device-sycl) +// CHK-PHASES-NO-CC: 21: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (amdgcn-amd-amdhsa-sycldevice)" {20}, image diff --git a/clang/test/Driver/sycl-offload-nvptx.cpp b/clang/test/Driver/sycl-offload-nvptx.cpp index fb8fb31a68cea..d04b1eae99e0a 100644 --- a/clang/test/Driver/sycl-offload-nvptx.cpp +++ b/clang/test/Driver/sycl-offload-nvptx.cpp @@ -5,16 +5,23 @@ /// Check action graph. 
// RUN: %clangxx -### -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \ -// RUN: -fsycl-targets=nvptx64-nvidia-nvcl-sycldevice --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: -fsycl-targets=nvptx64-nvidia-cuda --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ // RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-ACTIONS %s -// CHK-ACTIONS: "-cc1" "-triple" "nvptx64-nvidia-nvcl-sycldevice" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}} "-fsycl-is-device"{{.*}} "-Wno-sycl-strict"{{.*}} "-sycl-std=2020" {{.*}} "-internal-isystem" "{{.*}}bin{{[/\\]+}}..{{[/\\]+}}include{{[/\\]+}}sycl"{{.*}} "-mlink-builtin-bitcode" "{{.*}}libspirv.bc"{{.*}} "-mlink-builtin-bitcode" "{{.*}}libdevice{{.*}}.10.bc"{{.*}} "-target-feature" "+ptx42"{{.*}} "-target-sdk-version=[[CUDA_VERSION:[0-9.]+]]"{{.*}} "-target-cpu" "sm_50"{{.*}} "-std=c++11"{{.*}} + +// CHK-ACTIONS: "-cc1" "-triple" "nvptx64-nvidia-cuda" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}} "-fsycl-is-device"{{.*}} "-Wno-sycl-strict"{{.*}} "-sycl-std=2020" {{.*}} "-emit-llvm-bc" {{.*}} "-internal-isystem" "{{.*}}bin{{[/\\]+}}..{{[/\\]+}}include{{[/\\]+}}sycl"{{.*}} "-mlink-builtin-bitcode" "{{.*}}libspirv.bc"{{.*}} "-mlink-builtin-bitcode" "{{.*}}libdevice{{.*}}.10.bc"{{.*}} "-target-feature" "+ptx42"{{.*}} "-target-sdk-version=[[CUDA_VERSION:[0-9.]+]]"{{.*}} "-target-cpu" "sm_50"{{.*}} "-std=c++11"{{.*}} +// CHK-ACTIONS: sycl-post-link{{.*}} "-split=auto" +// CHK-ACTIONS: file-table-tform" "-extract=Code" "-drop_titles" +// CHK-ACTIONS: llvm-foreach" {{.*}} "--" "{{.*}}clang-13" +// CHK-ACTIONS: llvm-foreach" {{.*}} "--" "{{.*}}ptxas" +// CHK-ACTIONS: llvm-foreach" {{.*}} "--" "{{.*}}fatbinary" +// CHK-ACTIONS: file-table-tform" "-replace=Code,Code" // CHK-ACTIONS-NOT: "-mllvm -sycl-opt" // CHK-ACTIONS: clang-offload-wrapper"{{.*}} "-host=x86_64-unknown-linux-gnu" "-target=nvptx64" "-kind=sycl"{{.*}} /// Check phases w/out specifying a compute capability. 
// RUN: %clangxx -ccc-print-phases -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \ -// RUN: -fsycl-targets=nvptx64-nvidia-nvcl-sycldevice %s 2>&1 \ +// RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PHASES-NO-CC %s // CHK-PHASES-NO-CC: 0: input, "{{.*}}", c++, (host-sycl) // CHK-PHASES-NO-CC: 1: append-footer, {0}, c++, (host-sycl) @@ -22,22 +29,26 @@ // CHK-PHASES-NO-CC: 3: input, "{{.*}}", c++, (device-sycl, sm_50) // CHK-PHASES-NO-CC: 4: preprocessor, {3}, c++-cpp-output, (device-sycl, sm_50) // CHK-PHASES-NO-CC: 5: compiler, {4}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 6: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (nvptx64-nvidia-nvcl-sycldevice:sm_50)" {5}, c++-cpp-output +// CHK-PHASES-NO-CC: 6: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {5}, c++-cpp-output // CHK-PHASES-NO-CC: 7: compiler, {6}, ir, (host-sycl) // CHK-PHASES-NO-CC: 8: backend, {7}, assembler, (host-sycl) // CHK-PHASES-NO-CC: 9: assembler, {8}, object, (host-sycl) // CHK-PHASES-NO-CC: 10: linker, {9}, image, (host-sycl) // CHK-PHASES-NO-CC: 11: linker, {5}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, tempfiletable, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl, sm_50) // CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl, sm_50) // CHK-PHASES-NO-CC: 14: backend, {13}, assembler, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 15: file-table-tform, {12, 14}, tempfiletable, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 16: clang-offload-wrapper, {15}, object, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 17: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (nvptx64-nvidia-nvcl-sycldevice:sm_50)" {16}, image +// CHK-PHASES-NO-CC: 15: assembler, {14}, object, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 16: linker, {14, 15}, cuda-fatbin, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 17: 
foreach, {13, 16}, cuda-fatbin, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 18: file-table-tform, {12, 17}, tempfiletable, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 19: clang-offload-wrapper, {18}, object, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 20: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {19}, image /// Check phases specifying a compute capability. // RUN: %clangxx -ccc-print-phases -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \ -// RUN: -fsycl-targets=nvptx64-nvidia-nvcl-sycldevice \ +// RUN: -fsycl-targets=nvptx64-nvidia-cuda \ // RUN: -Xsycl-target-backend "--cuda-gpu-arch=sm_35" %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PHASES %s // CHK-PHASES: 0: input, "{{.*}}", c++, (host-sycl) @@ -46,15 +57,18 @@ // CHK-PHASES: 3: input, "{{.*}}", c++, (device-sycl, sm_35) // CHK-PHASES: 4: preprocessor, {3}, c++-cpp-output, (device-sycl, sm_35) // CHK-PHASES: 5: compiler, {4}, ir, (device-sycl, sm_35) -// CHK-PHASES: 6: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (nvptx64-nvidia-nvcl-sycldevice:sm_35)" {5}, c++-cpp-output +// CHK-PHASES: 6: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {5}, c++-cpp-output // CHK-PHASES: 7: compiler, {6}, ir, (host-sycl) // CHK-PHASES: 8: backend, {7}, assembler, (host-sycl) // CHK-PHASES: 9: assembler, {8}, object, (host-sycl) // CHK-PHASES: 10: linker, {9}, image, (host-sycl) // CHK-PHASES: 11: linker, {5}, ir, (device-sycl, sm_35) -// CHK-PHASES: 12: sycl-post-link, {11}, tempfiletable, (device-sycl, sm_35) +// CHK-PHASES: 12: sycl-post-link, {11}, ir, (device-sycl, sm_35) // CHK-PHASES: 13: file-table-tform, {12}, ir, (device-sycl, sm_35) // CHK-PHASES: 14: backend, {13}, assembler, (device-sycl, sm_35) -// CHK-PHASES: 15: file-table-tform, {12, 14}, tempfiletable, (device-sycl, sm_35) -// CHK-PHASES: 16: clang-offload-wrapper, {15}, object, (device-sycl, sm_35) -// CHK-PHASES: 17: offload, 
"host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (nvptx64-nvidia-nvcl-sycldevice:sm_35)" {16}, image +// CHK-PHASES: 15: assembler, {14}, object, (device-sycl, sm_35) +// CHK-PHASES: 16: linker, {14, 15}, cuda-fatbin, (device-sycl, sm_35) +// CHK-PHASES: 17: foreach, {13, 16}, cuda-fatbin, (device-sycl, sm_35) +// CHK-PHASES: 18: file-table-tform, {12, 17}, tempfiletable, (device-sycl, sm_35) +// CHK-PHASES: 19: clang-offload-wrapper, {18}, object, (device-sycl, sm_35) +// CHK-PHASES: 20: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {19}, image