From 483ce9c277bacde95b3b9d8863e1e3ff7d8dcc47 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Tue, 11 May 2021 10:26:22 +0100 Subject: [PATCH 01/12] [SYCL][CUDA] Adds PI CUDA support for reqd_work_group_size attribute This commit adds support for reqd_work_group_size in the PI CUDA backend by extracting the attribute as program metadata. The program metadata accompanies the binary when passed to the backend and it is up to the backend if they extract any useful metadata. This adds two additional parameters to piProgramCreateWithBinary for passing the program metadata. Program metadata is transported as a properties created by sycl-post-link, so this commit also changes the behaviour of the NVPTX path for linkage actions leading to the offload wrapper. These changes uses file tables for the NVPTX path as well to allow generation and preservation of properties. This assumes that the file table only ever contains a single row if taking the NVPTX path and will fail otherwise. Signed-off-by: Steffen Larsen --- clang/include/clang/Driver/Action.h | 18 +- clang/lib/Driver/Action.cpp | 10 + clang/lib/Driver/Driver.cpp | 75 ++++--- clang/lib/Driver/ToolChains/Clang.cpp | 25 ++- llvm/include/llvm/Support/PropertySetIO.h | 1 + llvm/include/llvm/Support/SimpleTable.h | 3 + llvm/lib/Support/PropertySetIO.cpp | 1 + llvm/lib/Support/SimpleTable.cpp | 10 + .../sycl-post-link/emit_program_metadata.ll | 24 +++ .../file-table-tform/file-table-tform.cpp | 79 +++++-- llvm/tools/sycl-post-link/sycl-post-link.cpp | 48 ++++- sycl/include/CL/sycl/detail/pi.h | 10 +- sycl/include/CL/sycl/detail/pi.hpp | 2 + sycl/plugins/cuda/pi_cuda.cpp | 196 ++++++++++++------ sycl/plugins/cuda/pi_cuda.hpp | 7 + sycl/plugins/esimd_cpu/pi_esimd_cpu.cpp | 4 +- sycl/plugins/level_zero/pi_level_zero.cpp | 13 +- sycl/plugins/opencl/pi_opencl.cpp | 13 +- sycl/plugins/rocm/pi_rocm.cpp | 6 +- sycl/source/detail/pi.cpp | 1 + .../program_manager/program_manager.cpp | 30 ++- sycl/unittests/kernel-and-program/Cache.cpp | 3 +- sycl/unittests/misc/KernelBuildOptions.cpp | 3 +- sycl/unittests/pi/cuda/test_kernels.cpp | 64 +++--- 24 files changed, 473 insertions(+), 173 deletions(-) create mode 100644 llvm/test/tools/sycl-post-link/emit_program_metadata.ll diff --git a/clang/include/clang/Driver/Action.h b/clang/include/clang/Driver/Action.h index 061cac249c906..31990c6695dd0 100644 --- a/clang/include/clang/Driver/Action.h +++ b/clang/include/clang/Driver/Action.h @@ -772,7 +772,7 @@ class FileTableTformJobAction : public JobAction { public: struct Tform { - enum Kind { EXTRACT, EXTRACT_DROP_TITLE, REPLACE, RENAME }; + enum Kind { EXTRACT, EXTRACT_DROP_TITLE, REPLACE, REPLACE_CELL, RENAME }; Tform() = default; Tform(Kind K, std::initializer_list Args) : TheKind(K) { @@ -794,18 +794,34 @@ class FileTableTformJobAction : public JobAction { // from another file table passed as input to this action. void addReplaceColumnTform(StringRef From, StringRef To); + // Replaces a cell in this table with column title and row + // with the file name passed as input to this action. + void addReplaceCellTform(StringRef ColumnName, int Row); + // Renames a column with title in this table with a column with title // passed as input to this action. void addRenameColumnTform(StringRef From, StringRef To); + // Specifies that, instead of generating a new table, the transformation + // should copy the file in the only remaining row at into the + // output file. + void setCopySingleFileColumn(StringRef ColumnName); + static bool classof(const Action *A) { return A->getKind() == FileTableTformJobClass; } const ArrayRef getTforms() const { return Tforms; } + const std::string getCopySingleFileColumnName() const { + return CopySingleFileColumnName; + } + private: SmallVector Tforms; // transformation actions requested + + // column to copy single file from if requested + std::string CopySingleFileColumnName; }; class AppendFooterJobAction : public JobAction { diff --git a/clang/lib/Driver/Action.cpp b/clang/lib/Driver/Action.cpp index a330722d705de..e928dc92db7d5 100644 --- a/clang/lib/Driver/Action.cpp +++ b/clang/lib/Driver/Action.cpp @@ -507,11 +507,21 @@ void FileTableTformJobAction::addReplaceColumnTform(StringRef From, Tforms.emplace_back(Tform(Tform::REPLACE, {From, To})); } +void FileTableTformJobAction::addReplaceCellTform(StringRef ColumnName, + int Row) { + Tforms.emplace_back( + Tform(Tform::REPLACE_CELL, {ColumnName, std::to_string(Row)})); +} + void FileTableTformJobAction::addRenameColumnTform(StringRef From, StringRef To) { Tforms.emplace_back(Tform(Tform::RENAME, {From, To})); } +void FileTableTformJobAction::setCopySingleFileColumn(StringRef ColumnName) { + CopySingleFileColumnName = ColumnName.str(); +} + void AppendFooterJobAction::anchor() {} AppendFooterJobAction::AppendFooterJobAction(Action *Input, types::ID Type) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 42f1ea275d8e7..322a27ebc99ab 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4378,19 +4378,19 @@ class OffloadingActionBuilder final { // .--------------------------------------. // | PostLink | // .--------------------------------------. - // [.n] [+*] [+*] + // [+n] [+*] [+] // | | | - // | .-----------------. | - // | | FileTableTform | | - // | | (extract "Code")| | - // | .-----------------. | - // | [-] | + // .----------------. .-----------------. | + // | FileTableTform | | FileTableTform | | + // | (copy "Code") | | (extract "Code")| | + // .----------------. .-----------------. | + // [.] [-] | // | | | - // | [-*] | - // .-------------. .-------------------. | - // |finalizeNVPTX| | SPIRVTranslator | | - // .-------------. .-------------------. | - // | [-as] [-!a] | + // [.] [-*] | + // .---------------. .-------------------. | + // | finalizeNVPTX | | SPIRVTranslator | | + // .---------------. .-------------------. | + // [.] [-as] [-!a] | // | | | | // | [-s] | | // | .----------------. | | @@ -4398,13 +4398,13 @@ class OffloadingActionBuilder final { // | .----------------. | | // | [-s] | | // | | | | - // | [-a] [-!a] [+] - // | .--------------------. - // | | FileTableTform | - // | | (replace "Code") | - // | .--------------------. - // | | - // [.n] [+*] + // [.] [-a] [-!a] [+] + // .------------------------------------. + // | FileTableTform | + // | (replace "Code") | + // .------------------------------------. + // | + // [+] // .--------------------------------------. // | OffloadWrapper | // .--------------------------------------. @@ -4451,24 +4451,38 @@ class OffloadingActionBuilder final { ActionList WrapperInputs; // post link is not optional - even if not splitting, always need to // process specialization constants - types::ID PostLinkOutType = - isNVPTX || isAMDGCN ? types::TY_LLVM_BC : types::TY_Tempfiletable; auto *PostLinkAction = C.MakeAction( - FullDeviceLinkAction, PostLinkOutType); + FullDeviceLinkAction, types::TY_Tempfiletable); PostLinkAction->setRTSetsSpecConstants(!isAOT); - if (isNVPTX) { - Action *FinAction = - finalizeNVPTXDependences(PostLinkAction, (*TC)->getTriple()); - WrapperInputs.push_back(FinAction); - } else if (isAMDGCN) { - Action *FinAction = - finalizeAMDGCNDependences(PostLinkAction, (*TC)->getTriple()); - WrapperInputs.push_back(FinAction); + constexpr char COL_CODE[] = "Code"; + + if (isNVPTX || isAMDGCN) { + // Make extraction copy the only remaining code file instead of + // creating a new table with a single entry. + auto *ExtractIRFilesAction = C.MakeAction( + PostLinkAction, types::TY_LLVM_BC); + ExtractIRFilesAction->setCopySingleFileColumn(COL_CODE); + + Action *FinAction; + if (isNVPTX) { + FinAction = finalizeNVPTXDependences(ExtractIRFilesAction, + (*TC)->getTriple()); + } else /* isAMDGCN */ { + FinAction = finalizeAMDGCNDependences(ExtractIRFilesAction, + (*TC)->getTriple()); + } + ActionList TformInputs{PostLinkAction, FinAction}; + + // Replace the only code entry in the table, as confirmed by the + // previous transformation. + auto *ReplaceFilesAction = C.MakeAction( + TformInputs, types::TY_Tempfiletable); + ReplaceFilesAction->addReplaceCellTform(COL_CODE, 0); + WrapperInputs.push_back(ReplaceFilesAction); } else { // For SPIRV-based targets - translate to SPIRV then optionally // compile ahead-of-time to native architecture - constexpr char COL_CODE[] = "Code"; auto *ExtractIRFilesAction = C.MakeAction( PostLinkAction, types::TY_Tempfilelist); // single column w/o title fits TY_Tempfilelist format @@ -4513,6 +4527,7 @@ class OffloadingActionBuilder final { ReplaceFilesAction->addReplaceColumnTform(COL_CODE, COL_CODE); WrapperInputs.push_back(ReplaceFilesAction); } + // After the Link, wrap the files before the final host link auto *DeviceWrappingAction = C.MakeAction( WrapperInputs, types::TY_Object); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index d6bf82935943b..db6c2fc7285e3 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8859,6 +8859,9 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA, TCArgs.hasFlag(options::OPT_fsycl_dead_args_optimization, options::OPT_fno_sycl_dead_args_optimization, false)) addArgs(CmdArgs, TCArgs, {"-emit-param-info"}); + // Enable PI program metadata + if (getToolChain().getTriple().isNVPTX()) + addArgs(CmdArgs, TCArgs, {"-emit-program-metadata"}); if (JA.getType() == types::TY_LLVM_BC) { // single file output requested - this means only perform necessary IR // transformations (like specialization constant intrinsic lowering) and @@ -8945,6 +8948,15 @@ void FileTableTform::ConstructJob(Compilation &C, const JobAction &JA, addArgs(CmdArgs, TCArgs, {Arg}); break; } + case FileTableTformJobAction::Tform::REPLACE_CELL: { + assert(Tf.TheArgs.size() == 2 && "column name and row id expected"); + SmallString<128> Arg("-replace_cell="); + Arg += Tf.TheArgs[0]; + Arg += ","; + Arg += Tf.TheArgs[1]; + addArgs(CmdArgs, TCArgs, {Arg}); + break; + } case FileTableTformJobAction::Tform::RENAME: { assert(Tf.TheArgs.size() == 2 && "from/to names expected"); SmallString<128> Arg("-rename="); @@ -8956,16 +8968,23 @@ void FileTableTform::ConstructJob(Compilation &C, const JobAction &JA, } } } - // 2) add output option + // 2) add copy_single_file option if requested + if (!TformJob.getCopySingleFileColumnName().empty()) { + SmallString<128> Arg("-copy_single_file="); + Arg += TformJob.getCopySingleFileColumnName(); + addArgs(CmdArgs, TCArgs, {Arg}); + } + + // 3) add output option assert(Output.isFilename() && "table tform output must be a file"); addArgs(CmdArgs, TCArgs, {"-o", Output.getFilename()}); - // 3) add inputs + // 4) add inputs for (const auto &Input : Inputs) { assert(Input.isFilename() && "table tform input must be a file"); addArgs(CmdArgs, TCArgs, {Input.getFilename()}); } - // 4) finally construct and add a command to the compilation + // 5) finally construct and add a command to the compilation C.addCommand(std::make_unique( JA, *this, ResponseFileSupport::None(), TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), diff --git a/llvm/include/llvm/Support/PropertySetIO.h b/llvm/include/llvm/Support/PropertySetIO.h index 3c332e3ca1f0b..ee5c8443deb06 100644 --- a/llvm/include/llvm/Support/PropertySetIO.h +++ b/llvm/include/llvm/Support/PropertySetIO.h @@ -188,6 +188,7 @@ class PropertySetRegistry { "SYCL/specialization constants default values"; static constexpr char SYCL_DEVICELIB_REQ_MASK[] = "SYCL/devicelib req mask"; static constexpr char SYCL_KERNEL_PARAM_OPT_INFO[] = "SYCL/kernel param opt"; + static constexpr char SYCL_PROGRAM_METADATA[] = "SYCL/program metadata"; static constexpr char SYCL_MISC_PROP[] = "SYCL/misc properties"; static constexpr char SYCL_ASSERT_USED[] = "SYCL/assert used"; diff --git a/llvm/include/llvm/Support/SimpleTable.h b/llvm/include/llvm/Support/SimpleTable.h index 85ce21beb53ca..8bda35f8b598c 100644 --- a/llvm/include/llvm/Support/SimpleTable.h +++ b/llvm/include/llvm/Support/SimpleTable.h @@ -97,6 +97,9 @@ class SimpleTable { Error replaceColumn(StringRef Name, const SimpleTable &Src, StringRef SrcName = ""); + // Replaces the value in a cell at a given row and column with the new value. + Error updateCellValue(int Row, StringRef ColName, StringRef NewValue); + // Renames a column. Error renameColumn(StringRef OldName, StringRef NewName); diff --git a/llvm/lib/Support/PropertySetIO.cpp b/llvm/lib/Support/PropertySetIO.cpp index 03ad6bb0b4cf9..1e92f1f8b9e3f 100644 --- a/llvm/lib/Support/PropertySetIO.cpp +++ b/llvm/lib/Support/PropertySetIO.cpp @@ -197,6 +197,7 @@ constexpr char PropertySetRegistry::SYCL_SPECIALIZATION_CONSTANTS[]; constexpr char PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK[]; constexpr char PropertySetRegistry::SYCL_SPEC_CONSTANTS_DEFAULT_VALUES[]; constexpr char PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO[]; +constexpr char PropertySetRegistry::SYCL_PROGRAM_METADATA[]; constexpr char PropertySetRegistry::SYCL_MISC_PROP[]; constexpr char PropertySetRegistry::SYCL_ASSERT_USED[]; diff --git a/llvm/lib/Support/SimpleTable.cpp b/llvm/lib/Support/SimpleTable.cpp index a89087275c4d0..cbfdbaea6dbc7 100644 --- a/llvm/lib/Support/SimpleTable.cpp +++ b/llvm/lib/Support/SimpleTable.cpp @@ -109,6 +109,16 @@ Error SimpleTable::replaceColumn(StringRef Name, const SimpleTable &Src, return Error::success(); } +Error SimpleTable::updateCellValue(int Row, StringRef ColName, + StringRef NewValue) { + if (getNumColumns() == 0) + return makeError("empty table"); + if (Row > getNumRows()) + return makeError("row index out of bounds"); + Rows[Row][getColumnId(ColName)] = NewValue.str(); + return Error::success(); +} + Error SimpleTable::renameColumn(StringRef OldName, StringRef NewName) { int I = getColumnId(OldName); diff --git a/llvm/test/tools/sycl-post-link/emit_program_metadata.ll b/llvm/test/tools/sycl-post-link/emit_program_metadata.ll new file mode 100644 index 0000000000000..93b71be47bd81 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/emit_program_metadata.ll @@ -0,0 +1,24 @@ +; This test checks that the post-link tool generates SYCL program metadata. +; +; RUN: sycl-post-link -emit-program-metadata -S %s -o %t.files.table +; RUN: FileCheck %s -input-file=%t.files.table --check-prefixes CHECK-TABLE +; RUN: FileCheck %s -input-file=%t.files_0.prop --match-full-lines --check-prefixes CHECK-PROP + +target triple = "spir64-unknown-unknown-sycldevice" + +!0 = !{i32 1, i32 2, i32 4} + +define weak_odr spir_kernel void @SpirKernel1(float %arg1) !reqd_work_group_size !0 { + call void @foo(float %arg1) + ret void +} + +declare void @foo(float) + +; CHECK-PROP: [SYCL/program metadata] +; // Base64 encoding in the prop file (including 8 bytes length): +; CHECK-PROP-NEXT: SpirKernel1@reqd_work_group_size=2|gBAAAAAAAAQAAAAACAAAAQAAAAA + +; CHECK-TABLE: [Code|Properties] +; CHECK-TABLE-NEXT: {{.*}}files_0.prop +; CHECK-TABLE-EMPTY: diff --git a/llvm/tools/file-table-tform/file-table-tform.cpp b/llvm/tools/file-table-tform/file-table-tform.cpp index 6cbccd37a4bf4..a1c15e143e44a 100644 --- a/llvm/tools/file-table-tform/file-table-tform.cpp +++ b/llvm/tools/file-table-tform/file-table-tform.cpp @@ -78,6 +78,7 @@ static cl::opt Output("o", cl::Required, cl::cat(FileTableTformCat)); static constexpr char OPT_REPLACE[] = "replace"; +static constexpr char OPT_REPLACE_CELL[] = "replace_cell"; static constexpr char OPT_RENAME[] = "rename"; static constexpr char OPT_EXTRACT[] = "extract"; @@ -85,6 +86,11 @@ static cl::list TformReplace{ OPT_REPLACE, cl::ZeroOrMore, cl::desc("replace a column"), cl::value_desc(""), cl::cat(FileTableTformCat)}; +static cl::list TformReplaceCell{ + OPT_REPLACE_CELL, cl::ZeroOrMore, cl::desc("replace a cell"), + cl::value_desc(","), + cl::cat(FileTableTformCat)}; + static cl::list TformRename{ OPT_RENAME, cl::ZeroOrMore, cl::desc("rename a column"), cl::value_desc(","), cl::cat(FileTableTformCat)}; @@ -98,6 +104,12 @@ static cl::opt DropTitles{"drop_titles", cl::Optional, cl::desc("drop column titles"), cl::cat(FileTableTformCat)}; +static cl::opt CopySingleFile{ + "copy_single_file", cl::Optional, + cl::desc("copy the only remaining file in specified column after " + "transformation"), + cl::cat(FileTableTformCat)}; + Error makeToolError(Twine Msg) { return make_error("*** " + llvm::Twine(ToolName) + " ERROR: " + Msg, @@ -152,6 +164,10 @@ struct TformCmd { [&](TformCmd *Cmd) { return Cmd->consumeSingleInput(Cur, End); }) + .Case(OPT_REPLACE_CELL, + [&](TformCmd *Cmd) { + return Cmd->consumeSingleInput(Cur, End); + }) .Case(OPT_RENAME, [&](TformCmd *Cmd) { return Error::success(); }) .Case(OPT_EXTRACT, [&](TformCmd *Cmd) { return Error::success(); }); return F(this); @@ -174,6 +190,18 @@ struct TformCmd { Twine(OPT_REPLACE)); return Error::success(); }) + .Case(OPT_REPLACE_CELL, + [&](TformCmd *Cmd) -> Error { + // argument is + if (Arg.empty()) + return makeUserError("empty argument in " + + Twine(OPT_REPLACE_CELL)); + Arg.split(Args, ','); + if (Args.size() != 2 || Args[0].empty() || Args[1].empty()) + return makeUserError("invalid argument in " + + Twine(OPT_REPLACE_CELL)); + return Error::success(); + }) .Case(OPT_RENAME, [&](TformCmd *Cmd) -> Error { // argument is , @@ -217,6 +245,16 @@ struct TformCmd { Table.replaceColumn(Args[0], *Table1->get(), Args[1]); return Res ? std::move(Res) : std::move(Error::success()); }) + .Case(OPT_REPLACE_CELL, + [&](TformCmd *Cmd) -> Error { + assert(Args.size() == 2 && Cmd->Inputs.size() == 1); + const int Row = std::stoi(Args[1].str()); + if (Row > Table.getNumRows()) + return makeUserError("row index out of bounds"); + Error Res = + Table.updateCellValue(Row, Args[0], Cmd->Inputs[0]); + return Res ? std::move(Res) : std::move(Error::success()); + }) .Case(OPT_RENAME, [&](TformCmd *Cmd) -> Error { // argument is , @@ -267,8 +305,8 @@ int main(int argc, char **argv) { // yet, as an order across all command line options-commands needs to be // established first to properly map inputs to commands. - auto Lists = {std::addressof(TformReplace), std::addressof(TformRename), - std::addressof(TformExtract)}; + auto Lists = {std::addressof(TformReplace), std::addressof(TformReplaceCell), + std::addressof(TformRename), std::addressof(TformExtract)}; for (const auto *L : Lists) { for (auto It = L->begin(); It != L->end(); It++) { @@ -308,16 +346,33 @@ int main(int argc, char **argv) { Error Res = Cmd->execute(*Table->get()); CHECK_AND_EXIT(std::move(Res)); } - // Finally, write the result - std::error_code EC; - raw_fd_ostream Out{Output, EC, sys::fs::OpenFlags::OF_None}; - if (EC) - CHECK_AND_EXIT(createFileError(Output, EC)); - Table->get()->write(Out, !DropTitles); - - if (Out.has_error()) - CHECK_AND_EXIT(createFileError(Output, Out.error())); - Out.close(); + if (!CopySingleFile.empty()) { + // Copy the file from the only remaining row at specified column + if (Table.get()->getNumRows() > 1) + CHECK_AND_EXIT(makeUserError("cannot copy files from multiple rows")); + if (Table.get()->getNumRows() == 0) + CHECK_AND_EXIT(makeUserError("no rows remaining after transformation")); + StringRef FileToCopy = (*Table.get())[0].getCell(CopySingleFile, ""); + + if (FileToCopy.empty()) + CHECK_AND_EXIT(makeUserError("no file found in specified column")); + + std::error_code EC = sys::fs::copy_file(FileToCopy, Output); + if (EC) + CHECK_AND_EXIT(createFileError(Output, EC)); + } else { + // Write the transformed table to file + std::error_code EC; + raw_fd_ostream Out{Output, EC, sys::fs::OpenFlags::OF_None}; + + if (EC) + CHECK_AND_EXIT(createFileError(Output, EC)); + Table->get()->write(Out, !DropTitles); + + if (Out.has_error()) + CHECK_AND_EXIT(createFileError(Output, Out.error())); + Out.close(); + } return 0; } diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index bdd27ec41b4bb..869d3ec7d9dfc 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -164,12 +164,17 @@ static cl::opt EmitKernelParamInfo{ "emit-param-info", cl::desc("emit kernel parameter optimization info"), cl::cat(PostLinkCat)}; +static cl::opt EmitProgramMetadata{"emit-program-metadata", + cl::desc("emit SYCL program metadata"), + cl::cat(PostLinkCat)}; + struct ImagePropSaveInfo { bool NeedDeviceLibReqMask; bool DoSpecConst; bool SetSpecConstAtRT; bool SpecConstsMet; bool EmitKernelParamInfo; + bool EmitProgramMetadata; bool IsEsimdKernel; }; @@ -376,6 +381,21 @@ static HasAssertStatus hasAssertInFunctionCallGraph(llvm::Function *Func) { return No_Assert; } +// Gets reqd_work_group_size information for function Func. +static std::vector +getKernelReqdWorkGroupSizeMetadata(const Function &Func) { + auto ReqdWorkGroupSizeMD = Func.getMetadata("reqd_work_group_size"); + if (!ReqdWorkGroupSizeMD || ReqdWorkGroupSizeMD->getNumOperands() != 3) + return {}; + uint32_t X = mdconst::extract(ReqdWorkGroupSizeMD->getOperand(0)) + ->getZExtValue(); + uint32_t Y = mdconst::extract(ReqdWorkGroupSizeMD->getOperand(1)) + ->getZExtValue(); + uint32_t Z = mdconst::extract(ReqdWorkGroupSizeMD->getOperand(2)) + ->getZExtValue(); + return {X, Y, Z}; +} + // Input parameter KernelModuleMap is a map containing groups of kernels with // same values of the sycl-module-id attribute. ResSymbolsLists is a vector of // kernel name lists. Each vector element is a string with kernel names from the @@ -563,6 +583,22 @@ static string_vector saveDeviceImageProperty( } } + // Metadata names may be composite so we keep them alive until the + // properties have been written. + SmallVector MetadataNames; + if (ImgPSInfo.EmitProgramMetadata) { + // Add reqd_work_group_size information to program metadata + for (const Function &Func : ResultModules[I]->functions()) { + std::vector KernelReqdWorkGroupSize = + getKernelReqdWorkGroupSizeMetadata(Func); + if (KernelReqdWorkGroupSize.empty()) + continue; + MetadataNames.push_back(Func.getName().str() + "@reqd_work_group_size"); + PropSet[llvm::util::PropertySetRegistry::SYCL_PROGRAM_METADATA].insert( + {MetadataNames[MetadataNames.size() - 1], KernelReqdWorkGroupSize}); + } + } + if (ImgPSInfo.IsEsimdKernel) { PropSet[llvm::util::PropertySetRegistry::SYCL_MISC_PROP].insert( {"isEsimdImage", true}); @@ -747,7 +783,8 @@ static TableFiles processOneModule(std::unique_ptr M, bool IsEsimd, { ImagePropSaveInfo ImgPSInfo = { true, DoSpecConst, SetSpecConstAtRT, - SpecConstsMet, EmitKernelParamInfo, IsEsimd}; + SpecConstsMet, EmitKernelParamInfo, EmitProgramMetadata, + IsEsimd}; string_vector Files = saveDeviceImageProperty(ResultModules, ImgPSInfo); std::copy(Files.begin(), Files.end(), std::back_inserter(TblFiles[COL_PROPS])); @@ -897,8 +934,10 @@ int main(int argc, char **argv) { bool DoSplitEsimd = SplitEsimd.getNumOccurrences() > 0; bool DoSpecConst = SpecConstLower.getNumOccurrences() > 0; bool DoParamInfo = EmitKernelParamInfo.getNumOccurrences() > 0; + bool DoProgMetadata = EmitProgramMetadata.getNumOccurrences() > 0; - if (!DoSplit && !DoSpecConst && !DoSymGen && !DoParamInfo && !DoSplitEsimd) { + if (!DoSplit && !DoSpecConst && !DoSymGen && !DoParamInfo && + !DoProgMetadata && !DoSplitEsimd) { errs() << "no actions specified; try --help for usage info\n"; return 1; } @@ -922,6 +961,11 @@ int main(int argc, char **argv) { << " -" << IROutputOnly.ArgStr << "\n"; return 1; } + if (IROutputOnly && DoProgMetadata) { + errs() << "error: -" << EmitProgramMetadata.ArgStr << " can't be used with" + << " -" << IROutputOnly.ArgStr << "\n"; + return 1; + } SMDiagnostic Err; std::unique_ptr M = parseIRFile(InputFilename, Err, Context); // It is OK to use raw pointer here as we control that it does not outlive M diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 49c44946000f0..e8b025a294e39 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -689,9 +689,16 @@ static const uint8_t PI_DEVICE_BINARY_OFFLOAD_KIND_SYCL = 4; #define __SYCL_PI_PROPERTY_SET_DEVICELIB_REQ_MASK "SYCL/devicelib req mask" /// PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO defined in PropertySetIO.h #define __SYCL_PI_PROPERTY_SET_KERNEL_PARAM_OPT_INFO "SYCL/kernel param opt" +/// PropertySetRegistry::SYCL_KERNEL_PROGRAM_METADATA defined in PropertySetIO.h +#define __SYCL_PI_PROPERTY_SET_PROGRAM_METADATA "SYCL/program metadata" /// PropertySetRegistry::SYCL_MISC_PROP defined in PropertySetIO.h #define __SYCL_PI_PROPERTY_SET_SYCL_MISC_PROP "SYCL/misc properties" +/// Program metadata tags recognized by the PI backends. For kernels the tag +/// must appear after the kernel name. +#define __SYCL_PI_PROGRAM_METADATA_TAG_REQD_WORK_GROUP_SIZE \ + "@reqd_work_group_size" + /// This struct is a record of the device binary information. If the Kind field /// denotes a portable binary type (SPIR-V or LLVM IR), the DeviceTargetSpec /// field can still be specific and denote e.g. FPGA target. It must match the @@ -1111,7 +1118,8 @@ __SYCL_EXPORT pi_result piclProgramCreateWithSource(pi_context context, __SYCL_EXPORT pi_result piProgramCreateWithBinary( pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, - pi_int32 *binary_status, pi_program *ret_program); + pi_int32 *binary_status, const pi_device_binary_property *metadata, + size_t metadata_length, pi_program *ret_program); __SYCL_EXPORT pi_result piProgramGetInfo(pi_program program, pi_program_info param_name, diff --git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/include/CL/sycl/detail/pi.hpp index 4d621d2e33113..a0f553309b5cf 100644 --- a/sycl/include/CL/sycl/detail/pi.hpp +++ b/sycl/include/CL/sycl/detail/pi.hpp @@ -358,6 +358,7 @@ class DeviceBinaryImage { const PropertyRange &getKernelParamOptInfo() const { return KernelParamOptInfo; } + const PropertyRange &getProgramMetadata() const { return ProgramMetadata; } virtual ~DeviceBinaryImage() {} protected: @@ -369,6 +370,7 @@ class DeviceBinaryImage { DeviceBinaryImage::PropertyRange SpecConstIDMap; DeviceBinaryImage::PropertyRange DeviceLibReqMask; DeviceBinaryImage::PropertyRange KernelParamOptInfo; + DeviceBinaryImage::PropertyRange ProgramMetadata; }; /// Tries to determine the device binary image foramat. Returns diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index ba7e57ee7fbf3..e0e121cc4e632 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -475,13 +475,52 @@ pi_result enqueueEventWait(pi_queue queue, pi_event event) { } _pi_program::_pi_program(pi_context ctxt) - : module_{nullptr}, binary_{}, - binarySizeInBytes_{0}, refCount_{1}, context_{ctxt} { + : module_{nullptr}, binary_{}, binarySizeInBytes_{0}, refCount_{1}, + context_{ctxt}, kernelReqdWorkGroupSizeMD_{} { cuda_piContextRetain(context_); } _pi_program::~_pi_program() { cuda_piContextRelease(context_); } +bool get_kernel_metadata(std::string metadataName, const char *tag, + std::string &kernelName) { + const size_t tagLength = strlen(tag); + const size_t metadataNameLength = metadataName.length(); + if (metadataNameLength >= tagLength && + metadataName.compare(metadataNameLength - tagLength, tagLength, tag) == + 0) { + kernelName = metadataName.substr(0, metadataNameLength - tagLength); + return true; + } + return false; +} + +pi_result _pi_program::set_metadata(const pi_device_binary_property *metadata, + size_t length) { + for (size_t i = 0; i < length; ++i) { + const pi_device_binary_property metadataElement = metadata[i]; + std::string metadataElementName{metadataElement->Name}; + std::string kernelName; + + // If metadata is reqd_work_group_size record it for the corresponding + // kernel name. + if (get_kernel_metadata(metadataElementName, + __SYCL_PI_PROGRAM_METADATA_TAG_REQD_WORK_GROUP_SIZE, + kernelName)) { + assert(metadataElement->ValSize != sizeof(std::uint32_t) * 3 && + "Unexpected size for reqd_work_group_size metadata"); + + // Get pointer to data, skipping 64-bit size at the start of the data. + const auto *reqdWorkGroupElements = + reinterpret_cast(metadataElement->ValAddr) + 2; + kernelReqdWorkGroupSizeMD_[kernelName] = + std::make_tuple(reqdWorkGroupElements[0], reqdWorkGroupElements[1], + reqdWorkGroupElements[2]); + } + } + return PI_SUCCESS; +} + pi_result _pi_program::set_binary(const char *source, size_t length) { assert((binary_ == nullptr && binarySizeInBytes_ == 0) && "Re-setting program binary data which has already been set"); @@ -530,6 +569,8 @@ pi_result _pi_program::build_program(const char *build_options) { /// Note: This is currently only being used by the SYCL program class for the /// has_kernel method, so an alternative would be to move the has_kernel /// query to PI and use cuModuleGetFunction to check for a kernel. +/// Note: Another alternative is to add kernel names as metadata, like with +/// reqd_work_group_size. std::string getKernelNames(pi_program program) { std::string source(program->binary_, program->binary_ + program->binarySizeInBytes_); @@ -2394,6 +2435,74 @@ pi_result cuda_piextKernelSetArgSampler(pi_kernel kernel, pi_uint32 arg_index, return retErr; } +pi_result cuda_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, + pi_kernel_group_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + + // here we want to query about a kernel's cuda blocks! + + if (kernel != nullptr) { + + switch (param_name) { + case PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: { + int max_threads = 0; + cl::sycl::detail::pi::assertion( + cuFuncGetAttribute(&max_threads, + CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, + kernel->get()) == CUDA_SUCCESS); + return getInfo(param_value_size, param_value, param_value_size_ret, + size_t(max_threads)); + } + case PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: { + size_t group_size[3] = {0, 0, 0}; + const auto &reqd_wg_size_md_map = + kernel->program_->kernelReqdWorkGroupSizeMD_; + const auto reqd_wg_size_md = reqd_wg_size_md_map.find(kernel->name_); + if (reqd_wg_size_md != reqd_wg_size_md_map.end()) { + const auto reqd_wg_size = reqd_wg_size_md->second; + group_size[0] = std::get<0>(reqd_wg_size); + group_size[1] = std::get<1>(reqd_wg_size); + group_size[2] = std::get<2>(reqd_wg_size); + } + return getInfoArray(3, param_value_size, param_value, + param_value_size_ret, group_size); + } + case PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: { + // OpenCL LOCAL == CUDA SHARED + int bytes = 0; + cl::sycl::detail::pi::assertion( + cuFuncGetAttribute(&bytes, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, + kernel->get()) == CUDA_SUCCESS); + return getInfo(param_value_size, param_value, param_value_size_ret, + pi_uint64(bytes)); + } + case PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { + // Work groups should be multiples of the warp size + int warpSize = 0; + cl::sycl::detail::pi::assertion( + cuDeviceGetAttribute(&warpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, + device->get()) == CUDA_SUCCESS); + return getInfo(param_value_size, param_value, param_value_size_ret, + static_cast(warpSize)); + } + case PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: { + // OpenCL PRIVATE == CUDA LOCAL + int bytes = 0; + cl::sycl::detail::pi::assertion( + cuFuncGetAttribute(&bytes, CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, + kernel->get()) == CUDA_SUCCESS); + return getInfo(param_value_size, param_value, param_value_size_ret, + pi_uint64(bytes)); + } + default: + __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); + } + } + + return PI_INVALID_KERNEL; +} + pi_result cuda_piEnqueueKernelLaunch( pi_queue command_queue, pi_kernel kernel, pi_uint32 work_dim, const size_t *global_work_offset, const size_t *global_work_size, @@ -2413,10 +2522,17 @@ pi_result cuda_piEnqueueKernelLaunch( int threadsPerBlock[3] = {32, 1, 1}; size_t maxWorkGroupSize = 0u; size_t maxThreadsPerBlock[3] = {}; + size_t reqdThreadsPerBlock[3] = {}; bool providedLocalWorkGroupSize = (local_work_size != nullptr); { - pi_result retError = cuda_piDeviceGetInfo( + pi_result retError = cuda_piKernelGetGroupInfo( + kernel, command_queue->device_, + PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, + sizeof(reqdThreadsPerBlock), reqdThreadsPerBlock, nullptr); + assert(retError == PI_SUCCESS); + + retError = cuda_piDeviceGetInfo( command_queue->device_, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, sizeof(maxThreadsPerBlock), maxThreadsPerBlock, nullptr); assert(retError == PI_SUCCESS); @@ -2429,6 +2545,10 @@ pi_result cuda_piEnqueueKernelLaunch( if (providedLocalWorkGroupSize) { auto isValid = [&](int dim) { + if (reqdThreadsPerBlock[dim] != 0 && + local_work_size[dim] != reqdThreadsPerBlock[dim]) + return PI_INVALID_WORK_GROUP_SIZE; + if (local_work_size[dim] > maxThreadsPerBlock[dim]) return PI_INVALID_WORK_ITEM_SIZE; // Checks that local work sizes are a divisor of the global work sizes @@ -2756,7 +2876,8 @@ pi_result cuda_piProgramCreate(pi_context, const void *, size_t, pi_program *) { pi_result cuda_piProgramCreateWithBinary( pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, - pi_int32 *binary_status, pi_program *program) { + pi_int32 *binary_status, const pi_device_binary_property *metadata, + size_t metadata_length, pi_program *program) { // Ignore unused parameter (void)binary_status; @@ -2773,6 +2894,8 @@ pi_result cuda_piProgramCreateWithBinary( std::unique_ptr<_pi_program> retProgram{new _pi_program{context}}; + retProgram->set_metadata(metadata, metadata_length); + const bool has_length = (lengths != nullptr); size_t length = has_length ? lengths[0] @@ -3050,71 +3173,6 @@ pi_result cuda_piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, return PI_INVALID_KERNEL; } -pi_result cuda_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, - pi_kernel_group_info param_name, - size_t param_value_size, void *param_value, - size_t *param_value_size_ret) { - - // here we want to query about a kernel's cuda blocks! - - if (kernel != nullptr) { - - switch (param_name) { - case PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: { - int max_threads = 0; - cl::sycl::detail::pi::assertion( - cuFuncGetAttribute(&max_threads, - CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, - kernel->get()) == CUDA_SUCCESS); - return getInfo(param_value_size, param_value, param_value_size_ret, - size_t(max_threads)); - } - case PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: { - // Returns the work-group size specified in the kernel source or IL. - // If the work-group size is not specified in the kernel source or IL, - // (0, 0, 0) is returned. - // https://www.khronos.org/registry/OpenCL/sdk/2.1/docs/man/xhtml/clGetKernelWorkGroupInfo.html - - // TODO: can we extract the work group size from the PTX? - size_t group_size[3] = {0, 0, 0}; - return getInfoArray(3, param_value_size, param_value, - param_value_size_ret, group_size); - } - case PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE: { - // OpenCL LOCAL == CUDA SHARED - int bytes = 0; - cl::sycl::detail::pi::assertion( - cuFuncGetAttribute(&bytes, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, - kernel->get()) == CUDA_SUCCESS); - return getInfo(param_value_size, param_value, param_value_size_ret, - pi_uint64(bytes)); - } - case PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { - // Work groups should be multiples of the warp size - int warpSize = 0; - cl::sycl::detail::pi::assertion( - cuDeviceGetAttribute(&warpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, - device->get()) == CUDA_SUCCESS); - return getInfo(param_value_size, param_value, param_value_size_ret, - static_cast(warpSize)); - } - case PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: { - // OpenCL PRIVATE == CUDA LOCAL - int bytes = 0; - cl::sycl::detail::pi::assertion( - cuFuncGetAttribute(&bytes, CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, - kernel->get()) == CUDA_SUCCESS); - return getInfo(param_value_size, param_value, param_value_size_ret, - pi_uint64(bytes)); - } - default: - __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); - } - } - - return PI_INVALID_KERNEL; -} - pi_result cuda_piKernelGetSubGroupInfo( pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name, size_t input_value_size, const void *input_value, size_t param_value_size, diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/plugins/cuda/pi_cuda.hpp index 0804da17d2281..1ae3f9c464ada 100644 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ b/sycl/plugins/cuda/pi_cuda.hpp @@ -504,6 +504,10 @@ struct _pi_program { std::atomic_uint32_t refCount_; _pi_context *context_; + // Metadata + std::unordered_map> + kernelReqdWorkGroupSizeMD_; + constexpr static size_t MAX_LOG_SIZE = 8192u; char errorLog_[MAX_LOG_SIZE], infoLog_[MAX_LOG_SIZE]; @@ -513,6 +517,9 @@ struct _pi_program { _pi_program(pi_context ctxt); ~_pi_program(); + pi_result set_metadata(const pi_device_binary_property *metadata, + size_t length); + pi_result set_binary(const char *binary, size_t binarySizeInBytes); pi_result build_program(const char* build_options); diff --git a/sycl/plugins/esimd_cpu/pi_esimd_cpu.cpp b/sycl/plugins/esimd_cpu/pi_esimd_cpu.cpp index 63fc720f49eee..b359a057fe945 100644 --- a/sycl/plugins/esimd_cpu/pi_esimd_cpu.cpp +++ b/sycl/plugins/esimd_cpu/pi_esimd_cpu.cpp @@ -740,7 +740,9 @@ pi_result piProgramCreate(pi_context, const void *, size_t, pi_program *) { pi_result piProgramCreateWithBinary(pi_context, pi_uint32, const pi_device *, const size_t *, const unsigned char **, - pi_int32 *, pi_program *) { + pi_int32 *, + const pi_device_binary_property *, size_t, + pi_program *) { DIE_NO_IMPLEMENTATION; return PI_SUCCESS; } diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index e0c9c497477d2..8876bfbfe42da 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -2936,12 +2936,13 @@ pi_result piProgramCreate(pi_context Context, const void *ILBytes, return PI_SUCCESS; } -pi_result piProgramCreateWithBinary(pi_context Context, pi_uint32 NumDevices, - const pi_device *DeviceList, - const size_t *Lengths, - const unsigned char **Binaries, - pi_int32 *BinaryStatus, - pi_program *Program) { +pi_result piProgramCreateWithBinary( + pi_context Context, pi_uint32 NumDevices, const pi_device *DeviceList, + const size_t *Lengths, const unsigned char **Binaries, + pi_int32 *BinaryStatus, const pi_device_binary_property *Metadata, + size_t MetadataLength, pi_program *Program) { + (void)Metadata; + (void)MetadataLength; PI_ASSERT(Context, PI_INVALID_CONTEXT); PI_ASSERT(DeviceList && NumDevices, PI_INVALID_VALUE); diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 1d762aaa5fafb..21ebe1980da6b 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -635,12 +635,13 @@ pi_result piclProgramCreateWithSource(pi_context context, pi_uint32 count, return ret_err; } -pi_result piProgramCreateWithBinary(pi_context context, pi_uint32 num_devices, - const pi_device *device_list, - const size_t *lengths, - const unsigned char **binaries, - pi_int32 *binary_status, - pi_program *ret_program) { +pi_result piProgramCreateWithBinary( + pi_context context, pi_uint32 num_devices, const pi_device *device_list, + const size_t *lengths, const unsigned char **binaries, + pi_int32 *binary_status, const pi_device_binary_property *metadata, + size_t metadata_length, pi_program *ret_program) { + (void)metadata; + (void)metadata_length; pi_result ret_err = PI_INVALID_OPERATION; *ret_program = cast(clCreateProgramWithBinary( diff --git a/sycl/plugins/rocm/pi_rocm.cpp b/sycl/plugins/rocm/pi_rocm.cpp index 4f7b5a83523aa..3d4fce5d03300 100644 --- a/sycl/plugins/rocm/pi_rocm.cpp +++ b/sycl/plugins/rocm/pi_rocm.cpp @@ -2742,7 +2742,8 @@ pi_result rocm_piProgramCreate(pi_context context, const void *il, pi_result rocm_piProgramCreateWithBinary( pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, - pi_int32 *binary_status, pi_program *program) { + pi_int32 *binary_status, const pi_device_binary_property *metadata, + size_t metadata_length, pi_program *program) { assert(context != nullptr); assert(binaries != nullptr); assert(program != nullptr); @@ -2756,6 +2757,9 @@ pi_result rocm_piProgramCreateWithBinary( std::unique_ptr<_pi_program> retProgram{new _pi_program{context}}; + // TODO: Set metadata here and use reqd_work_group_size information. + // See cuda_piProgramCreateWithBinary + const bool has_length = (lengths != nullptr); size_t length = has_length ? lengths[0] diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index aa77a7e819d00..e460146fc3644 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -690,6 +690,7 @@ void DeviceBinaryImage::init(pi_device_binary Bin) { SpecConstIDMap.init(Bin, __SYCL_PI_PROPERTY_SET_SPEC_CONST_MAP); DeviceLibReqMask.init(Bin, __SYCL_PI_PROPERTY_SET_DEVICELIB_REQ_MASK); KernelParamOptInfo.init(Bin, __SYCL_PI_PROPERTY_SET_KERNEL_PARAM_OPT_INFO); + ProgramMetadata.init(Bin, __SYCL_PI_PROPERTY_SET_PROGRAM_METADATA); } } // namespace pi diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 298ae7e4e11d0..93d38ffe5b5a3 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -64,10 +64,10 @@ ProgramManager &ProgramManager::getInstance() { return GlobalHandler::instance().getProgramManager(); } -static RT::PiProgram createBinaryProgram(const ContextImplPtr Context, - const device &Device, - const unsigned char *Data, - size_t DataLen) { +static RT::PiProgram +createBinaryProgram(const ContextImplPtr Context, const device &Device, + const unsigned char *Data, size_t DataLen, + const std::vector Metadata) { const detail::plugin &Plugin = Context->getPlugin(); #ifndef _NDEBUG pi_uint32 NumDevices = 0; @@ -84,7 +84,7 @@ static RT::PiProgram createBinaryProgram(const ContextImplPtr Context, pi_int32 BinaryStatus = CL_SUCCESS; Plugin.call( Context->getHandleRef(), 1 /*one binary*/, &PiDevice, &DataLen, &Data, - &BinaryStatus, &Program); + &BinaryStatus, Metadata.data(), Metadata.size(), &Program); if (BinaryStatus != CL_SUCCESS) { throw runtime_error("Creating program with binary failed.", BinaryStatus); @@ -339,12 +339,17 @@ RT::PiProgram ProgramManager::createPIProgram(const RTDeviceBinaryImage &Img, "SPIR-V online compilation is not supported in this context", PI_INVALID_OPERATION); + // Get program metadata from properties + auto ProgMetadata = Img.getProgramMetadata(); + std::vector ProgMetadataVector{ + ProgMetadata.begin(), ProgMetadata.end()}; + // Load the image const ContextImplPtr Ctx = getSyclObjImpl(Context); - RT::PiProgram Res = - Format == PI_DEVICE_BINARY_TYPE_SPIRV - ? createSpirvProgram(Ctx, RawImg.BinaryStart, ImgSize) - : createBinaryProgram(Ctx, Device, RawImg.BinaryStart, ImgSize); + RT::PiProgram Res = Format == PI_DEVICE_BINARY_TYPE_SPIRV + ? createSpirvProgram(Ctx, RawImg.BinaryStart, ImgSize) + : createBinaryProgram(Ctx, Device, RawImg.BinaryStart, + ImgSize, ProgMetadataVector); { std::lock_guard Lock(MNativeProgramsMutex); @@ -453,13 +458,18 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, const detail::plugin &Plugin = ContextImpl->getPlugin(); RT::PiProgram NativePrg; + // Get program metadata from properties + auto ProgMetadata = Img.getProgramMetadata(); + std::vector ProgMetadataVector{ + ProgMetadata.begin(), ProgMetadata.end()}; + auto BinProg = PersistentDeviceCodeCache::getItemFromDisc( Device, Img, SpecConsts, CompileOpts + LinkOpts); if (BinProg.size()) { // TODO: Build for multiple devices once supported by program manager NativePrg = createBinaryProgram(ContextImpl, Device, (const unsigned char *)BinProg[0].data(), - BinProg[0].size()); + BinProg[0].size(), ProgMetadataVector); } else { NativePrg = createPIProgram(Img, Context, Device); if (Prg) diff --git a/sycl/unittests/kernel-and-program/Cache.cpp b/sycl/unittests/kernel-and-program/Cache.cpp index 58d2a568a51c6..bafb4777c040a 100644 --- a/sycl/unittests/kernel-and-program/Cache.cpp +++ b/sycl/unittests/kernel-and-program/Cache.cpp @@ -102,7 +102,8 @@ static pi_result redefinedProgramCreateWithSource(pi_context context, static pi_result redefinedProgramCreateWithBinary( pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, - pi_int32 *binary_status, pi_program *ret_program) { + pi_int32 *binary_status, const pi_device_binary_property *metadata, + size_t metadata_length, pi_program *ret_program) { *ret_program = reinterpret_cast(1); return PI_SUCCESS; } diff --git a/sycl/unittests/misc/KernelBuildOptions.cpp b/sycl/unittests/misc/KernelBuildOptions.cpp index eeaf9b827226e..ef6d31a0d4270 100644 --- a/sycl/unittests/misc/KernelBuildOptions.cpp +++ b/sycl/unittests/misc/KernelBuildOptions.cpp @@ -142,7 +142,8 @@ static pi_result redefinedProgramCreateWithSource(pi_context context, static pi_result redefinedProgramCreateWithBinary( pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, - pi_int32 *binary_status, pi_program *ret_program) { + pi_int32 *binary_status, const pi_device_binary_property *metadata, + size_t metadata_length, pi_program *ret_program) { return PI_SUCCESS; } diff --git a/sycl/unittests/pi/cuda/test_kernels.cpp b/sycl/unittests/pi/cuda/test_kernels.cpp index d43cd6a7cd8bd..5c424ac9e26de 100644 --- a/sycl/unittests/pi/cuda/test_kernels.cpp +++ b/sycl/unittests/pi/cuda/test_kernels.cpp @@ -139,10 +139,11 @@ TEST_F(CudaKernelsTest, PICreateProgramAndKernel) { pi_program prog; pi_int32 binary_status = PI_SUCCESS; - ASSERT_EQ((plugin->call_nocheck( - context_, 1, &device_, nullptr, - (const unsigned char **)&ptxSource, &binary_status, &prog)), - PI_SUCCESS); + ASSERT_EQ( + (plugin->call_nocheck( + context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, + &binary_status, nullptr, 0, &prog)), + PI_SUCCESS); ASSERT_EQ((plugin->call_nocheck( prog, 1, &device_, "", nullptr, nullptr)), @@ -162,10 +163,11 @@ TEST_F(CudaKernelsTest, PIKernelArgumentSimple) { /// use it at some point in the future, pass it anyway and check the result. /// Same goes for all the other tests in this file. pi_int32 binary_status = PI_SUCCESS; - ASSERT_EQ((plugin->call_nocheck( - context_, 1, &device_, nullptr, - (const unsigned char **)&ptxSource, &binary_status, &prog)), - PI_SUCCESS); + ASSERT_EQ( + (plugin->call_nocheck( + context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, + &binary_status, nullptr, 0, &prog)), + PI_SUCCESS); ASSERT_EQ(binary_status, PI_SUCCESS); ASSERT_EQ((plugin->call_nocheck( @@ -191,10 +193,11 @@ TEST_F(CudaKernelsTest, PIKernelArgumentSetTwice) { pi_program prog; pi_int32 binary_status = PI_SUCCESS; - ASSERT_EQ((plugin->call_nocheck( - context_, 1, &device_, nullptr, - (const unsigned char **)&ptxSource, &binary_status, &prog)), - PI_SUCCESS); + ASSERT_EQ( + (plugin->call_nocheck( + context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, + &binary_status, nullptr, 0, &prog)), + PI_SUCCESS); ASSERT_EQ(binary_status, PI_SUCCESS); ASSERT_EQ((plugin->call_nocheck( @@ -229,10 +232,11 @@ TEST_F(CudaKernelsTest, PIKernelSetMemObj) { pi_program prog; pi_int32 binary_status = PI_SUCCESS; - ASSERT_EQ((plugin->call_nocheck( - context_, 1, &device_, nullptr, - (const unsigned char **)&ptxSource, &binary_status, &prog)), - PI_SUCCESS); + ASSERT_EQ( + (plugin->call_nocheck( + context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, + &binary_status, nullptr, 0, &prog)), + PI_SUCCESS); ASSERT_EQ(binary_status, PI_SUCCESS); ASSERT_EQ((plugin->call_nocheck( @@ -264,10 +268,11 @@ TEST_F(CudaKernelsTest, PIkerneldispatch) { pi_program prog; pi_int32 binary_status = PI_SUCCESS; - ASSERT_EQ((plugin->call_nocheck( - context_, 1, &device_, nullptr, - (const unsigned char **)&ptxSource, &binary_status, &prog)), - PI_SUCCESS); + ASSERT_EQ( + (plugin->call_nocheck( + context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, + &binary_status, nullptr, 0, &prog)), + PI_SUCCESS); ASSERT_EQ(binary_status, PI_SUCCESS); ASSERT_EQ((plugin->call_nocheck( @@ -307,10 +312,11 @@ TEST_F(CudaKernelsTest, PIkerneldispatchTwo) { pi_program prog; pi_int32 binary_status = PI_SUCCESS; - ASSERT_EQ((plugin->call_nocheck( - context_, 1, &device_, nullptr, - (const unsigned char **)&twoParams, &binary_status, &prog)), - PI_SUCCESS); + ASSERT_EQ( + (plugin->call_nocheck( + context_, 1, &device_, nullptr, (const unsigned char **)&twoParams, + &binary_status, nullptr, 0, &prog)), + PI_SUCCESS); ASSERT_EQ(binary_status, PI_SUCCESS); ASSERT_EQ((plugin->call_nocheck( @@ -362,11 +368,11 @@ TEST_F(CudaKernelsTest, PIKernelArgumentSetTwiceOneLocal) { pi_program prog; pi_int32 binary_status = PI_SUCCESS; - ASSERT_EQ( - (plugin->call_nocheck( - context_, 1, &device_, nullptr, - (const unsigned char **)&threeParamsTwoLocal, &binary_status, &prog)), - PI_SUCCESS); + ASSERT_EQ((plugin->call_nocheck( + context_, 1, &device_, nullptr, + (const unsigned char **)&threeParamsTwoLocal, &binary_status, + nullptr, 0, &prog)), + PI_SUCCESS); ASSERT_EQ(binary_status, PI_SUCCESS); ASSERT_EQ((plugin->call_nocheck( From de11767092501a63901209a37db5879266a85c83 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Thu, 13 May 2021 15:14:37 +0100 Subject: [PATCH 02/12] Fix driver offload test and minor changes Signed-off-by: Steffen Larsen --- clang/test/Driver/sycl-offload-nvptx.cpp | 20 ++++++++++++-------- sycl/plugins/cuda/pi_cuda.cpp | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/clang/test/Driver/sycl-offload-nvptx.cpp b/clang/test/Driver/sycl-offload-nvptx.cpp index 3f683d41fa136..fb8fb31a68cea 100644 --- a/clang/test/Driver/sycl-offload-nvptx.cpp +++ b/clang/test/Driver/sycl-offload-nvptx.cpp @@ -28,10 +28,12 @@ // CHK-PHASES-NO-CC: 9: assembler, {8}, object, (host-sycl) // CHK-PHASES-NO-CC: 10: linker, {9}, image, (host-sycl) // CHK-PHASES-NO-CC: 11: linker, {5}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 13: backend, {12}, assembler, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 14: clang-offload-wrapper, {13}, object, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 15: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (nvptx64-nvidia-nvcl-sycldevice:sm_50)" {14}, image +// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, tempfiletable, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 14: backend, {13}, assembler, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 15: file-table-tform, {12, 14}, tempfiletable, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 16: clang-offload-wrapper, {15}, object, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 17: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (nvptx64-nvidia-nvcl-sycldevice:sm_50)" {16}, image /// Check phases specifying a compute capability. // RUN: %clangxx -ccc-print-phases -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \ @@ -50,7 +52,9 @@ // CHK-PHASES: 9: assembler, {8}, object, (host-sycl) // CHK-PHASES: 10: linker, {9}, image, (host-sycl) // CHK-PHASES: 11: linker, {5}, ir, (device-sycl, sm_35) -// CHK-PHASES: 12: sycl-post-link, {11}, ir, (device-sycl, sm_35) -// CHK-PHASES: 13: backend, {12}, assembler, (device-sycl, sm_35) -// CHK-PHASES: 14: clang-offload-wrapper, {13}, object, (device-sycl, sm_35) -// CHK-PHASES: 15: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (nvptx64-nvidia-nvcl-sycldevice:sm_35)" {14}, image +// CHK-PHASES: 12: sycl-post-link, {11}, tempfiletable, (device-sycl, sm_35) +// CHK-PHASES: 13: file-table-tform, {12}, ir, (device-sycl, sm_35) +// CHK-PHASES: 14: backend, {13}, assembler, (device-sycl, sm_35) +// CHK-PHASES: 15: file-table-tform, {12, 14}, tempfiletable, (device-sycl, sm_35) +// CHK-PHASES: 16: clang-offload-wrapper, {15}, object, (device-sycl, sm_35) +// CHK-PHASES: 17: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (nvptx64-nvidia-nvcl-sycldevice:sm_35)" {16}, image diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index e0e121cc4e632..bece46800785f 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -2440,7 +2440,7 @@ pi_result cuda_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, size_t param_value_size, void *param_value, size_t *param_value_size_ret) { - // here we want to query about a kernel's cuda blocks! + // Here we want to query about a kernel's cuda blocks! if (kernel != nullptr) { From a6ccafce3b8fd26f7a045aded3f5b82a7c8e8882 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Mon, 17 May 2021 10:22:08 +0100 Subject: [PATCH 03/12] Adjusting for feedback and more testing Signed-off-by: Steffen Larsen --- llvm/lib/Support/SimpleTable.cpp | 2 +- llvm/test/tools/file-table-tform/Inputs/s.txt | 2 + .../file-table-tform-single.test | 8 +++ llvm/tools/sycl-post-link/sycl-post-link.cpp | 6 ++- llvm/unittests/Support/SimpleTableTest.cpp | 9 +++- sycl/plugins/cuda/pi_cuda.cpp | 3 +- sycl/unittests/pi/cuda/test_kernels.cpp | 54 +++++++++++++++++++ 7 files changed, 79 insertions(+), 5 deletions(-) create mode 100644 llvm/test/tools/file-table-tform/Inputs/s.txt create mode 100644 llvm/test/tools/file-table-tform/file-table-tform-single.test diff --git a/llvm/lib/Support/SimpleTable.cpp b/llvm/lib/Support/SimpleTable.cpp index cbfdbaea6dbc7..9132a63b8f4b7 100644 --- a/llvm/lib/Support/SimpleTable.cpp +++ b/llvm/lib/Support/SimpleTable.cpp @@ -113,7 +113,7 @@ Error SimpleTable::updateCellValue(int Row, StringRef ColName, StringRef NewValue) { if (getNumColumns() == 0) return makeError("empty table"); - if (Row > getNumRows()) + if (Row > getNumRows() || Row < 0) return makeError("row index out of bounds"); Rows[Row][getColumnId(ColName)] = NewValue.str(); return Error::success(); diff --git a/llvm/test/tools/file-table-tform/Inputs/s.txt b/llvm/test/tools/file-table-tform/Inputs/s.txt new file mode 100644 index 0000000000000..babdf2fced655 --- /dev/null +++ b/llvm/test/tools/file-table-tform/Inputs/s.txt @@ -0,0 +1,2 @@ +[A|B|C|D] +aaa|bbb|100|XXX diff --git a/llvm/test/tools/file-table-tform/file-table-tform-single.test b/llvm/test/tools/file-table-tform/file-table-tform-single.test new file mode 100644 index 0000000000000..75234cea00088 --- /dev/null +++ b/llvm/test/tools/file-table-tform/file-table-tform-single.test @@ -0,0 +1,8 @@ +-- Insert %S/Inputs/gold.txt into column A at row index 0 +RUN: file-table-tform --replace_cell=A,0 %S/Inputs/s.txt %S/Inputs/gold.txt -o t.txt + +-- Copy file in column A from the only row +RUN: file-table-tform --copy_single_file=A t.txt -o u.txt + +-- Verify result +RUN: diff u.txt %S/Inputs/gold.txt diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 869d3ec7d9dfc..21d83a68ead50 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -587,6 +587,9 @@ static string_vector saveDeviceImageProperty( // properties have been written. SmallVector MetadataNames; if (ImgPSInfo.EmitProgramMetadata) { + auto &ProgramMetadata = + PropSet[llvm::util::PropertySetRegistry::SYCL_PROGRAM_METADATA]; + // Add reqd_work_group_size information to program metadata for (const Function &Func : ResultModules[I]->functions()) { std::vector KernelReqdWorkGroupSize = @@ -594,8 +597,7 @@ static string_vector saveDeviceImageProperty( if (KernelReqdWorkGroupSize.empty()) continue; MetadataNames.push_back(Func.getName().str() + "@reqd_work_group_size"); - PropSet[llvm::util::PropertySetRegistry::SYCL_PROGRAM_METADATA].insert( - {MetadataNames[MetadataNames.size() - 1], KernelReqdWorkGroupSize}); + ProgramMetadata.insert({MetadataNames.back(), KernelReqdWorkGroupSize}); } } diff --git a/llvm/unittests/Support/SimpleTableTest.cpp b/llvm/unittests/Support/SimpleTableTest.cpp index c17ea89ddfe28..135cc98dd6f13 100644 --- a/llvm/unittests/Support/SimpleTableTest.cpp +++ b/llvm/unittests/Support/SimpleTableTest.cpp @@ -46,6 +46,8 @@ TEST(SimpleTable, Operations) { auto ReplaceCodeWith = "a_0.spv\n" "a_1.spv\n"; + auto ReplaceSinglePropertyWith = "a_2.props"; + auto MemBuf = MemoryBuffer::getMemBuffer(Content); auto MemBufRepl = MemoryBuffer::getMemBuffer(ReplaceCodeWith); // Create tables from the strings above @@ -60,6 +62,11 @@ TEST(SimpleTable, Operations) { if (Error Err = Table->get()->replaceColumn("Code", *TableRepl->get(), "")) FAIL() << "SimpleTable::replaceColumn failed: " << Err << "\n"; + // -- Update cell + if (Error Err = Table->get()->updateCellValue(1, "Properties", + ReplaceSinglePropertyWith)) + FAIL() << "SimpleTable::updateCellValue failed: " << Err << "\n"; + // -- Add SmallVector NewCol = {"a_0.mnf", "a_1.mnf"}; if (Error Err = Table->get()->addColumn("Manifest", NewCol)) @@ -78,7 +85,7 @@ TEST(SimpleTable, Operations) { } auto Expected = "[Code|Properties|Manifest]\n" "a_0.spv|a_0.props|a_0.mnf\n" - "a_1.spv|a_1.props|a_1.mnf\n"; + "a_1.spv|a_2.props|a_1.mnf\n"; ASSERT_EQ(Result, Expected); } diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index bece46800785f..09ec7aedf97af 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -507,7 +507,8 @@ pi_result _pi_program::set_metadata(const pi_device_binary_property *metadata, if (get_kernel_metadata(metadataElementName, __SYCL_PI_PROGRAM_METADATA_TAG_REQD_WORK_GROUP_SIZE, kernelName)) { - assert(metadataElement->ValSize != sizeof(std::uint32_t) * 3 && + assert(metadataElement->ValSize == + sizeof(std::uint64_t) + sizeof(std::uint32_t) * 3 && "Unexpected size for reqd_work_group_size metadata"); // Get pointer to data, skipping 64-bit size at the start of the data. diff --git a/sycl/unittests/pi/cuda/test_kernels.cpp b/sycl/unittests/pi/cuda/test_kernels.cpp index 5c424ac9e26de..56ecea55e9a10 100644 --- a/sycl/unittests/pi/cuda/test_kernels.cpp +++ b/sycl/unittests/pi/cuda/test_kernels.cpp @@ -156,6 +156,60 @@ TEST_F(CudaKernelsTest, PICreateProgramAndKernel) { ASSERT_NE(kern, nullptr); } +TEST_F(CudaKernelsTest, PICreateProgramAndKernelWithMetadata) { + + std::vector reqdWorkGroupSizeMD; + reqdWorkGroupSizeMD.reserve(5); + // 64-bit representing bit size + reqdWorkGroupSizeMD.push_back(96); + reqdWorkGroupSizeMD.push_back(0); + // reqd_work_group_size x + reqdWorkGroupSizeMD.push_back(8); + // reqd_work_group_size y + reqdWorkGroupSizeMD.push_back(16); + // reqd_work_group_size z + reqdWorkGroupSizeMD.push_back(32); + + const char *reqdWorkGroupSizeMDConstName = + "_Z8myKernelPi@reqd_work_group_size"; + std::vector reqdWorkGroupSizeMDName( + reqdWorkGroupSizeMDConstName, + reqdWorkGroupSizeMDConstName + strlen(reqdWorkGroupSizeMDConstName) + 1); + _pi_device_binary_property_struct reqdWorkGroupSizeMDProp = { + reqdWorkGroupSizeMDName.data(), reqdWorkGroupSizeMD.data(), + pi_property_type::PI_PROPERTY_TYPE_BYTE_ARRAY, + sizeof(std::uint64_t) + sizeof(std::uint32_t) * 3}; + pi_device_binary_property reqdWorkGroupSizeMDPropPointer = + &reqdWorkGroupSizeMDProp; + + pi_program prog; + pi_int32 binary_status = PI_SUCCESS; + ASSERT_EQ( + (plugin.call_nocheck( + context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, + &binary_status, &reqdWorkGroupSizeMDPropPointer, 1, &prog)), + PI_SUCCESS); + + ASSERT_EQ((plugin.call_nocheck( + prog, 1, &device_, "", nullptr, nullptr)), + PI_SUCCESS); + + pi_kernel kern; + ASSERT_EQ((plugin.call_nocheck( + prog, "_Z8myKernelPi", &kern)), + PI_SUCCESS); + ASSERT_NE(kern, nullptr); + + size_t compileWGSize[3] = {0}; + ASSERT_EQ((plugin.call_nocheck( + kern, device_, PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, + sizeof(size_t) * 3, compileWGSize, nullptr)), + PI_SUCCESS); + for (int i = 0; i < 3; ++i) { + ASSERT_EQ(compileWGSize[i], reqdWorkGroupSizeMD[i + 2]); + } +} + TEST_F(CudaKernelsTest, PIKernelArgumentSimple) { pi_program prog; From 4b04b37418aa88dff1f62e9329dbf73bd6f5907f Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Mon, 17 May 2021 12:08:17 +0100 Subject: [PATCH 04/12] Remove redundant check Signed-off-by: Steffen Larsen --- llvm/tools/file-table-tform/file-table-tform.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/tools/file-table-tform/file-table-tform.cpp b/llvm/tools/file-table-tform/file-table-tform.cpp index a1c15e143e44a..b923303c60c03 100644 --- a/llvm/tools/file-table-tform/file-table-tform.cpp +++ b/llvm/tools/file-table-tform/file-table-tform.cpp @@ -247,10 +247,9 @@ struct TformCmd { }) .Case(OPT_REPLACE_CELL, [&](TformCmd *Cmd) -> Error { + // argument is , assert(Args.size() == 2 && Cmd->Inputs.size() == 1); const int Row = std::stoi(Args[1].str()); - if (Row > Table.getNumRows()) - return makeUserError("row index out of bounds"); Error Res = Table.updateCellValue(Row, Args[0], Cmd->Inputs[0]); return Res ? std::move(Res) : std::move(Error::success()); From 5e829f1eec2115600f511018a39d027098941882 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Tue, 18 May 2021 15:41:20 +0100 Subject: [PATCH 05/12] Documentation changed to reflect post-link step changes Signed-off-by: Steffen Larsen --- sycl/doc/CompilerAndRuntimeDesign.md | 15 +- sycl/doc/images/DeviceLinkAndWrap.svg | 915 +++++++++++++++++++----- sycl/doc/images/DevicePTXProcessing.svg | 885 ++++++++++++++++------- 3 files changed, 1391 insertions(+), 424 deletions(-) diff --git a/sycl/doc/CompilerAndRuntimeDesign.md b/sycl/doc/CompilerAndRuntimeDesign.md index 44874cfbde60f..e98233eb73951 100644 --- a/sycl/doc/CompilerAndRuntimeDesign.md +++ b/sycl/doc/CompilerAndRuntimeDesign.md @@ -465,9 +465,15 @@ factors. Each edge is also annotated with the input/output file type. The diagram does not show the `llvm-foreach` tool invocations for clarity. This tool invokes given command line over each file in a file list. In this diagram the tool is applied to `llvm-spirv` and AOT backend whenever the -input/output type is `TY_tempfilelist`. The second invocation of the -`file-table-tform` takes two inputs - the file table and a file list coming -either from `llvm-spirv` or from the AOT backend. +input/output type is `TY_tempfilelist` and the target is not PTX. +Following this, `file-table-tform` takes two inputs - the file table and a file +list coming either from `llvm-spirv` or from the AOT backend. +Targeting PTX currently only accepts a single input file for processing, so +`file-table-tform` is used to extract the code file from the file table, which +is then processed by the +["PTX target processing" step](#device-code-post-link-step-for-CUDA). +The resulting device binary is inserted back into the file table in place of the +extracted code file using `file-table-tform`. ##### Device code splitting @@ -547,7 +553,8 @@ objects for the CUDA target are linked together alongside using the NVPTX backend and assembled into a cubin using the `ptxas` tool (part of the CUDA SDK). The PTX file and cubin are assembled together using `fatbinary` to produce a CUDA fatbin. The CUDA fatbin -is then passed to the offload wrapper tool. +then replaces the llvm bitcode file in the file table generated by +`sycl-post-link`. The resulting table is passed to the offload wrapper tool. ![NVPTX AOT build](images/DevicePTXProcessing.svg) diff --git a/sycl/doc/images/DeviceLinkAndWrap.svg b/sycl/doc/images/DeviceLinkAndWrap.svg index 99ffe673132b5..10ea7704f79d8 100644 --- a/sycl/doc/images/DeviceLinkAndWrap.svg +++ b/sycl/doc/images/DeviceLinkAndWrap.svg @@ -9,7 +9,7 @@ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" sodipodi:docname="DeviceLinkAndWrap.svg" - inkscape:version="1.0.2 (e86c870879, 2021-01-15, custom)" + inkscape:version="1.0.2-2 (e86c870879, 2021-01-15)" id="svg8" version="1.1" viewBox="0 0 205.79753 221.03191" @@ -1494,6 +1494,339 @@ id="path2339" inkscape:connector-curvature="0" /> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id="base" + inkscape:snap-bbox="false" + inkscape:snap-grids="true" + inkscape:snap-global="false" /> @@ -1539,6 +1875,21 @@ id="g841" inkscape:label="pics_device_link_graph" transform="matrix(0.35277777,0,0,-0.35277777,-16.630949,187.55186)"> + + + @@ -1742,7 +2093,7 @@ @@ -1762,17 +2113,17 @@ id="path999" inkscape:connector-curvature="0" /> @@ -1782,37 +2133,32 @@ id="path1007" inkscape:connector-curvature="0" /> - @@ -1827,72 +2173,109 @@ id="path1025" inkscape:connector-curvature="0" /> - - + + + + + + + + + - + + + + + + + - - - - - - - - - - - - - - + transform="translate(-4.1152937,19.101767)" + id="g1355-2"> Split code + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + file + + + + + - + + + + + table + + + + + - + + + + + tform + + + copy “Code” + + + + All targets + + diff --git a/sycl/doc/images/DevicePTXProcessing.svg b/sycl/doc/images/DevicePTXProcessing.svg index 3e53d9e95c3d4..a639d2906a6c8 100644 --- a/sycl/doc/images/DevicePTXProcessing.svg +++ b/sycl/doc/images/DevicePTXProcessing.svg @@ -9,7 +9,7 @@ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" sodipodi:docname="DevicePTXProcessing.svg" - inkscape:version="0.92.3 (2405546, 2018-03-11)" + inkscape:version="1.0.2-2 (e86c870879, 2021-01-15)" id="svg8" version="1.1" viewBox="0 0 205.79753 221.03191" @@ -31,21 +31,6 @@ d="M 5.77,0.0 L -2.88,5.0 L -2.88,-5.0 L 5.77,0.0 z " id="path4942" /> - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + transform="matrix(0.35277777,0,0,-0.35277777,-47.188892,194.96288)"> @@ -2049,7 +2287,7 @@ + transform="matrix(0.35277777,0,0,-0.35277777,-47.188892,194.96288)"> @@ -2071,73 +2309,71 @@ + style="fill:#4472c4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.352778" + d="M 90.140434,11.109217 H 126.9281 v 8.078611 l -1.61572,1.615722 H 90.140434 Z" /> + style="fill:#375c9e;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.352778" + d="m 125.31238,20.80355 0.32455,-1.291166 1.29117,-0.324556 z" /> - Device LLVM IR - + style="stroke-width:0.352778" + x="99.965302" + y="15.401005">File table + style="fill:#4472c4;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.352778" + d="m 108.97877,5.5423837 v 3.5489439 h -0.80434 V 5.5423837 Z m 0.80433,3.1467782 -1.2065,2.4130001 -1.2065,-2.4130001 z" /> + style="fill:#4472c4;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.352778" + d="m 108.97877,20.824717 v 4.780139 h -0.80434 v -4.780139 z m 0.80433,4.377972 -1.2065,2.413 -1.2065,-2.413 z" /> + style="fill:#428374;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.352778" + d="m 65.824475,64.217895 c 0,-0.807861 0.814917,-1.4605 1.820334,-1.4605 1.005416,0 1.820333,0.652639 1.820333,1.4605 0,0.807861 -0.814917,1.4605 -1.820333,1.4605 -1.005417,0 -1.820334,-0.652639 -1.820334,-1.4605 z" /> + style="fill:#428374;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.352778" + d="m 97.99781,64.260228 c 0,-0.807861 0.81491,-1.4605 1.82033,-1.4605 1.00542,0 1.82033,0.652639 1.82033,1.4605 0,0.807861 -0.81491,1.4605 -1.82033,1.4605 -1.00542,0 -1.82033,-0.652639 -1.82033,-1.4605 z" /> + style="fill:#428374;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.352778" + d="m 129.91714,64.217895 c 0,-0.807861 0.81492,-1.4605 1.82033,-1.4605 1.00542,0 1.82034,0.652639 1.82034,1.4605 0,0.807861 -0.81492,1.4605 -1.82034,1.4605 -1.00541,0 -1.82033,-0.652639 -1.82033,-1.4605 z" /> + style="fill:#428374;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.352778" + d="m 158.74614,64.514228 c 0,-0.807861 0.81492,-1.4605 1.82033,-1.4605 1.00542,0 1.82034,0.652639 1.82034,1.4605 0,0.807861 -0.81492,1.4605 -1.82034,1.4605 -1.00541,0 -1.82033,-0.652639 -1.82033,-1.4605 z" /> + style="fill:#428374;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.352778" + d="m 98.607104,182.9885 c 0,-0.80786 0.81492,-1.4605 1.820326,-1.4605 1.00542,0 1.82034,0.65264 1.82034,1.4605 0,0.80786 -0.81492,1.4605 -1.82034,1.4605 -1.005406,0 -1.820326,-0.65264 -1.820326,-1.4605 z" /> + style="fill:#428374;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.352778" + d="m 139.92443,182.81917 c 0,-0.80786 0.81492,-1.4605 1.82034,-1.4605 1.00541,0 1.82033,0.65264 1.82033,1.4605 0,0.80786 -0.81492,1.4605 -1.82033,1.4605 -1.00542,0 -1.82034,-0.65264 -1.82034,-1.4605 z" /> + style="fill:#428374;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.352778" + d="m 66.391439,183.03083 c 0,-0.80786 0.814917,-1.4605 1.820334,-1.4605 1.005416,0 1.820333,0.65264 1.820333,1.4605 0,0.80786 -0.814917,1.4605 -1.820333,1.4605 -1.005417,0 -1.820334,-0.65264 -1.820334,-1.4605 z" /> + transform="matrix(0.35277777,0,0,-0.35277777,-8.162362,195.32856)"> @@ -2158,7 +2394,7 @@ + transform="matrix(0.35277777,0,0,-0.35277777,-7.104029,195.09572)"> @@ -2179,7 +2415,7 @@ + transform="matrix(0.35277777,0,0,-0.35277777,-7.104029,195.09572)"> - Clang - + x="101.31163" + y="84.249756">Clang + transform="matrix(0.35277777,0,0,-0.35277777,-47.188892,221.19433)"> @@ -2220,7 +2454,7 @@ + transform="matrix(0.35277777,0,0,-0.35277777,-47.188892,221.19433)"> @@ -2241,7 +2475,7 @@ + transform="matrix(0.35277777,0,0,-0.35277777,-47.188892,221.19433)"> + transform="matrix(0.35277777,0,0,-0.35277777,-47.188892,221.19433)"> - clang - + x="0 7.7220001 15.444 23.166 30.94416">clang + transform="matrix(0.35277777,0,0,-0.35277777,-47.188892,221.19433)"> - - - + x="0">- + transform="matrix(0.35277777,0,0,-0.35277777,-47.188892,221.19433)"> - offload - + x="0 7.7922001 15.5142 23.2362 30.9582 38.736359 46.528561">offload + transform="matrix(0.35277777,0,0,-0.35277777,-47.188892,221.19433)"> - - - + x="0">- + transform="matrix(0.35277777,0,0,-0.35277777,-47.188892,221.19433)"> - wrapper - + x="0 7.7922001 15.5142 23.2362 30.9582 38.736359 46.458359">wrapper + transform="matrix(0.35277777,0,0,-0.35277777,-47.755856,220.70306)"> @@ -2359,7 +2583,7 @@ + transform="matrix(0.35277777,0,0,-0.35277777,-47.755856,220.70306)"> @@ -2380,7 +2604,7 @@ + transform="matrix(0.35277777,0,0,-0.35277777,-47.755856,220.70306)"> - PTX target processing - + x="81.597427" + y="60.97049">PTX target processing + transform="matrix(0.35277777,0,0,-0.35277777,-47.188892,221.19433)"> @@ -2421,7 +2643,7 @@ + transform="matrix(0.35277777,0,0,-0.35277777,-47.188892,221.19433)"> @@ -2443,34 +2665,32 @@ + style="fill:#ffc000;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.352778" + d="M 90.140434,199.98533 H 126.9281 v 12.41778 l -2.48355,2.48356 H 90.140434 Z" /> + style="fill:#cd9a00;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.352778" + d="m 124.44455,214.88667 0.49741,-1.98685 1.98614,-0.49671 z" /> + transform="matrix(0.35277777,0,0,-0.35277777,-47.188892,221.19433)"> - Wrapper object - + x="0 12.11652 16.79184 23.517 30.831841 38.146679 45.138599 50.038559 53.141399 60.540482 67.911484 71.267036 78.258957 84.099602">Wrapper object + transform="matrix(0.35277777,0,0,-0.35277777,-47.188892,221.19433)"> @@ -2491,7 +2711,7 @@ + transform="matrix(0.35277777,0,0,-0.35277777,-47.188892,221.19433)"> @@ -2513,104 +2733,97 @@ + style="fill:#4472c4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.352778" + d="M 93.315434,206.88567 H 123.7531 v 6.6675 l -1.3335,1.3335 H 93.315434 Z" /> + style="fill:#375c9e;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.352778" + d="m 122.4196,214.88667 0.26811,-1.0668 1.06539,-0.2667 z" /> + transform="matrix(0.35277777,0,0,-0.35277777,-47.188892,221.19433)"> - Device code - + x="0 8.6493597 15.484464 21.841393 25.076113 30.968927 37.930607 41.151264 46.917503 54.329231 61.712833">Device code + style="fill:#4472c4;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.352778" + d="m 108.97877,194.20683 v 3.78954 h -0.80434 v -3.78954 z m 0.80433,3.38738 -1.2065,2.413 -1.2065,-2.413 z" /> + style="fill:#4472c4;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.352778" + d="m 108.97877,214.90783 v 3.21116 h -0.80434 v -3.21116 z m 0.80433,2.809 -1.2065,2.413 -1.2065,-2.413 z" /> - (from sycl-post-link) - + x="28.692085" + y="17.476978">(from sycl-post-link) + id="g1668" + transform="translate(-29.00824,13.042864)"> - libspirv.bc - + sodipodi:role="line">libspirv.bc + transform="translate(-29.00824,11.98453)"> - libdevice.bc - + sodipodi:role="line">libdevice.bc + transform="matrix(0.35277777,0,0,-0.35277777,-7.4064096,222.43894)"> @@ -2631,7 +2844,7 @@ + transform="matrix(0.35277777,0,0,-0.35277777,-6.3480766,222.2061)"> @@ -2652,7 +2865,7 @@ + transform="matrix(0.35277777,0,0,-0.35277777,-6.3480766,222.2061)"> - ptxas - + x="102.06758" + y="113.47669">ptxas + transform="matrix(0.35277777,0,0,-0.35277777,-7.4064096,246.33646)"> @@ -2693,7 +2904,7 @@ + transform="matrix(0.35277777,0,0,-0.35277777,-6.3480766,246.10362)"> @@ -2714,7 +2925,7 @@ + transform="matrix(0.35277777,0,0,-0.35277777,-6.3480766,246.10362)"> - fatbin - + x="102.06758" + y="137.37424">fatbin - - ptx - + id="tspan3794">ptx - cubin - + id="tspan3916">cubin - - LLVM IR - + x="110.90725" + y="73.327454">LLVM IR - CUDA fatbin - + id="tspan2303">CUDA fatbin - (to host linker) - + x="31.587437" + y="216.68318">(to host linker) - (nvptx backend) - + x="91.899139" + y="88.973877">(nvptx backend) - (linked) - + x="97.467636" + y="19.142481">(Single row) + + + + + + + + + + + + + + + + + + + + + + file-table-tform + (Copy "Code") + LLVM IR + + + + + + + + + + + + + + + + + + + + + + file-table-tform + (Replace "Code") + File table + From 40dd00c339c86671bfcb20122ce382fdb1c11407 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Thu, 20 May 2021 16:29:14 +0100 Subject: [PATCH 06/12] Make copy_single_file a transformation Signed-off-by: Steffen Larsen --- clang/include/clang/Driver/Action.h | 17 ++- clang/lib/Driver/Action.cpp | 6 +- clang/lib/Driver/Driver.cpp | 2 +- clang/lib/Driver/ToolChains/Clang.cpp | 21 +-- .../file-table-tform-single.test | 2 +- .../file-table-tform/file-table-tform.cpp | 122 ++++++++++++------ 6 files changed, 110 insertions(+), 60 deletions(-) diff --git a/clang/include/clang/Driver/Action.h b/clang/include/clang/Driver/Action.h index 31990c6695dd0..15921528c960a 100644 --- a/clang/include/clang/Driver/Action.h +++ b/clang/include/clang/Driver/Action.h @@ -772,7 +772,14 @@ class FileTableTformJobAction : public JobAction { public: struct Tform { - enum Kind { EXTRACT, EXTRACT_DROP_TITLE, REPLACE, REPLACE_CELL, RENAME }; + enum Kind { + EXTRACT, + EXTRACT_DROP_TITLE, + REPLACE, + REPLACE_CELL, + RENAME, + COPY_SINGLE_FILE + }; Tform() = default; Tform(Kind K, std::initializer_list Args) : TheKind(K) { @@ -803,9 +810,9 @@ class FileTableTformJobAction : public JobAction { void addRenameColumnTform(StringRef From, StringRef To); // Specifies that, instead of generating a new table, the transformation - // should copy the file in the only remaining row at into the + // should copy the file at column and row into the // output file. - void setCopySingleFileColumn(StringRef ColumnName); + void addCopySingleFileTform(StringRef ColumnName, int Row); static bool classof(const Action *A) { return A->getKind() == FileTableTformJobClass; @@ -813,10 +820,6 @@ class FileTableTformJobAction : public JobAction { const ArrayRef getTforms() const { return Tforms; } - const std::string getCopySingleFileColumnName() const { - return CopySingleFileColumnName; - } - private: SmallVector Tforms; // transformation actions requested diff --git a/clang/lib/Driver/Action.cpp b/clang/lib/Driver/Action.cpp index e928dc92db7d5..4ff09ca4e216b 100644 --- a/clang/lib/Driver/Action.cpp +++ b/clang/lib/Driver/Action.cpp @@ -518,8 +518,10 @@ void FileTableTformJobAction::addRenameColumnTform(StringRef From, Tforms.emplace_back(Tform(Tform::RENAME, {From, To})); } -void FileTableTformJobAction::setCopySingleFileColumn(StringRef ColumnName) { - CopySingleFileColumnName = ColumnName.str(); +void FileTableTformJobAction::addCopySingleFileTform(StringRef ColumnName, + int Row) { + Tforms.emplace_back( + Tform(Tform::COPY_SINGLE_FILE, {ColumnName, std::to_string(Row)})); } void AppendFooterJobAction::anchor() {} diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 322a27ebc99ab..49263ce920c72 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4462,7 +4462,7 @@ class OffloadingActionBuilder final { // creating a new table with a single entry. auto *ExtractIRFilesAction = C.MakeAction( PostLinkAction, types::TY_LLVM_BC); - ExtractIRFilesAction->setCopySingleFileColumn(COL_CODE); + ExtractIRFilesAction->addCopySingleFileTform(COL_CODE, 0); Action *FinAction; if (isNVPTX) { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index db6c2fc7285e3..285d80a133eb9 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8966,25 +8966,28 @@ void FileTableTform::ConstructJob(Compilation &C, const JobAction &JA, addArgs(CmdArgs, TCArgs, {Arg}); break; } + case FileTableTformJobAction::Tform::COPY_SINGLE_FILE: { + assert(Tf.TheArgs.size() == 2 && "column name and row id expected"); + SmallString<128> Arg("-copy_single_file="); + Arg += Tf.TheArgs[0]; + Arg += ","; + Arg += Tf.TheArgs[1]; + addArgs(CmdArgs, TCArgs, {Arg}); + break; + } } - } - // 2) add copy_single_file option if requested - if (!TformJob.getCopySingleFileColumnName().empty()) { - SmallString<128> Arg("-copy_single_file="); - Arg += TformJob.getCopySingleFileColumnName(); - addArgs(CmdArgs, TCArgs, {Arg}); } - // 3) add output option + // 2) add output option assert(Output.isFilename() && "table tform output must be a file"); addArgs(CmdArgs, TCArgs, {"-o", Output.getFilename()}); - // 4) add inputs + // 3) add inputs for (const auto &Input : Inputs) { assert(Input.isFilename() && "table tform input must be a file"); addArgs(CmdArgs, TCArgs, {Input.getFilename()}); } - // 5) finally construct and add a command to the compilation + // 4) finally construct and add a command to the compilation C.addCommand(std::make_unique( JA, *this, ResponseFileSupport::None(), TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), diff --git a/llvm/test/tools/file-table-tform/file-table-tform-single.test b/llvm/test/tools/file-table-tform/file-table-tform-single.test index 75234cea00088..b64e4baf8c419 100644 --- a/llvm/test/tools/file-table-tform/file-table-tform-single.test +++ b/llvm/test/tools/file-table-tform/file-table-tform-single.test @@ -2,7 +2,7 @@ RUN: file-table-tform --replace_cell=A,0 %S/Inputs/s.txt %S/Inputs/gold.txt -o t.txt -- Copy file in column A from the only row -RUN: file-table-tform --copy_single_file=A t.txt -o u.txt +RUN: file-table-tform --copy_single_file=A,0 t.txt -o u.txt -- Verify result RUN: diff u.txt %S/Inputs/gold.txt diff --git a/llvm/tools/file-table-tform/file-table-tform.cpp b/llvm/tools/file-table-tform/file-table-tform.cpp index b923303c60c03..5638b40b4d15d 100644 --- a/llvm/tools/file-table-tform/file-table-tform.cpp +++ b/llvm/tools/file-table-tform/file-table-tform.cpp @@ -81,6 +81,7 @@ static constexpr char OPT_REPLACE[] = "replace"; static constexpr char OPT_REPLACE_CELL[] = "replace_cell"; static constexpr char OPT_RENAME[] = "rename"; static constexpr char OPT_EXTRACT[] = "extract"; +static constexpr char OPT_COPY_SINGLE_FILE[] = "copy_single_file"; static cl::list TformReplace{ OPT_REPLACE, cl::ZeroOrMore, cl::desc("replace a column"), @@ -100,16 +101,16 @@ static cl::list TformExtract{ cl::desc("extract column(s) identified by names"), cl::value_desc(",,..."), cl::cat(FileTableTformCat)}; +static cl::list TformCopySingleFile{ + OPT_COPY_SINGLE_FILE, cl::ZeroOrMore, + cl::desc("copy the file in a cell as make it the output"), + cl::value_desc(","), + cl::cat(FileTableTformCat)}; + static cl::opt DropTitles{"drop_titles", cl::Optional, cl::desc("drop column titles"), cl::cat(FileTableTformCat)}; -static cl::opt CopySingleFile{ - "copy_single_file", cl::Optional, - cl::desc("copy the only remaining file in specified column after " - "transformation"), - cl::cat(FileTableTformCat)}; - Error makeToolError(Twine Msg) { return make_error("*** " + llvm::Twine(ToolName) + " ERROR: " + Msg, @@ -169,7 +170,9 @@ struct TformCmd { return Cmd->consumeSingleInput(Cur, End); }) .Case(OPT_RENAME, [&](TformCmd *Cmd) { return Error::success(); }) - .Case(OPT_EXTRACT, [&](TformCmd *Cmd) { return Error::success(); }); + .Case(OPT_EXTRACT, [&](TformCmd *Cmd) { return Error::success(); }) + .Case(OPT_COPY_SINGLE_FILE, + [&](TformCmd *Cmd) { return Error::success(); }); return F(this); } @@ -192,7 +195,7 @@ struct TformCmd { }) .Case(OPT_REPLACE_CELL, [&](TformCmd *Cmd) -> Error { - // argument is + // argument is , if (Arg.empty()) return makeUserError("empty argument in " + Twine(OPT_REPLACE_CELL)); @@ -216,15 +219,30 @@ struct TformCmd { Args.push_back(Names.second); return Error::success(); }) - .Case(OPT_EXTRACT, [&](TformCmd *Cmd) -> Error { - // argument is ,,... (1 or more) + .Case( + OPT_EXTRACT, + [&](TformCmd *Cmd) -> Error { + // argument is ,,... (1 or more) + if (Arg.empty()) + return makeUserError("empty argument in " + + Twine(OPT_RENAME)); + SmallVector Names; + Arg.split(Names, ','); + if (std::find(Names.begin(), Names.end(), "") != Names.end()) + return makeUserError("empty name in " + Twine(OPT_RENAME)); + std::copy(Names.begin(), Names.end(), + std::back_inserter(Args)); + return Error::success(); + }) + .Case(OPT_COPY_SINGLE_FILE, [&](TformCmd *Cmd) -> Error { + // argument is , if (Arg.empty()) - return makeUserError("empty argument in " + Twine(OPT_RENAME)); - SmallVector Names; - Arg.split(Names, ','); - if (std::find(Names.begin(), Names.end(), "") != Names.end()) - return makeUserError("empty name in " + Twine(OPT_RENAME)); - std::copy(Names.begin(), Names.end(), std::back_inserter(Args)); + return makeUserError("empty argument in " + + Twine(OPT_COPY_SINGLE_FILE)); + Arg.split(Args, ','); + if (Args.size() != 2 || Args[0].empty() || Args[1].empty()) + return makeUserError("invalid argument in " + + Twine(OPT_COPY_SINGLE_FILE)); return Error::success(); }); return F(this); @@ -261,11 +279,30 @@ struct TformCmd { Error Res = Table.renameColumn(Args[0], Args[1]); return Res ? std::move(Res) : std::move(Error::success()); }) - .Case(OPT_EXTRACT, [&](TformCmd *Cmd) -> Error { - // argument is ,,... (1 or more) - assert(!Args.empty()); - Error Res = Table.peelColumns(Args); - return Res ? std::move(Res) : std::move(Error::success()); + .Case(OPT_EXTRACT, + [&](TformCmd *Cmd) -> Error { + // argument is ,,... (1 or more) + assert(!Args.empty()); + Error Res = Table.peelColumns(Args); + return Res ? std::move(Res) : std::move(Error::success()); + }) + .Case(OPT_COPY_SINGLE_FILE, [&](TformCmd *Cmd) -> Error { + // argument is , + assert(Args.size() == 2); + const int Row = std::stoi(Args[1].str()); + if (Row >= Table.getNumRows()) + return makeUserError("row index is out of bounds"); + + // Copy the file from the only remaining row at specified + // column + StringRef FileToCopy = Table[Row].getCell(Args[0], ""); + + if (FileToCopy.empty()) + return makeUserError("no file found in specified column"); + + std::error_code EC = sys::fs::copy_file(FileToCopy, Output); + return EC ? createFileError(Output, EC) + : std::move(Error::success()); }); return F(this); } @@ -290,13 +327,16 @@ int main(int argc, char **argv) { "File table transformation tool.\n" "Inputs and output of this tool is a \"file table\" files containing\n" "2D table of strings with optional row of column titles. Based on\n" - "transformation actions passed via the command line, the tool " - "transforms the first input file table and emits a new one as a result.\n" + "transformation actions passed via the command line, the tool\n" + "transforms the first input file table and emits either a new file\n" + "table or a copy of a file in a cell of the input table.\n" "\n" "Transformation actions are:\n" "- replace a column\n" + "- replace a cell\n" "- rename a column\n" - "- extract column(s)\n"); + "- extract column(s)\n" + "- ouput a copy of a file in a cell\n"); std::map Cmds; @@ -305,7 +345,8 @@ int main(int argc, char **argv) { // established first to properly map inputs to commands. auto Lists = {std::addressof(TformReplace), std::addressof(TformReplaceCell), - std::addressof(TformRename), std::addressof(TformExtract)}; + std::addressof(TformRename), std::addressof(TformExtract), + std::addressof(TformCopySingleFile)}; for (const auto *L : Lists) { for (auto It = L->begin(); It != L->end(); It++) { @@ -326,6 +367,19 @@ int main(int argc, char **argv) { CHECK_AND_EXIT(makeUserError("no inputs")); std::string &InputFile = *CurInput++; + // ensure that if copy_single_file is specified, it must be the last tform + bool HasCopySingleFileTform = false; + for (auto &P : Cmds) { + if (HasCopySingleFileTform) { + CHECK_AND_EXIT( + makeUserError("copy_single_file must be the last transformation")); + } + if (P.second->Kind != OPT_COPY_SINGLE_FILE) { + continue; + } + HasCopySingleFileTform = true; + } + for (auto &P : Cmds) { TformCmd::UPtrTy &Cmd = P.second; // this will advance cur iterator as far as needed @@ -346,21 +400,9 @@ int main(int argc, char **argv) { CHECK_AND_EXIT(std::move(Res)); } - if (!CopySingleFile.empty()) { - // Copy the file from the only remaining row at specified column - if (Table.get()->getNumRows() > 1) - CHECK_AND_EXIT(makeUserError("cannot copy files from multiple rows")); - if (Table.get()->getNumRows() == 0) - CHECK_AND_EXIT(makeUserError("no rows remaining after transformation")); - StringRef FileToCopy = (*Table.get())[0].getCell(CopySingleFile, ""); - - if (FileToCopy.empty()) - CHECK_AND_EXIT(makeUserError("no file found in specified column")); - - std::error_code EC = sys::fs::copy_file(FileToCopy, Output); - if (EC) - CHECK_AND_EXIT(createFileError(Output, EC)); - } else { + // If copy_single_file was specified the output file is generated by the + // corresponding transformation. + if (!HasCopySingleFileTform) { // Write the transformed table to file std::error_code EC; raw_fd_ostream Out{Output, EC, sys::fs::OpenFlags::OF_None}; From 25eef456dba7ee37c27bb8eaaf1e7856a5da212f Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Fri, 21 May 2021 12:55:11 +0100 Subject: [PATCH 07/12] Adjusts updateCellValue parameters, comments, and errors Signed-off-by: Steffen Larsen --- clang/lib/Driver/Driver.cpp | 2 ++ llvm/include/llvm/Support/SimpleTable.h | 4 ++-- llvm/lib/Support/SimpleTable.cpp | 2 +- llvm/tools/file-table-tform/file-table-tform.cpp | 9 +++------ llvm/unittests/Support/SimpleTableTest.cpp | 2 +- 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 49263ce920c72..00730ae307b5c 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4460,6 +4460,8 @@ class OffloadingActionBuilder final { if (isNVPTX || isAMDGCN) { // Make extraction copy the only remaining code file instead of // creating a new table with a single entry. + // TODO: Process all PTX code files in file table when code-splitting + // is implemented to PTX target. auto *ExtractIRFilesAction = C.MakeAction( PostLinkAction, types::TY_LLVM_BC); ExtractIRFilesAction->addCopySingleFileTform(COL_CODE, 0); diff --git a/llvm/include/llvm/Support/SimpleTable.h b/llvm/include/llvm/Support/SimpleTable.h index 8bda35f8b598c..e91f288a6bc26 100644 --- a/llvm/include/llvm/Support/SimpleTable.h +++ b/llvm/include/llvm/Support/SimpleTable.h @@ -97,8 +97,8 @@ class SimpleTable { Error replaceColumn(StringRef Name, const SimpleTable &Src, StringRef SrcName = ""); - // Replaces the value in a cell at a given row and column with the new value. - Error updateCellValue(int Row, StringRef ColName, StringRef NewValue); + // Replaces the value in a cell at a given column and row with the new value. + Error updateCellValue(StringRef ColName, int Row, StringRef NewValue); // Renames a column. Error renameColumn(StringRef OldName, StringRef NewName); diff --git a/llvm/lib/Support/SimpleTable.cpp b/llvm/lib/Support/SimpleTable.cpp index 9132a63b8f4b7..41aeb57b15d28 100644 --- a/llvm/lib/Support/SimpleTable.cpp +++ b/llvm/lib/Support/SimpleTable.cpp @@ -109,7 +109,7 @@ Error SimpleTable::replaceColumn(StringRef Name, const SimpleTable &Src, return Error::success(); } -Error SimpleTable::updateCellValue(int Row, StringRef ColName, +Error SimpleTable::updateCellValue(StringRef ColName, int Row, StringRef NewValue) { if (getNumColumns() == 0) return makeError("empty table"); diff --git a/llvm/tools/file-table-tform/file-table-tform.cpp b/llvm/tools/file-table-tform/file-table-tform.cpp index 5638b40b4d15d..1ded228953a00 100644 --- a/llvm/tools/file-table-tform/file-table-tform.cpp +++ b/llvm/tools/file-table-tform/file-table-tform.cpp @@ -103,7 +103,7 @@ static cl::list TformExtract{ static cl::list TformCopySingleFile{ OPT_COPY_SINGLE_FILE, cl::ZeroOrMore, - cl::desc("copy the file in a cell as make it the output"), + cl::desc("copy the file in a cell and make it the output"), cl::value_desc(","), cl::cat(FileTableTformCat)}; @@ -269,7 +269,7 @@ struct TformCmd { assert(Args.size() == 2 && Cmd->Inputs.size() == 1); const int Row = std::stoi(Args[1].str()); Error Res = - Table.updateCellValue(Row, Args[0], Cmd->Inputs[0]); + Table.updateCellValue(Args[0], Row, Cmd->Inputs[0]); return Res ? std::move(Res) : std::move(Error::success()); }) .Case(OPT_RENAME, @@ -374,10 +374,7 @@ int main(int argc, char **argv) { CHECK_AND_EXIT( makeUserError("copy_single_file must be the last transformation")); } - if (P.second->Kind != OPT_COPY_SINGLE_FILE) { - continue; - } - HasCopySingleFileTform = true; + HasCopySingleFileTform = P.second->Kind == OPT_COPY_SINGLE_FILE; } for (auto &P : Cmds) { diff --git a/llvm/unittests/Support/SimpleTableTest.cpp b/llvm/unittests/Support/SimpleTableTest.cpp index 135cc98dd6f13..3626ed9d864fa 100644 --- a/llvm/unittests/Support/SimpleTableTest.cpp +++ b/llvm/unittests/Support/SimpleTableTest.cpp @@ -63,7 +63,7 @@ TEST(SimpleTable, Operations) { FAIL() << "SimpleTable::replaceColumn failed: " << Err << "\n"; // -- Update cell - if (Error Err = Table->get()->updateCellValue(1, "Properties", + if (Error Err = Table->get()->updateCellValue("Properties", 1, ReplaceSinglePropertyWith)) FAIL() << "SimpleTable::updateCellValue failed: " << Err << "\n"; From c952da0263d5cfe4feeda3f717a4885aa35daf7b Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Mon, 24 May 2021 09:36:13 +0100 Subject: [PATCH 08/12] Change code-splitting TODO Signed-off-by: Steffen Larsen --- clang/lib/Driver/Driver.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 00730ae307b5c..6112ea4902d76 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4460,8 +4460,8 @@ class OffloadingActionBuilder final { if (isNVPTX || isAMDGCN) { // Make extraction copy the only remaining code file instead of // creating a new table with a single entry. - // TODO: Process all PTX code files in file table when code-splitting - // is implemented to PTX target. + // TODO: Process all PTX code files in file table to enable code + // splitting for PTX target. auto *ExtractIRFilesAction = C.MakeAction( PostLinkAction, types::TY_LLVM_BC); ExtractIRFilesAction->addCopySingleFileTform(COL_CODE, 0); From 177c14d891dc9cf040f1d8e5a0319d7d16ce6ffc Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Thu, 3 Jun 2021 11:14:48 +0100 Subject: [PATCH 09/12] Added assertion for 3 reqd_work_group_size metadata operands Signed-off-by: Steffen Larsen --- llvm/tools/sycl-post-link/sycl-post-link.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 21d83a68ead50..eacb63b4792ec 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -385,8 +385,10 @@ static HasAssertStatus hasAssertInFunctionCallGraph(llvm::Function *Func) { static std::vector getKernelReqdWorkGroupSizeMetadata(const Function &Func) { auto ReqdWorkGroupSizeMD = Func.getMetadata("reqd_work_group_size"); - if (!ReqdWorkGroupSizeMD || ReqdWorkGroupSizeMD->getNumOperands() != 3) + if (!ReqdWorkGroupSizeMD) return {}; + // TODO: Remove 3-operand assumption when it is relaxed. + assert(ReqdWorkGroupSizeMD->getNumOperands() == 3); uint32_t X = mdconst::extract(ReqdWorkGroupSizeMD->getOperand(0)) ->getZExtValue(); uint32_t Y = mdconst::extract(ReqdWorkGroupSizeMD->getOperand(1)) From c8173253bcc699b44f5def0b7050a080aec44233 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Mon, 28 Jun 2021 18:35:20 +0100 Subject: [PATCH 10/12] Adds piProgramCreateWithBinary comment and makes new parameters more consistent Signed-off-by: Steffen Larsen --- sycl/include/CL/sycl/detail/pi.h | 20 +++++++++++++++++-- sycl/plugins/cuda/pi_cuda.cpp | 6 +++--- sycl/plugins/esimd_cpu/pi_esimd_cpu.cpp | 5 ++--- sycl/plugins/level_zero/pi_level_zero.cpp | 6 +++--- sycl/plugins/opencl/pi_opencl.cpp | 6 +++--- sycl/plugins/rocm/pi_rocm.cpp | 4 ++-- .../program_manager/program_manager.cpp | 2 +- sycl/unittests/kernel-and-program/Cache.cpp | 4 ++-- sycl/unittests/misc/KernelBuildOptions.cpp | 4 ++-- sycl/unittests/pi/cuda/test_kernels.cpp | 18 ++++++++--------- 10 files changed, 45 insertions(+), 30 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index e8b025a294e39..9a0e9470e7bda 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -1115,11 +1115,27 @@ __SYCL_EXPORT pi_result piclProgramCreateWithSource(pi_context context, const size_t *lengths, pi_program *ret_program); +/// Creates a PI program for a context and loads the given binary into it. +/// +/// \param context is the PI context to associate the program with. +/// \param num_devices is the number of devices in device_list. +/// \param device_list is a pointer to a list of devices. These devices must all +/// be in context. +/// \param lengths is an array of sizes in bytes of the binary in binaries. +/// \param binaries is a pointer to a list of program binaries. +/// \param num_metadata_entries is the number of metadata entries in metadata. +/// \param metadata is a pointer to a list of program metadata entries. The +/// use of metadata entries is backend-defined. +/// \param binary_status returns whether the program binary was loaded +/// succesfully or not, for each device in device_list. +/// binary_status is ignored if it is null and otherwise +/// it must be an array of num_devices elements. +/// \param program is the PI program created from the program binaries. __SYCL_EXPORT pi_result piProgramCreateWithBinary( pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, - pi_int32 *binary_status, const pi_device_binary_property *metadata, - size_t metadata_length, pi_program *ret_program); + size_t num_metadata_entries, const pi_device_binary_property *metadata, + pi_int32 *binary_status, pi_program *ret_program); __SYCL_EXPORT pi_result piProgramGetInfo(pi_program program, pi_program_info param_name, diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 09ec7aedf97af..8e49494d13f83 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -2877,8 +2877,8 @@ pi_result cuda_piProgramCreate(pi_context, const void *, size_t, pi_program *) { pi_result cuda_piProgramCreateWithBinary( pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, - pi_int32 *binary_status, const pi_device_binary_property *metadata, - size_t metadata_length, pi_program *program) { + size_t num_metadata_entries, const pi_device_binary_property *metadata, + pi_int32 *binary_status, pi_program *program) { // Ignore unused parameter (void)binary_status; @@ -2895,7 +2895,7 @@ pi_result cuda_piProgramCreateWithBinary( std::unique_ptr<_pi_program> retProgram{new _pi_program{context}}; - retProgram->set_metadata(metadata, metadata_length); + retProgram->set_metadata(metadata, num_metadata_entries); const bool has_length = (lengths != nullptr); size_t length = has_length diff --git a/sycl/plugins/esimd_cpu/pi_esimd_cpu.cpp b/sycl/plugins/esimd_cpu/pi_esimd_cpu.cpp index b359a057fe945..79961f17fc6a1 100644 --- a/sycl/plugins/esimd_cpu/pi_esimd_cpu.cpp +++ b/sycl/plugins/esimd_cpu/pi_esimd_cpu.cpp @@ -740,9 +740,8 @@ pi_result piProgramCreate(pi_context, const void *, size_t, pi_program *) { pi_result piProgramCreateWithBinary(pi_context, pi_uint32, const pi_device *, const size_t *, const unsigned char **, - pi_int32 *, - const pi_device_binary_property *, size_t, - pi_program *) { + size_t, const pi_device_binary_property *, + pi_int32 *, pi_program *) { DIE_NO_IMPLEMENTATION; return PI_SUCCESS; } diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 8876bfbfe42da..c1fd48e9015b4 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -2939,10 +2939,10 @@ pi_result piProgramCreate(pi_context Context, const void *ILBytes, pi_result piProgramCreateWithBinary( pi_context Context, pi_uint32 NumDevices, const pi_device *DeviceList, const size_t *Lengths, const unsigned char **Binaries, - pi_int32 *BinaryStatus, const pi_device_binary_property *Metadata, - size_t MetadataLength, pi_program *Program) { + size_t NumMetadataEntries, const pi_device_binary_property *Metadata, + pi_int32 *BinaryStatus, pi_program *Program) { (void)Metadata; - (void)MetadataLength; + (void)NumMetadataEntries; PI_ASSERT(Context, PI_INVALID_CONTEXT); PI_ASSERT(DeviceList && NumDevices, PI_INVALID_VALUE); diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 21ebe1980da6b..82124b08f0c07 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -638,10 +638,10 @@ pi_result piclProgramCreateWithSource(pi_context context, pi_uint32 count, pi_result piProgramCreateWithBinary( pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, - pi_int32 *binary_status, const pi_device_binary_property *metadata, - size_t metadata_length, pi_program *ret_program) { + size_t num_metadata_entries, const pi_device_binary_property *metadata, + pi_int32 *binary_status, pi_program *ret_program) { (void)metadata; - (void)metadata_length; + (void)num_metadata_entries; pi_result ret_err = PI_INVALID_OPERATION; *ret_program = cast(clCreateProgramWithBinary( diff --git a/sycl/plugins/rocm/pi_rocm.cpp b/sycl/plugins/rocm/pi_rocm.cpp index 3d4fce5d03300..026c1fa8d5f10 100644 --- a/sycl/plugins/rocm/pi_rocm.cpp +++ b/sycl/plugins/rocm/pi_rocm.cpp @@ -2742,8 +2742,8 @@ pi_result rocm_piProgramCreate(pi_context context, const void *il, pi_result rocm_piProgramCreateWithBinary( pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, - pi_int32 *binary_status, const pi_device_binary_property *metadata, - size_t metadata_length, pi_program *program) { + size_t num_metadata_entries, const pi_device_binary_property *metadata, + pi_int32 *binary_status, pi_program *program) { assert(context != nullptr); assert(binaries != nullptr); assert(program != nullptr); diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 93d38ffe5b5a3..548fc80368cd7 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -84,7 +84,7 @@ createBinaryProgram(const ContextImplPtr Context, const device &Device, pi_int32 BinaryStatus = CL_SUCCESS; Plugin.call( Context->getHandleRef(), 1 /*one binary*/, &PiDevice, &DataLen, &Data, - &BinaryStatus, Metadata.data(), Metadata.size(), &Program); + Metadata.size(), Metadata.data(), &BinaryStatus, &Program); if (BinaryStatus != CL_SUCCESS) { throw runtime_error("Creating program with binary failed.", BinaryStatus); diff --git a/sycl/unittests/kernel-and-program/Cache.cpp b/sycl/unittests/kernel-and-program/Cache.cpp index bafb4777c040a..946f54f5de596 100644 --- a/sycl/unittests/kernel-and-program/Cache.cpp +++ b/sycl/unittests/kernel-and-program/Cache.cpp @@ -102,8 +102,8 @@ static pi_result redefinedProgramCreateWithSource(pi_context context, static pi_result redefinedProgramCreateWithBinary( pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, - pi_int32 *binary_status, const pi_device_binary_property *metadata, - size_t metadata_length, pi_program *ret_program) { + size_t metadata_length, const pi_device_binary_property *metadata, + pi_int32 *binary_status, pi_program *ret_program) { *ret_program = reinterpret_cast(1); return PI_SUCCESS; } diff --git a/sycl/unittests/misc/KernelBuildOptions.cpp b/sycl/unittests/misc/KernelBuildOptions.cpp index ef6d31a0d4270..91339553acd97 100644 --- a/sycl/unittests/misc/KernelBuildOptions.cpp +++ b/sycl/unittests/misc/KernelBuildOptions.cpp @@ -142,8 +142,8 @@ static pi_result redefinedProgramCreateWithSource(pi_context context, static pi_result redefinedProgramCreateWithBinary( pi_context context, pi_uint32 num_devices, const pi_device *device_list, const size_t *lengths, const unsigned char **binaries, - pi_int32 *binary_status, const pi_device_binary_property *metadata, - size_t metadata_length, pi_program *ret_program) { + size_t num_metadata_entries, const pi_device_binary_property *metadata, + pi_int32 *binary_status, pi_program *ret_program) { return PI_SUCCESS; } diff --git a/sycl/unittests/pi/cuda/test_kernels.cpp b/sycl/unittests/pi/cuda/test_kernels.cpp index 56ecea55e9a10..0bbf55e3e376f 100644 --- a/sycl/unittests/pi/cuda/test_kernels.cpp +++ b/sycl/unittests/pi/cuda/test_kernels.cpp @@ -142,7 +142,7 @@ TEST_F(CudaKernelsTest, PICreateProgramAndKernel) { ASSERT_EQ( (plugin->call_nocheck( context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, - &binary_status, nullptr, 0, &prog)), + 0, nullptr, &binary_status, &prog)), PI_SUCCESS); ASSERT_EQ((plugin->call_nocheck( @@ -187,7 +187,7 @@ TEST_F(CudaKernelsTest, PICreateProgramAndKernelWithMetadata) { ASSERT_EQ( (plugin.call_nocheck( context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, - &binary_status, &reqdWorkGroupSizeMDPropPointer, 1, &prog)), + 1, &reqdWorkGroupSizeMDPropPointer, &binary_status, &prog)), PI_SUCCESS); ASSERT_EQ((plugin.call_nocheck( @@ -220,7 +220,7 @@ TEST_F(CudaKernelsTest, PIKernelArgumentSimple) { ASSERT_EQ( (plugin->call_nocheck( context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, - &binary_status, nullptr, 0, &prog)), + 0, nullptr, &binary_status, &prog)), PI_SUCCESS); ASSERT_EQ(binary_status, PI_SUCCESS); @@ -250,7 +250,7 @@ TEST_F(CudaKernelsTest, PIKernelArgumentSetTwice) { ASSERT_EQ( (plugin->call_nocheck( context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, - &binary_status, nullptr, 0, &prog)), + 0, nullptr, &binary_status, &prog)), PI_SUCCESS); ASSERT_EQ(binary_status, PI_SUCCESS); @@ -289,7 +289,7 @@ TEST_F(CudaKernelsTest, PIKernelSetMemObj) { ASSERT_EQ( (plugin->call_nocheck( context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, - &binary_status, nullptr, 0, &prog)), + 0, nullptr, &binary_status, &prog)), PI_SUCCESS); ASSERT_EQ(binary_status, PI_SUCCESS); @@ -325,7 +325,7 @@ TEST_F(CudaKernelsTest, PIkerneldispatch) { ASSERT_EQ( (plugin->call_nocheck( context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, - &binary_status, nullptr, 0, &prog)), + 0, nullptr, &binary_status, &prog)), PI_SUCCESS); ASSERT_EQ(binary_status, PI_SUCCESS); @@ -369,7 +369,7 @@ TEST_F(CudaKernelsTest, PIkerneldispatchTwo) { ASSERT_EQ( (plugin->call_nocheck( context_, 1, &device_, nullptr, (const unsigned char **)&twoParams, - &binary_status, nullptr, 0, &prog)), + 0, nullptr, &binary_status, &prog)), PI_SUCCESS); ASSERT_EQ(binary_status, PI_SUCCESS); @@ -424,8 +424,8 @@ TEST_F(CudaKernelsTest, PIKernelArgumentSetTwiceOneLocal) { pi_int32 binary_status = PI_SUCCESS; ASSERT_EQ((plugin->call_nocheck( context_, 1, &device_, nullptr, - (const unsigned char **)&threeParamsTwoLocal, &binary_status, - nullptr, 0, &prog)), + (const unsigned char **)&threeParamsTwoLocal, 0, nullptr, + &binary_status, &prog)), PI_SUCCESS); ASSERT_EQ(binary_status, PI_SUCCESS); From 76ad746a0cf3824de6d478d7fbf626646245e6d9 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Mon, 28 Jun 2021 18:46:19 +0100 Subject: [PATCH 11/12] Fix formatting Signed-off-by: Steffen Larsen --- sycl/unittests/pi/cuda/test_kernels.cpp | 36 ++++++++++++------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/sycl/unittests/pi/cuda/test_kernels.cpp b/sycl/unittests/pi/cuda/test_kernels.cpp index 0bbf55e3e376f..c4bdc3230de48 100644 --- a/sycl/unittests/pi/cuda/test_kernels.cpp +++ b/sycl/unittests/pi/cuda/test_kernels.cpp @@ -141,8 +141,8 @@ TEST_F(CudaKernelsTest, PICreateProgramAndKernel) { pi_int32 binary_status = PI_SUCCESS; ASSERT_EQ( (plugin->call_nocheck( - context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, - 0, nullptr, &binary_status, &prog)), + context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, 0, + nullptr, &binary_status, &prog)), PI_SUCCESS); ASSERT_EQ((plugin->call_nocheck( @@ -185,23 +185,23 @@ TEST_F(CudaKernelsTest, PICreateProgramAndKernelWithMetadata) { pi_program prog; pi_int32 binary_status = PI_SUCCESS; ASSERT_EQ( - (plugin.call_nocheck( - context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, - 1, &reqdWorkGroupSizeMDPropPointer, &binary_status, &prog)), + (plugin->call_nocheck( + context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, 1, + &reqdWorkGroupSizeMDPropPointer, &binary_status, &prog)), PI_SUCCESS); - ASSERT_EQ((plugin.call_nocheck( + ASSERT_EQ((plugin->call_nocheck( prog, 1, &device_, "", nullptr, nullptr)), PI_SUCCESS); pi_kernel kern; - ASSERT_EQ((plugin.call_nocheck( + ASSERT_EQ((plugin->call_nocheck( prog, "_Z8myKernelPi", &kern)), PI_SUCCESS); ASSERT_NE(kern, nullptr); size_t compileWGSize[3] = {0}; - ASSERT_EQ((plugin.call_nocheck( + ASSERT_EQ((plugin->call_nocheck( kern, device_, PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, sizeof(size_t) * 3, compileWGSize, nullptr)), PI_SUCCESS); @@ -219,8 +219,8 @@ TEST_F(CudaKernelsTest, PIKernelArgumentSimple) { pi_int32 binary_status = PI_SUCCESS; ASSERT_EQ( (plugin->call_nocheck( - context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, - 0, nullptr, &binary_status, &prog)), + context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, 0, + nullptr, &binary_status, &prog)), PI_SUCCESS); ASSERT_EQ(binary_status, PI_SUCCESS); @@ -249,8 +249,8 @@ TEST_F(CudaKernelsTest, PIKernelArgumentSetTwice) { pi_int32 binary_status = PI_SUCCESS; ASSERT_EQ( (plugin->call_nocheck( - context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, - 0, nullptr, &binary_status, &prog)), + context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, 0, + nullptr, &binary_status, &prog)), PI_SUCCESS); ASSERT_EQ(binary_status, PI_SUCCESS); @@ -288,8 +288,8 @@ TEST_F(CudaKernelsTest, PIKernelSetMemObj) { pi_int32 binary_status = PI_SUCCESS; ASSERT_EQ( (plugin->call_nocheck( - context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, - 0, nullptr, &binary_status, &prog)), + context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, 0, + nullptr, &binary_status, &prog)), PI_SUCCESS); ASSERT_EQ(binary_status, PI_SUCCESS); @@ -324,8 +324,8 @@ TEST_F(CudaKernelsTest, PIkerneldispatch) { pi_int32 binary_status = PI_SUCCESS; ASSERT_EQ( (plugin->call_nocheck( - context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, - 0, nullptr, &binary_status, &prog)), + context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource, 0, + nullptr, &binary_status, &prog)), PI_SUCCESS); ASSERT_EQ(binary_status, PI_SUCCESS); @@ -368,8 +368,8 @@ TEST_F(CudaKernelsTest, PIkerneldispatchTwo) { pi_int32 binary_status = PI_SUCCESS; ASSERT_EQ( (plugin->call_nocheck( - context_, 1, &device_, nullptr, (const unsigned char **)&twoParams, - 0, nullptr, &binary_status, &prog)), + context_, 1, &device_, nullptr, (const unsigned char **)&twoParams, 0, + nullptr, &binary_status, &prog)), PI_SUCCESS); ASSERT_EQ(binary_status, PI_SUCCESS); From 5fda49a9a778a4a7853fe327f2e83c383cd326e4 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Mon, 28 Jun 2021 19:54:40 +0100 Subject: [PATCH 12/12] Fix sycl-offload-amdgcn test Signed-off-by: Steffen Larsen --- clang/test/Driver/sycl-offload-amdgcn.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/clang/test/Driver/sycl-offload-amdgcn.cpp b/clang/test/Driver/sycl-offload-amdgcn.cpp index 85884e27524b6..cbfaad9980337 100644 --- a/clang/test/Driver/sycl-offload-amdgcn.cpp +++ b/clang/test/Driver/sycl-offload-amdgcn.cpp @@ -28,10 +28,12 @@ // CHK-PHASES-NO-CC: 9: assembler, {8}, object, (host-sycl) // CHK-PHASES-NO-CC: 10: linker, {9}, image, (host-sycl) // CHK-PHASES-NO-CC: 11: linker, {5}, ir, (device-sycl) -// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl) -// CHK-PHASES-NO-CC: 13: backend, {12}, assembler, (device-sycl) -// CHK-PHASES-NO-CC: 14: assembler, {13}, object, (device-sycl) -// CHK-PHASES-NO-CC: 15: linker, {14}, image, (device-sycl) -// CHK-PHASES-NO-CC: 16: linker, {15}, hip-fatbin, (device-sycl) -// CHK-PHASES-NO-CC: 17: clang-offload-wrapper, {16}, object, (device-sycl) -// CHK-PHASES-NO-CC: 18: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (amdgcn-amd-amdhsa-sycldevice)" {17}, image +// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, tempfiletable, (device-sycl) +// CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl) +// CHK-PHASES-NO-CC: 14: backend, {13}, assembler, (device-sycl) +// CHK-PHASES-NO-CC: 15: assembler, {14}, object, (device-sycl) +// CHK-PHASES-NO-CC: 16: linker, {15}, image, (device-sycl) +// CHK-PHASES-NO-CC: 17: linker, {16}, hip-fatbin, (device-sycl) +// CHK-PHASES-NO-CC: 18: file-table-tform, {12, 17}, tempfiletable, (device-sycl) +// CHK-PHASES-NO-CC: 19: clang-offload-wrapper, {18}, object, (device-sycl) +// CHK-PHASES-NO-CC: 20: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (amdgcn-amd-amdhsa-sycldevice)" {19}, image