diff --git a/sycl-jit/jit-compiler/CMakeLists.txt b/sycl-jit/jit-compiler/CMakeLists.txt index fabf64546e9da..9e97d2c8ccfdb 100644 --- a/sycl-jit/jit-compiler/CMakeLists.txt +++ b/sycl-jit/jit-compiler/CMakeLists.txt @@ -60,6 +60,7 @@ target_include_directories(sycl-jit ${LLVM_MAIN_INCLUDE_DIR} ${LLVM_SPIRV_INCLUDE_DIRS} ${LLVM_EXTERNAL_CLANG_SOURCE_DIR}/include + ${LLVM_EXTERNAL_CLANG_SOURCE_DIR}/lib ${CMAKE_BINARY_DIR}/tools/clang/include ) target_include_directories(sycl-jit diff --git a/sycl-jit/jit-compiler/include/RTC.h b/sycl-jit/jit-compiler/include/RTC.h index a1a39d5b50ad3..6a690df459646 100644 --- a/sycl-jit/jit-compiler/include/RTC.h +++ b/sycl-jit/jit-compiler/include/RTC.h @@ -176,10 +176,11 @@ class RTCResult { /// Calculates a BLAKE3 hash of the pre-processed source string described by /// \p SourceFile (considering any additional \p IncludeFiles) and the -/// concatenation of the \p UserArgs. +/// concatenation of the \p UserArgs, for a given \p Format. JIT_EXPORT_SYMBOL RTCHashResult calculateHash(InMemoryFile SourceFile, View IncludeFiles, - View UserArgs); + View UserArgs, + BinaryFormat Format); /// Compiles, links against device libraries, and finalizes the device code in /// the source string described by \p SourceFile, considering any additional \p @@ -191,10 +192,14 @@ JIT_EXPORT_SYMBOL RTCHashResult calculateHash(InMemoryFile SourceFile, /// /// If \p SaveIR is true and \p CachedIR is empty, the LLVM module obtained from /// the frontend invocation is wrapped in bitcode format in the result object. +/// +/// \p BinaryFormat describes the desired format of the compilation - which +/// corresponds to the backend that is being targeted. JIT_EXPORT_SYMBOL RTCResult compileSYCL(InMemoryFile SourceFile, View IncludeFiles, View UserArgs, - View CachedIR, bool SaveIR); + View CachedIR, bool SaveIR, + BinaryFormat Format); /// Requests that the JIT binary referenced by \p Address is deleted from the /// `JITContext`. diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index bc5d638a25255..ade0161991b2d 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -8,11 +8,17 @@ #include "DeviceCompilation.h" #include "ESIMD.h" +#include "JITBinaryInfo.h" +#include "translation/Translation.h" +#include +#include +#include #include #include #include #include +#include #include #include #include @@ -178,7 +184,8 @@ class RTCToolActionBase : public ToolAction { assert(!hasExecuted() && "Action should only be invoked on a single file"); // Create a compiler instance to handle the actual work. - CompilerInstance Compiler(std::move(Invocation), std::move(PCHContainerOps)); + CompilerInstance Compiler(std::move(Invocation), + std::move(PCHContainerOps)); Compiler.setFileManager(Files); // Suppress summary with number of warnings and errors being printed to // stdout. @@ -312,7 +319,7 @@ class LLVMDiagnosticWrapper : public llvm::DiagnosticHandler { } // anonymous namespace static void adjustArgs(const InputArgList &UserArgList, - const std::string &DPCPPRoot, + const std::string &DPCPPRoot, BinaryFormat Format, SmallVectorImpl &CommandLine) { DerivedArgList DAL{UserArgList}; const auto &OptTable = getDriverOptTable(); @@ -325,6 +332,23 @@ static void adjustArgs(const InputArgList &UserArgList, // unused argument warning. DAL.AddFlagArg(nullptr, OptTable.getOption(OPT_Qunused_arguments)); + if (Format == BinaryFormat::PTX || Format == BinaryFormat::AMDGCN) { + auto [CPU, Features] = + Translator::getTargetCPUAndFeatureAttrs(nullptr, "", Format); + (void)Features; + if (Format == BinaryFormat::AMDGCN) { + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_fsycl_targets_EQ), + "amdgcn-amd-amdhsa"); + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_Xsycl_backend_EQ), + "amdgcn-amd-amdhsa"); + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_offload_arch_EQ), CPU); + } else { + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_fsycl_targets_EQ), + "nvptx64-nvidia-cuda"); + DAL.AddFlagArg(nullptr, OptTable.getOption(OPT_Xsycl_backend)); + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_cuda_gpu_arch_EQ), CPU); + } + } ArgStringList ASL; for_each(DAL, [&DAL, &ASL](Arg *A) { A->render(DAL, ASL); }); for_each(UserArgList, @@ -361,10 +385,9 @@ static void setupTool(ClangTool &Tool, const std::string &DPCPPRoot, }); } -Expected -jit_compiler::calculateHash(InMemoryFile SourceFile, - View IncludeFiles, - const InputArgList &UserArgList) { +Expected jit_compiler::calculateHash( + InMemoryFile SourceFile, View IncludeFiles, + const InputArgList &UserArgList, BinaryFormat Format) { TimeTraceScope TTS{"calculateHash"}; const std::string &DPCPPRoot = getDPCPPRoot(); @@ -373,7 +396,7 @@ jit_compiler::calculateHash(InMemoryFile SourceFile, } SmallVector CommandLine; - adjustArgs(UserArgList, DPCPPRoot, CommandLine); + adjustArgs(UserArgList, DPCPPRoot, Format, CommandLine); FixedCompilationDatabase DB{".", CommandLine}; ClangTool Tool{DB, {SourceFile.Path}}; @@ -399,11 +422,10 @@ jit_compiler::calculateHash(InMemoryFile SourceFile, return createStringError("Calculating source hash failed"); } -Expected -jit_compiler::compileDeviceCode(InMemoryFile SourceFile, - View IncludeFiles, - const InputArgList &UserArgList, - std::string &BuildLog, LLVMContext &Context) { +Expected jit_compiler::compileDeviceCode( + InMemoryFile SourceFile, View IncludeFiles, + const InputArgList &UserArgList, std::string &BuildLog, + LLVMContext &Context, BinaryFormat Format) { TimeTraceScope TTS{"compileDeviceCode"}; const std::string &DPCPPRoot = getDPCPPRoot(); @@ -412,7 +434,7 @@ jit_compiler::compileDeviceCode(InMemoryFile SourceFile, } SmallVector CommandLine; - adjustArgs(UserArgList, DPCPPRoot, CommandLine); + adjustArgs(UserArgList, DPCPPRoot, Format, CommandLine); FixedCompilationDatabase DB{".", CommandLine}; ClangTool Tool{DB, {SourceFile.Path}}; @@ -430,12 +452,22 @@ jit_compiler::compileDeviceCode(InMemoryFile SourceFile, return createStringError(BuildLog); } -// This function is a simplified copy of the device library selection process in -// `clang::driver::tools::SYCL::getDeviceLibraries`, assuming a SPIR-V target -// (no AoT, no third-party GPUs, no native CPU). Keep in sync! +// This function is a simplified copy of the device library selection process +// in `clang::driver::tools::SYCL::getDeviceLibraries`, assuming a SPIR-V, or +// GPU targets (no AoT, no native CPU). Keep in sync! static bool getDeviceLibraries(const ArgList &Args, SmallVectorImpl &LibraryList, - DiagnosticsEngine &Diags) { + DiagnosticsEngine &Diags, BinaryFormat Format) { + // For CUDA/HIP we only need devicelib, early exit here. + if (Format == BinaryFormat::PTX) { + LibraryList.push_back( + Args.MakeArgString("devicelib-nvptx64-nvidia-cuda.bc")); + return false; + } else if (Format == BinaryFormat::AMDGCN) { + LibraryList.push_back(Args.MakeArgString("devicelib-amdgcn-amd-amdhsa.bc")); + return false; + } + struct DeviceLibOptInfo { StringRef DeviceLibName; StringRef DeviceLibOption; @@ -540,7 +572,8 @@ static Expected loadBitcodeLibrary(StringRef LibPath, Error jit_compiler::linkDeviceLibraries(llvm::Module &Module, const InputArgList &UserArgList, - std::string &BuildLog) { + std::string &BuildLog, + BinaryFormat Format) { TimeTraceScope TTS{"linkDeviceLibraries"}; const std::string &DPCPPRoot = getDPCPPRoot(); @@ -555,11 +588,29 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module, /* ShouldOwnClient=*/false); SmallVector LibNames; - bool FoundUnknownLib = getDeviceLibraries(UserArgList, LibNames, Diags); + const bool FoundUnknownLib = + getDeviceLibraries(UserArgList, LibNames, Diags, Format); if (FoundUnknownLib) { return createStringError("Could not determine list of device libraries: %s", BuildLog.c_str()); } + const bool IsCudaHIP = + Format == BinaryFormat::PTX || Format == BinaryFormat::AMDGCN; + if (IsCudaHIP) { + // Based on the OS and the format decide on the version of libspirv. + // NOTE: this will be problematic if cross-compiling between OSes. + std::string Libclc{"clc/"}; + Libclc.append( +#ifdef _WIN32 + "remangled-l32-signed_char.libspirv-" +#else + "remangled-l64-signed_char.libspirv-" +#endif + ); + Libclc.append(Format == BinaryFormat::PTX ? "nvptx64-nvidia-cuda.bc" + : "amdgcn-amd-amdhsa.bc"); + LibNames.push_back(Libclc); + } LLVMContext &Context = Module.getContext(); for (const std::string &LibName : LibNames) { @@ -577,6 +628,58 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module, } } + // For GPU targets we need to link against vendor provided libdevice. + if (IsCudaHIP) { + Triple T{Module.getTargetTriple()}; + Driver D{(Twine(DPCPPRoot) + "/bin/clang++").str(), T.getTriple(), Diags}; + auto [CPU, Features] = + Translator::getTargetCPUAndFeatureAttrs(&Module, "", Format); + (void)Features; + // Helper lambda to link modules. + auto LinkInLib = [&](const StringRef LibDevice) -> Error { + ModuleUPtr LibDeviceModule; + if (auto Error = loadBitcodeLibrary(LibDevice, Context) + .moveInto(LibDeviceModule)) { + return Error; + } + if (Linker::linkModules(Module, std::move(LibDeviceModule), + Linker::LinkOnlyNeeded)) { + return createStringError("Unable to link libdevice: %s", + BuildLog.c_str()); + } + return Error::success(); + }; + SmallVector LibDeviceFiles; + if (Format == BinaryFormat::PTX) { + // For NVPTX we can get away with CudaInstallationDetector. + LazyDetector CudaInstallation{D, T, + UserArgList}; + auto LibDevice = CudaInstallation->getLibDeviceFile(CPU); + if (LibDevice.empty()) { + return createStringError("Unable to find Cuda libdevice"); + } + LibDeviceFiles.push_back(LibDevice); + } else { + // AMDGPU requires entire toolchain in order to provide all common bitcode + // libraries. + clang::driver::toolchains::ROCMToolChain TC(D, T, UserArgList); + auto CommonDeviceLibs = TC.getCommonDeviceLibNames( + UserArgList, CPU, Action::OffloadKind::OFK_SYCL, false); + if (CommonDeviceLibs.empty()) { + return createStringError("Unable to find ROCm common device libraries"); + } + for (auto &Lib : CommonDeviceLibs) { + LibDeviceFiles.push_back(Lib.Path); + } + } + for (auto &LibDeviceFile : LibDeviceFiles) { + // llvm::Error converts to false on success. + if (auto Error = LinkInLib(LibDeviceFile)) { + return Error; + } + } + } + return Error::success(); } diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h index 62da2cdb54bf1..aa1b19df0cc8f 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h @@ -8,6 +8,7 @@ #pragma once +#include "JITBinaryInfo.h" #include "RTC.h" #include @@ -24,16 +25,17 @@ using ModuleUPtr = std::unique_ptr; llvm::Expected calculateHash(InMemoryFile SourceFile, View IncludeFiles, - const llvm::opt::InputArgList &UserArgList); + const llvm::opt::InputArgList &UserArgList, BinaryFormat Format); llvm::Expected compileDeviceCode(InMemoryFile SourceFile, View IncludeFiles, const llvm::opt::InputArgList &UserArgList, - std::string &BuildLog, llvm::LLVMContext &Context); + std::string &BuildLog, llvm::LLVMContext &Context, + BinaryFormat Format); llvm::Error linkDeviceLibraries(llvm::Module &Module, const llvm::opt::InputArgList &UserArgList, - std::string &BuildLog); + std::string &BuildLog, BinaryFormat Format); using PostLinkResult = std::pair>; llvm::Expected diff --git a/sycl-jit/jit-compiler/lib/rtc/RTC.cpp b/sycl-jit/jit-compiler/lib/rtc/RTC.cpp index e9307ccb27eed..2e42bca3a49a5 100644 --- a/sycl-jit/jit-compiler/lib/rtc/RTC.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/RTC.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "RTC.h" +#include "JITBinaryInfo.h" #include "helper/ErrorHelper.h" #include "rtc/DeviceCompilation.h" #include "translation/SPIRVLLVMTranslation.h" @@ -26,7 +27,8 @@ using namespace jit_compiler; JIT_EXPORT_SYMBOL RTCHashResult calculateHash(InMemoryFile SourceFile, View IncludeFiles, - View UserArgs) { + View UserArgs, + BinaryFormat Format) { llvm::opt::InputArgList UserArgList; if (auto Error = parseUserArgs(UserArgs).moveInto(UserArgList)) { return errorTo(std::move(Error), @@ -36,8 +38,8 @@ JIT_EXPORT_SYMBOL RTCHashResult calculateHash(InMemoryFile SourceFile, auto Start = std::chrono::high_resolution_clock::now(); std::string Hash; - if (auto Error = - calculateHash(SourceFile, IncludeFiles, UserArgList).moveInto(Hash)) { + if (auto Error = calculateHash(SourceFile, IncludeFiles, UserArgList, Format) + .moveInto(Hash)) { return errorTo(std::move(Error), "Hashing failed", /*IsHash=*/false); } @@ -55,7 +57,8 @@ JIT_EXPORT_SYMBOL RTCHashResult calculateHash(InMemoryFile SourceFile, JIT_EXPORT_SYMBOL RTCResult compileSYCL(InMemoryFile SourceFile, View IncludeFiles, View UserArgs, - View CachedIR, bool SaveIR) { + View CachedIR, bool SaveIR, + BinaryFormat Format) { llvm::LLVMContext Context; std::string BuildLog; configureDiagnostics(Context, BuildLog); @@ -104,7 +107,7 @@ JIT_EXPORT_SYMBOL RTCResult compileSYCL(InMemoryFile SourceFile, bool FromSource = !Module; if (FromSource) { if (auto Error = compileDeviceCode(SourceFile, IncludeFiles, UserArgList, - BuildLog, Context) + BuildLog, Context, Format) .moveInto(Module)) { return errorTo(std::move(Error), "Device compilation failed"); } @@ -118,7 +121,8 @@ JIT_EXPORT_SYMBOL RTCResult compileSYCL(InMemoryFile SourceFile, IR = RTCDeviceCodeIR{BCString.data(), BCString.data() + BCString.size()}; } - if (auto Error = linkDeviceLibraries(*Module, UserArgList, BuildLog)) { + if (auto Error = + linkDeviceLibraries(*Module, UserArgList, BuildLog, Format)) { return errorTo(std::move(Error), "Device linking failed"); } @@ -131,9 +135,9 @@ JIT_EXPORT_SYMBOL RTCResult compileSYCL(InMemoryFile SourceFile, for (auto [DevImgInfo, Module] : llvm::zip_equal(BundleInfo.DevImgInfos, Modules)) { - if (auto Error = Translator::translate(*Module, JITContext::getInstance(), - BinaryFormat::SPIRV) - .moveInto(DevImgInfo.BinaryInfo)) { + if (auto Error = + Translator::translate(*Module, JITContext::getInstance(), Format) + .moveInto(DevImgInfo.BinaryInfo)) { return errorTo(std::move(Error), "SPIR-V translation failed"); } } diff --git a/sycl-jit/jit-compiler/lib/translation/Translation.cpp b/sycl-jit/jit-compiler/lib/translation/Translation.cpp index 457b6bfc0e637..99eaffd3384d1 100644 --- a/sycl-jit/jit-compiler/lib/translation/Translation.cpp +++ b/sycl-jit/jit-compiler/lib/translation/Translation.cpp @@ -83,9 +83,6 @@ llvm::Expected Translator::translateToPTX(llvm::Module &Mod, LLVMInitializeNVPTXAsmPrinter(); LLVMInitializeNVPTXTargetMC(); - static const char *TARGET_CPU_ATTRIBUTE = "target-cpu"; - static const char *TARGET_FEATURE_ATTRIBUTE = "target-features"; - std::string TargetTriple{"nvptx64-nvidia-cuda"}; std::string ErrorMessage; @@ -99,32 +96,11 @@ llvm::Expected Translator::translateToPTX(llvm::Module &Mod, ErrorMessage.c_str()); } - // Give priority to user specified values (through environment variables: - // SYCL_JIT_AMDGCN_PTX_TARGET_CPU and SYCL_JIT_AMDGCN_PTX_TARGET_FEATURES). - auto CPUVal = ConfigHelper::get(); - auto FeaturesVal = ConfigHelper::get(); - llvm::StringRef CPU = CPUVal.begin(); - llvm::StringRef Features = FeaturesVal.begin(); - - auto *KernelFunc = KernelName ? Mod.getFunction(KernelName) : nullptr; - // If they were not set, use default and consult the module for alternatives - // (if present). - if (CPU.empty()) { - CPU = "sm_50"; - if (KernelFunc && KernelFunc->hasFnAttribute(TARGET_CPU_ATTRIBUTE)) { - CPU = KernelFunc->getFnAttribute(TARGET_CPU_ATTRIBUTE).getValueAsString(); - } - } - if (Features.empty()) { - Features = "+sm_50,+ptx76"; - if (KernelFunc && KernelFunc->hasFnAttribute(TARGET_FEATURE_ATTRIBUTE)) { - Features = KernelFunc->getFnAttribute(TARGET_FEATURE_ATTRIBUTE) - .getValueAsString(); - } - } + auto [CPU, Features] = + getTargetCPUAndFeatureAttrs(&Mod, KernelName, BinaryFormat::PTX); std::unique_ptr TargetMachine(Target->createTargetMachine( - Mod.getTargetTriple(), CPU, Features, {}, llvm::Reloc::PIC_, std::nullopt, + Triple{TargetTriple}, CPU, Features, {}, llvm::Reloc::PIC_, std::nullopt, llvm::CodeGenOptLevel::Default)); llvm::legacy::PassManager PM; @@ -166,9 +142,6 @@ Translator::translateToAMDGCN(llvm::Module &Mod, JITContext &JITCtx, LLVMInitializeAMDGPUAsmPrinter(); LLVMInitializeAMDGPUTargetMC(); - static const char *TARGET_CPU_ATTRIBUTE = "target-cpu"; - static const char *TARGET_FEATURE_ATTRIBUTE = "target-features"; - std::string TargetTriple{"amdgcn-amd-amdhsa"}; std::string ErrorMessage; @@ -181,29 +154,10 @@ Translator::translateToAMDGCN(llvm::Module &Mod, JITContext &JITCtx, "Failed to load and translate AMDGCN LLVM IR module with error %s", ErrorMessage.c_str()); - auto CPUVal = ConfigHelper::get(); - auto FeaturesVal = ConfigHelper::get(); - llvm::StringRef CPU = CPUVal.begin(); - llvm::StringRef Features = FeaturesVal.begin(); - - auto *KernelFunc = KernelName ? Mod.getFunction(KernelName) : nullptr; - if (CPU.empty()) { - // Set to the lowest tested target according to the GetStartedGuide, section - // "Build DPC++ toolchain with support for HIP AMD" - CPU = "gfx906"; - if (KernelFunc && KernelFunc->hasFnAttribute(TARGET_CPU_ATTRIBUTE)) { - CPU = KernelFunc->getFnAttribute(TARGET_CPU_ATTRIBUTE).getValueAsString(); - } - } - if (Features.empty()) { - if (KernelFunc && KernelFunc->hasFnAttribute(TARGET_FEATURE_ATTRIBUTE)) { - Features = KernelFunc->getFnAttribute(TARGET_FEATURE_ATTRIBUTE) - .getValueAsString(); - } - } - + auto [CPU, Features] = + getTargetCPUAndFeatureAttrs(&Mod, KernelName, BinaryFormat::AMDGCN); std::unique_ptr TargetMachine(Target->createTargetMachine( - Mod.getTargetTriple(), CPU, Features, {}, llvm::Reloc::PIC_, std::nullopt, + Triple{TargetTriple}, CPU, Features, {}, llvm::Reloc::PIC_, std::nullopt, llvm::CodeGenOptLevel::Default)); std::string AMDObj; @@ -226,3 +180,34 @@ Translator::translateToAMDGCN(llvm::Module &Mod, JITContext &JITCtx, return &JITCtx.emplaceBinary(std::move(AMDObj), BinaryFormat::AMDGCN); #endif // JIT_SUPPORT_AMDGCN } + +std::pair Translator::getTargetCPUAndFeatureAttrs( + const llvm::Module *M, const char *KernelName, BinaryFormat Format) { + assert((Format == BinaryFormat::AMDGCN || Format == BinaryFormat::PTX) && + "Unexpected format found"); + static const char *TARGET_CPU_ATTRIBUTE = "target-cpu"; + static const char *TARGET_FEATURE_ATTRIBUTE = "target-features"; + // Give priority to user specified values (through environment variables: + // SYCL_JIT_AMDGCN_PTX_TARGET_CPU and SYCL_JIT_AMDGCN_PTX_TARGET_FEATURES). + auto *KernelFunc = (M && KernelName) ? M->getFunction(KernelName) : nullptr; + auto CPUVal = ConfigHelper::get(); + auto FeaturesVal = ConfigHelper::get(); + llvm::StringRef CPU = CPUVal.begin(); + llvm::StringRef Features = FeaturesVal.begin(); + if (CPU.empty()) { + // Set to the lowest tested target according to the GetStartedGuide, section + // "Build DPC++ toolchain with support for HIP AMD" + CPU = Format == BinaryFormat::AMDGCN ? "gfx90a" : "sm_50"; + if (KernelFunc && KernelFunc->hasFnAttribute(TARGET_CPU_ATTRIBUTE)) { + CPU = KernelFunc->getFnAttribute(TARGET_CPU_ATTRIBUTE).getValueAsString(); + } + } + if (Features.empty()) { + Features = Format == BinaryFormat::PTX ? "+sm_50,+ptx76" : ""; + if (KernelFunc && KernelFunc->hasFnAttribute(TARGET_FEATURE_ATTRIBUTE)) { + Features = KernelFunc->getFnAttribute(TARGET_FEATURE_ATTRIBUTE) + .getValueAsString(); + } + } + return std::make_pair(std::string{CPU}, std::string{Features}); +} diff --git a/sycl-jit/jit-compiler/lib/translation/Translation.h b/sycl-jit/jit-compiler/lib/translation/Translation.h index 9c8dfdda5f9f5..78bf4ef8ac416 100644 --- a/sycl-jit/jit-compiler/lib/translation/Translation.h +++ b/sycl-jit/jit-compiler/lib/translation/Translation.h @@ -12,7 +12,6 @@ #include "JITContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/Error.h" -#include namespace jit_compiler { @@ -26,6 +25,9 @@ class Translator { translate(llvm::Module &Mod, JITContext &JITCtx, BinaryFormat Format, const char *KernelName = nullptr); + std::pair static getTargetCPUAndFeatureAttrs( + const llvm::Module *M, const char *KernelName, BinaryFormat Format); + private: /// Pair of address and size to represent a binary blob. using BinaryBlob = std::pair; diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index bab3b9291d44f..c49409561f123 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -8,6 +8,7 @@ #pragma once +#include "JITBinaryInfo.h" #include #include #include @@ -699,6 +700,22 @@ class device_image_impl return MRTCBinInfo && MRTCBinInfo->MLanguage == Lang; } + static ::jit_compiler::BinaryFormat getTargetFormat(const backend Backend) { + switch (Backend) { + case backend::ext_oneapi_level_zero: + case backend::opencl: + return ::jit_compiler::BinaryFormat::SPIRV; + case backend::ext_oneapi_cuda: + return ::jit_compiler::BinaryFormat::PTX; + case backend::ext_oneapi_hip: + return ::jit_compiler::BinaryFormat::AMDGCN; + default: + throw sycl::exception( + sycl::make_error_code(sycl::errc::invalid), + "Backend does not support kernel_compiler extension"); + } + } + std::vector> buildFromSource( const std::vector &Devices, const std::vector &BuildOptions, @@ -729,9 +746,12 @@ class device_image_impl } } - if (MRTCBinInfo->MLanguage == syclex::source_language::sycl) + if (MRTCBinInfo->MLanguage == syclex::source_language::sycl) { + const auto Format = getTargetFormat(MContext.get_backend()); return createSYCLImages(Devices, bundle_state::executable, BuildOptions, - LogPtr, RegisteredKernelNames, OutDeviceBins); + LogPtr, RegisteredKernelNames, OutDeviceBins, + Format); + } std::vector DeviceVec; DeviceVec.reserve(Devices.size()); @@ -819,8 +839,10 @@ class device_image_impl "device does not support source language"); } } + const auto Format = getTargetFormat(MContext.get_backend()); return createSYCLImages(Devices, bundle_state::object, CompileOptions, - LogPtr, RegisteredKernelNames, OutDeviceBins); + LogPtr, RegisteredKernelNames, OutDeviceBins, + Format); } private: @@ -992,8 +1014,8 @@ class device_image_impl const std::vector &Options, std::string *LogPtr, const std::vector &RegisteredKernelNames, - std::vector> &OutDeviceBins) - const { + std::vector> &OutDeviceBins, + ::jit_compiler::BinaryFormat Format) const { assert(MRTCBinInfo); assert(MRTCBinInfo->MLanguage == syclex::source_language::sycl); assert(std::holds_alternative(MBinImage)); @@ -1023,7 +1045,7 @@ class device_image_impl auto [Binaries, Prefix] = syclex::detail::SYCL_JIT_Compile( RegisteredKernelNames.empty() ? SourceStr : SourceExt.str(), - MRTCBinInfo->MIncludePairs, Options, LogPtr); + MRTCBinInfo->MIncludePairs, Options, LogPtr, Format); auto &PM = detail::ProgramManager::getInstance(); diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index 189bd3d145309..2884e25399e5f 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -263,6 +263,7 @@ sycl_device_binaries jit_compiler::createDeviceBinaries( Binary.addOffloadEntry(std::move(Entry)); } + bool FinalizationTagAdded = false; for (const auto &FPS : DevImgInfo.Properties) { bool IsDeviceGlobalsPropSet = FPS.Name == __SYCL_PROPERTY_SET_SYCL_DEVICE_GLOBALS; @@ -279,18 +280,50 @@ sycl_device_binaries jit_compiler::createDeviceBinaries( sycl_property_type::SYCL_PROPERTY_TYPE_BYTE_ARRAY}); } } + if (FPS.Name == __SYCL_PROPERTY_SET_PROGRAM_METADATA && + DevImgInfo.BinaryInfo.Format == + ::jit_compiler::BinaryFormat::AMDGCN) { + PropSet.addProperty(PropertyContainer{ + __SYCL_PROGRAM_METADATA_TAG_NEED_FINALIZATION, 1}); + FinalizationTagAdded = true; + } Binary.addProperty(std::move(PropSet)); Binary.setCompileOptions(BundleInfo.CompileOptions.c_str()); } - Collection->addDeviceBinary(std::move(Binary), - DevImgInfo.BinaryInfo.BinaryStart, - DevImgInfo.BinaryInfo.BinarySize, - (DevImgInfo.BinaryInfo.AddressBits == 64) - ? __SYCL_DEVICE_BINARY_TARGET_SPIRV64 - : __SYCL_DEVICE_BINARY_TARGET_SPIRV32, - SYCL_DEVICE_BINARY_TYPE_SPIRV); + auto BinaryTarget = ""; + auto Format = SYCL_DEVICE_BINARY_TYPE_NONE; + switch (DevImgInfo.BinaryInfo.Format) { + case ::jit_compiler::BinaryFormat::SPIRV: + BinaryTarget = DevImgInfo.BinaryInfo.AddressBits == 64 + ? __SYCL_DEVICE_BINARY_TARGET_SPIRV64 + : __SYCL_DEVICE_BINARY_TARGET_SPIRV32; + Format = SYCL_DEVICE_BINARY_TYPE_SPIRV; + break; + case ::jit_compiler::BinaryFormat::PTX: + BinaryTarget = __SYCL_DEVICE_BINARY_TARGET_LLVM_NVPTX64; + Format = SYCL_DEVICE_BINARY_TYPE_NONE; + break; + case ::jit_compiler::BinaryFormat::AMDGCN: { + BinaryTarget = __SYCL_DEVICE_BINARY_TARGET_LLVM_AMDGCN; + Format = SYCL_DEVICE_BINARY_TYPE_NONE; + // If the program had no properties, the tag needs to be added now. + if (!FinalizationTagAdded) { + PropertySetContainer ProgramMetadata{ + __SYCL_PROPERTY_SET_PROGRAM_METADATA}; + ProgramMetadata.addProperty(PropertyContainer{ + __SYCL_PROGRAM_METADATA_TAG_NEED_FINALIZATION, 1}); + Binary.addProperty(std::move(ProgramMetadata)); + } + break; + } + default: + assert(false && "Unsupported format"); + }; + Collection->addDeviceBinary( + std::move(Binary), DevImgInfo.BinaryInfo.BinaryStart, + DevImgInfo.BinaryInfo.BinarySize, BinaryTarget, Format); } sycl_device_binaries Binaries = Collection->getPIDeviceStruct(); @@ -311,7 +344,26 @@ void jit_compiler::destroyDeviceBinaries(sycl_device_binaries Binaries) { std::pair jit_compiler::compileSYCL( const std::string &CompilationID, const std::string &SYCLSource, const std::vector> &IncludePairs, - const std::vector &UserArgs, std::string *LogPtr) { + const std::vector &UserArgs, std::string *LogPtr, + ::jit_compiler::BinaryFormat Format) { + if (Format == ::jit_compiler::BinaryFormat::PTX || + Format == ::jit_compiler::BinaryFormat::AMDGCN) { + // If present, set-up the config with env variables describing CPU and + // features. + auto SetUpOption = [](const std::string &Value) { + ::jit_compiler::JITEnvVar Option(Value.begin(), Value.end()); + return Option; + }; + ::jit_compiler::JITEnvVar TargetCPUOpt = SetUpOption( + detail::SYCLConfig::get()); + this->AddToConfigHandle( + ::jit_compiler::option::JITTargetCPU::set(TargetCPUOpt)); + ::jit_compiler::JITEnvVar TargetFeaturesOpt = SetUpOption( + detail::SYCLConfig::get()); + this->AddToConfigHandle( + ::jit_compiler::option::JITTargetFeatures::set(TargetFeaturesOpt)); + } + auto appendToLog = [LogPtr](const char *Msg) { if (LogPtr) { LogPtr->append(Msg); @@ -339,7 +391,7 @@ std::pair jit_compiler::compileSYCL( std::vector CachedIR; if (PersistentDeviceCodeCache::isEnabled()) { auto Result = - CalculateHashHandle(SourceFile, IncludeFilesView, UserArgsView); + CalculateHashHandle(SourceFile, IncludeFilesView, UserArgsView, Format); if (Result.failed()) { appendToLog(Result.getPreprocLog()); @@ -349,8 +401,9 @@ std::pair jit_compiler::compileSYCL( } } - auto Result = CompileSYCLHandle(SourceFile, IncludeFilesView, UserArgsView, - CachedIR, /*SaveIR=*/!CacheKey.empty()); + auto Result = + CompileSYCLHandle(SourceFile, IncludeFilesView, UserArgsView, CachedIR, + /*SaveIR=*/!CacheKey.empty(), Format); const char *BuildLog = Result.getBuildLog(); appendToLog(BuildLog); diff --git a/sycl/source/detail/jit_compiler.hpp b/sycl/source/detail/jit_compiler.hpp index 0f0a3f3f5738c..e0aefd1deae5d 100644 --- a/sycl/source/detail/jit_compiler.hpp +++ b/sycl/source/detail/jit_compiler.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -45,7 +46,8 @@ class jit_compiler { std::pair compileSYCL( const std::string &CompilationID, const std::string &SYCLSource, const std::vector> &IncludePairs, - const std::vector &UserArgs, std::string *LogPtr); + const std::vector &UserArgs, std::string *LogPtr, + ::jit_compiler::BinaryFormat Format); void destroyDeviceBinaries(sycl_device_binaries Binaries); diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp index 93ec94a8ac328..418842fdc48ec 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp @@ -44,7 +44,8 @@ std::pair SYCL_JIT_Compile( [[maybe_unused]] const std::string &SYCLSource, [[maybe_unused]] const include_pairs_t &IncludePairs, [[maybe_unused]] const std::vector &UserArgs, - [[maybe_unused]] std::string *LogPtr) { + [[maybe_unused]] std::string *LogPtr, + [[maybe_unused]] ::jit_compiler::BinaryFormat Format) { #if SYCL_EXT_JIT_ENABLE static std::atomic_uintptr_t CompilationCounter; std::string CompilationID = "rtc_" + std::to_string(CompilationCounter++); @@ -52,7 +53,7 @@ std::pair SYCL_JIT_Compile( for (const sycl::detail::string_view UserArg : UserArgs) UserArgStrings.push_back(UserArg.data()); return sycl::detail::jit_compiler::get_instance().compileSYCL( - CompilationID, SYCLSource, IncludePairs, UserArgStrings, LogPtr); + CompilationID, SYCLSource, IncludePairs, UserArgStrings, LogPtr, Format); #else throw sycl::exception(sycl::errc::build, "kernel_compiler via sycl-jit is not available"); diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp index 047a10a061e1f..007dfae9aff22 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include // __SYCL_EXPORT #include @@ -35,7 +36,7 @@ userArgsAsString(const std::vector &UserArguments); std::pair SYCL_JIT_Compile(const std::string &Source, const include_pairs_t &IncludePairs, const std::vector &UserArgs, - std::string *LogPtr); + std::string *LogPtr, ::jit_compiler::BinaryFormat Format); void SYCL_JIT_Destroy(sycl_device_binaries Binaries); diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index a7e0f9289aac0..8fbeb5e98bc57 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -409,8 +409,10 @@ bool is_source_kernel_bundle_supported( const std::vector &DeviceImplVec) { backend BE = DeviceImplVec[0]->getBackend(); // Support is limited to the opencl and level_zero backends. - bool BE_Acceptable = (BE == sycl::backend::ext_oneapi_level_zero) || - (BE == sycl::backend::opencl); + bool BE_Acceptable = + (BE == sycl::backend::ext_oneapi_level_zero) || + (BE == sycl::backend::opencl || BE == sycl::backend::ext_oneapi_hip || + BE == sycl::backend::ext_oneapi_cuda); if (!BE_Acceptable) return false; diff --git a/sycl/test-e2e/KernelCompiler/sycl.cpp b/sycl/test-e2e/KernelCompiler/sycl.cpp index fac041254778a..10d4fe69b839d 100644 --- a/sycl/test-e2e/KernelCompiler/sycl.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl.cpp @@ -6,14 +6,13 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_device_allocations // UNSUPPORTED: accelerator // UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. // RUN: %{build} -o %t.out -// RUN: %{l0_leak_check} %{run} %t.out +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{l0_leak_check} %{run} %t.out // XFAIL: preview-mode && run-mode // XFAIL-TRACKER: https://github.com/intel/llvm/issues/18390 @@ -111,15 +110,14 @@ auto constexpr DeviceLibrariesSource = R"===( #include #include #include -#include extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(sycl::ext::oneapi::experimental::single_task_kernel) void device_libs_kernel(float *ptr) { // Extension list: llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp - // cl_intel_devicelib_assert is not available for opencl:gpu; skip testing it. - // Only test the fp32 variants of complex, math and imf to keep this test + // cl_intel_devicelib_assert is not available for opencl:gpu; skip testing + // it. Only test the fp32 variants of complex and math to keep this test // device-agnostic. // cl_intel_devicelib_math @@ -131,14 +129,8 @@ void device_libs_kernel(float *ptr) { // cl_intel_devicelib_cstring ptr[2] = memcmp(ptr + 2, ptr + 2, sizeof(float)); - // cl_intel_devicelib_imf - ptr[3] = sycl::ext::intel::math::sqrt(ptr[3] * 2); - - // cl_intel_devicelib_imf_bf16 - ptr[4] = sycl::ext::intel::math::float2bfloat16(ptr[4] * 0.5f); - // cl_intel_devicelib_bfloat16 - ptr[5] = sycl::ext::oneapi::bfloat16{ptr[5] / 0.25f}; + ptr[3] = sycl::ext::oneapi::bfloat16{ptr[3] / 0.25f}; } )==="; @@ -359,7 +351,7 @@ int test_device_libraries(sycl::queue q) { exe_kb kbExe = syclex::build(kbSrc); sycl::kernel k = kbExe.ext_oneapi_get_kernel("device_libs_kernel"); - constexpr size_t nElem = 6; + constexpr size_t nElem = 4; float *ptr = sycl::malloc_shared(nElem, q); for (int i = 0; i < nElem; ++i) ptr[i] = 1.0f; diff --git a/sycl/test-e2e/KernelCompiler/sycl_basic.cpp b/sycl/test-e2e/KernelCompiler/sycl_basic.cpp index 5c212ee0d843d..033070acca675 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_basic.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_basic.cpp @@ -6,14 +6,13 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_device_allocations // UNSUPPORTED: accelerator // UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. // RUN: %{build} -o %t.out -// RUN: %{l0_leak_check} %{run} %t.out +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{l0_leak_check} %{run} %t.out #include #include diff --git a/sycl/test-e2e/KernelCompiler/sycl_cache.cpp b/sycl/test-e2e/KernelCompiler/sycl_cache.cpp index 1ec2f744b632d..15d3fe829ba70 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_cache.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_cache.cpp @@ -7,6 +7,11 @@ //===----------------------------------------------------------------------===// // REQUIRES: (opencl || level_zero) +// Unlike other RTC tests, don't run this one on Cuda/HIP. Eviction mechanism +// is based on the size of compiled kernels, which in turns depends on the +// target. Don't run eviction check for CUDA/HIP, so that we don't have to find +// a magic number that works for all binaries (and by definition is flaky). + // REQUIRES: aspect-usm_device_allocations // UNSUPPORTED: accelerator diff --git a/sycl/test-e2e/KernelCompiler/sycl_context_error.cpp b/sycl/test-e2e/KernelCompiler/sycl_context_error.cpp index d111e77960e48..6f27ee95d84a8 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_context_error.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_context_error.cpp @@ -6,13 +6,11 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: (opencl || level_zero) - // UNSUPPORTED: accelerator // UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. // RUN: %{build} -o %t.out -// RUN: %{run-unfiltered-devices} %t.out +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run-unfiltered-devices} %t.out #include #include diff --git a/sycl/test-e2e/KernelCompiler/sycl_device_globals.cpp b/sycl/test-e2e/KernelCompiler/sycl_device_globals.cpp index ceda7252369e5..3f10968c45f57 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_device_globals.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_device_globals.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_device_allocations // UNSUPPORTED: accelerator, opencl && gpu @@ -15,7 +14,7 @@ // UNSUPPORTED-TRACKER: GSD-4287 // RUN: %{build} -o %t.out -// RUN: %{l0_leak_check} %{run} %t.out +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{l0_leak_check} %{run} %t.out #include #include diff --git a/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp b/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp index 789bab6223546..e8390ced06efb 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp @@ -11,6 +11,8 @@ // UNSUPPORTED: accelerator // UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. +// Note linking is not supported on CUDA/HIP. + // -- Test for a case where a kernel bundle is built that exports a symbol and // -- other kernel bundles that uses it are compiled/linked without it. These // -- cases should fail due to unresolved symbols, rather than picking up the diff --git a/sycl/test-e2e/KernelCompiler/sycl_imf.cpp b/sycl/test-e2e/KernelCompiler/sycl_imf.cpp new file mode 100644 index 0000000000000..142cc2160ac52 --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/sycl_imf.cpp @@ -0,0 +1,79 @@ +//==--- sycl_imf.cpp --- kernel_compiler extension imf tests ---------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations +// REQUIRES: (opencl || level_zero) + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out + +// XFAIL: preview-mode && run-mode +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/18390 + +#include +#include +#include + +auto constexpr IMFSources = R"===( +#include +#include +#include + +extern "C" SYCL_EXTERNAL +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(sycl::ext::oneapi::experimental::single_task_kernel) +void imf_kernel(float *ptr) { + // cl_intel_devicelib_imf + ptr[0] = sycl::ext::intel::math::sqrt(ptr[0] * 2); + + // cl_intel_devicelib_imf_bf16 + ptr[1] = sycl::ext::intel::math::float2bfloat16(ptr[1] * 0.5f); +} +)==="; + +int main() { +#ifdef SYCL_EXT_ONEAPI_KERNEL_COMPILER + sycl::queue q; + namespace syclex = sycl::ext::oneapi::experimental; + using source_kb = sycl::kernel_bundle; + using exe_kb = sycl::kernel_bundle; + + sycl::context ctx = q.get_context(); + + source_kb kbSrc = syclex::create_kernel_bundle_from_source( + ctx, syclex::source_language::sycl, IMFSources); + exe_kb kbExe = syclex::build(kbSrc); + + sycl::kernel k = kbExe.ext_oneapi_get_kernel("imf_kernel"); + constexpr size_t nElem = 2; + float *ptr = sycl::malloc_shared(nElem, q); + for (int i = 0; i < nElem; ++i) + ptr[i] = 1.0f; + + q.submit([&](sycl::handler &cgh) { + cgh.set_arg(0, ptr); + cgh.single_task(k); + }); + q.wait_and_throw(); + + // Check that the kernel was executed. Given the {1.0, 1.0} input, + // the expected result is approximately {1.41, 0.5}. + for (unsigned i = 0; i < nElem; ++i) { + std::cout << ptr[i] << ' '; + assert(ptr[i] != 1.0f); + } + std::cout << std::endl; + + sycl::free(ptr, q); +#else + static_assert(false, "Kernel Compiler feature test macro undefined"); +#endif + return 0; +} diff --git a/sycl/test-e2e/KernelCompiler/sycl_include_paths.cpp b/sycl/test-e2e/KernelCompiler/sycl_include_paths.cpp index bf0d46b5800be..b65e25ccb445b 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_include_paths.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_include_paths.cpp @@ -6,18 +6,16 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_device_allocations - // UNSUPPORTED: accelerator // UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. // RUN: %{build} -o %t.out -// RUN: %{run} %t.out %S | FileCheck %s --check-prefixes=CHECK,CHECK-NOCWD +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out %S | FileCheck %s --check-prefixes=CHECK,CHECK-NOCWD // COM: Run test again in a directory that contains a different version of // `header1.hpp` -// RUN: cd %S/include/C ; %{run} %t.out %S | FileCheck %s --check-prefixes=CHECK,CHECK-CWD +// RUN: cd %S/include/C ; %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out %S | FileCheck %s --check-prefixes=CHECK,CHECK-CWD #include #include diff --git a/sycl/test-e2e/KernelCompiler/sycl_join.cpp b/sycl/test-e2e/KernelCompiler/sycl_join.cpp index 4ec0ec5c35604..8d013f9713cc7 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_join.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_join.cpp @@ -6,15 +6,14 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_shared_allocations // UNSUPPORTED: accelerator // UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. // RUN: %{build} -o %t.out -// RUN: %{run} %t.out -// RUN: %{l0_leak_check} %{run} %t.out +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{l0_leak_check} %{run} %t.out // XFAIL: preview-mode && run-mode // XFAIL-TRACKER: https://github.com/intel/llvm/issues/18390 diff --git a/sycl/test-e2e/KernelCompiler/sycl_lifetimes.cpp b/sycl/test-e2e/KernelCompiler/sycl_lifetimes.cpp index 4ef1cb2d88e9c..82b866353e615 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_lifetimes.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_lifetimes.cpp @@ -6,14 +6,13 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_device_allocations // UNSUPPORTED: accelerator // UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. // RUN: %{build} -o %t.out -// RUN: env SYCL_UR_TRACE=-1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} env SYCL_UR_TRACE=-1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s #include #include @@ -51,14 +50,15 @@ int test_lifetimes() { ctx, syclex::source_language::sycl, SYCLSource); exe_kb kbExe1 = syclex::build(kbSrc); - // CHECK: urProgramCreateWithIL{{.*}}phProgram{{.*}}([[PROG1:.*]])) + // Cuda/Hip programs will be created with Binary, spirv IL. + // CHECK: urProgramCreateWith{{IL|Binary}}{{.*}}phProgram{{.*}}([[PROG1:.*]])) { std::cout << "Scope1\n"; // CHECK: Scope1 exe_kb kbExe2 = syclex::build(kbSrc); // kbExe2 goes out of scope; its kernels are removed from program mananager. - // CHECK: urProgramCreateWithIL{{.*}}phProgram{{.*}}([[PROG2:.*]])) + // CHECK: urProgramCreateWith{{IL|Binary}}{{.*}}phProgram{{.*}}([[PROG2:.*]])) // CHECK: urProgramRelease{{.*}}[[PROG2]] } std::cout << "End Scope1\n"; diff --git a/sycl/test-e2e/KernelCompiler/sycl_link.cpp b/sycl/test-e2e/KernelCompiler/sycl_link.cpp index 9ff9878e387ee..17aedb9685360 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_link.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_link.cpp @@ -15,6 +15,8 @@ // -- Test for a simple linking case with source files compiled from SYCL source // -- at runtime. +// Note linking is not supported on CUDA/HIP. + // RUN: %{build} -o %t.out // RUN: %{run} %t.out // RUN: %{l0_leak_check} %{run} %t.out diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp index 49d67205b8cd1..ebe9e1d8ab56e 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp @@ -12,6 +12,8 @@ // UNSUPPORTED: accelerator // UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. +// Note linking is not supported on CUDA/HIP. + // -- Test for linking where two kernels use the same imported symbols. // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp index 76fe85ce72fa9..dbc31e63da125 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp @@ -12,6 +12,8 @@ // UNSUPPORTED: accelerator // UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. +// Note linking is not supported on CUDA/HIP. + // -- Test for linking where two kernels use the same imported symbols, but one // -- may not be supported on the device. diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_export_conflict.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_export_conflict.cpp index f9d6381e08b38..465ce7b5136a4 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_link_export_conflict.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_link_export_conflict.cpp @@ -12,6 +12,8 @@ // UNSUPPORTED: accelerator // UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. +// Note linking is not supported on CUDA/HIP. + // -- Test for a case where a kernel bundle with an exported symbol is compiled // -- before another kernel bundle using a different variant of the symbol. diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_kernel_conflict.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_kernel_conflict.cpp index 7fe9b0fd9db79..5b2a0e41b33b3 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_link_kernel_conflict.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_link_kernel_conflict.cpp @@ -12,6 +12,8 @@ // UNSUPPORTED: accelerator // UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. +// Note linking is not supported on CUDA/HIP. + // -- Test for the linking of two kernels with conflicting definitions of // -- kernels with the same name. diff --git a/sycl/test-e2e/KernelCompiler/sycl_namespaces.cpp b/sycl/test-e2e/KernelCompiler/sycl_namespaces.cpp index fbe2a75daf617..713c220419c75 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_namespaces.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_namespaces.cpp @@ -6,14 +6,13 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_device_allocations // UNSUPPORTED: accelerator // UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. // RUN: %{build} -o %t.out -// RUN: %{l0_leak_check} %{run} %t.out +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{l0_leak_check} %{run} %t.out #include #include diff --git a/sycl/test-e2e/KernelCompiler/sycl_overload.cpp b/sycl/test-e2e/KernelCompiler/sycl_overload.cpp index 97488d880993b..00ac6c76fa10c 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_overload.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_overload.cpp @@ -6,14 +6,13 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_device_allocations // UNSUPPORTED: accelerator // UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. // RUN: %{build} -o %t.out -// RUN: %{l0_leak_check} %{run} %t.out +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{l0_leak_check} %{run} %t.out #include #include diff --git a/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp b/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp index 77a2d89bed123..3d149adff8ebe 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp @@ -6,12 +6,11 @@ // //===----------------------------------------------------------------------===// -// REQUIRES: (opencl || level_zero) // UNSUPPORTED: accelerator // UNSUPPORTED-INTENDED: SYCL-RTC is not available for accelerator devices // RUN: %{build} -o %t.out -// RUN: %{run} %t.out | FileCheck %s +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out | FileCheck %s #include #include diff --git a/sycl/test-e2e/format.py b/sycl/test-e2e/format.py index 44da0ef1c52c4..ff2f8e36ecfb7 100644 --- a/sycl/test-e2e/format.py +++ b/sycl/test-e2e/format.py @@ -244,6 +244,7 @@ def execute(self, test, litConfig): ) sycl_target_opts += hip_arch_opts substitutions.append(("%{hip_arch_opts}", hip_arch_opts)) + substitutions.append(("%{amd_arch}", test.config.amd_arch)) if ( "target-spir" in build_targets and "spirv-backend" in test.config.available_features diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index 5b97ae7a0cac9..4292342068187 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 26 +// CHECK-NUM-MATCHES: 28 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see