From 1f0279cd4060a7e8e95bfc350318056666a36cae Mon Sep 17 00:00:00 2001 From: "Pirog, Mikolaj Maciej" Date: Fri, 4 Jul 2025 05:21:10 -0700 Subject: [PATCH 1/5] Revert " [SYCL] RTC support for AMD and Nvidia GPU targets (#18918)" This reverts commit 6d97d984558a5a383dc57e56a124e751d4d790d5. --- sycl-jit/jit-compiler/CMakeLists.txt | 1 - sycl-jit/jit-compiler/include/RTC.h | 11 +- .../lib/rtc/DeviceCompilation.cpp | 141 +++--------------- .../jit-compiler/lib/rtc/DeviceCompilation.h | 8 +- sycl-jit/jit-compiler/lib/rtc/RTC.cpp | 22 ++- .../lib/translation/Translation.cpp | 89 ++++++----- .../lib/translation/Translation.h | 4 +- sycl/source/detail/device_image_impl.hpp | 34 +---- sycl/source/detail/jit_compiler.cpp | 75 ++-------- sycl/source/detail/jit_compiler.hpp | 4 +- .../kernel_compiler/kernel_compiler_sycl.cpp | 5 +- .../kernel_compiler/kernel_compiler_sycl.hpp | 3 +- sycl/source/kernel_bundle.cpp | 6 +- sycl/test-e2e/KernelCompiler/sycl.cpp | 18 ++- sycl/test-e2e/KernelCompiler/sycl_basic.cpp | 3 +- sycl/test-e2e/KernelCompiler/sycl_cache.cpp | 5 - .../KernelCompiler/sycl_context_error.cpp | 4 +- .../KernelCompiler/sycl_device_globals.cpp | 3 +- .../sycl_export_registration.cpp | 2 - sycl/test-e2e/KernelCompiler/sycl_imf.cpp | 79 ---------- .../KernelCompiler/sycl_include_paths.cpp | 5 +- sycl/test-e2e/KernelCompiler/sycl_join.cpp | 5 +- .../KernelCompiler/sycl_lifetimes.cpp | 8 +- sycl/test-e2e/KernelCompiler/sycl_link.cpp | 2 - .../KernelCompiler/sycl_link_common_dep.cpp | 2 - .../sycl_link_common_dep_optional_feature.cpp | 2 - .../sycl_link_export_conflict.cpp | 2 - .../sycl_link_kernel_conflict.cpp | 2 - .../KernelCompiler/sycl_namespaces.cpp | 3 +- .../test-e2e/KernelCompiler/sycl_overload.cpp | 3 +- .../KernelCompiler/sycl_time_trace.cpp | 2 +- sycl/test-e2e/format.py | 1 - .../no_sycl_hpp_in_e2e_tests.cpp | 2 +- 33 files changed, 146 insertions(+), 410 deletions(-) delete mode 100644 sycl/test-e2e/KernelCompiler/sycl_imf.cpp diff --git a/sycl-jit/jit-compiler/CMakeLists.txt b/sycl-jit/jit-compiler/CMakeLists.txt index 9e97d2c8ccfdb..fabf64546e9da 100644 --- a/sycl-jit/jit-compiler/CMakeLists.txt +++ b/sycl-jit/jit-compiler/CMakeLists.txt @@ -60,7 +60,6 @@ target_include_directories(sycl-jit ${LLVM_MAIN_INCLUDE_DIR} ${LLVM_SPIRV_INCLUDE_DIRS} ${LLVM_EXTERNAL_CLANG_SOURCE_DIR}/include - ${LLVM_EXTERNAL_CLANG_SOURCE_DIR}/lib ${CMAKE_BINARY_DIR}/tools/clang/include ) target_include_directories(sycl-jit diff --git a/sycl-jit/jit-compiler/include/RTC.h b/sycl-jit/jit-compiler/include/RTC.h index 6a690df459646..a1a39d5b50ad3 100644 --- a/sycl-jit/jit-compiler/include/RTC.h +++ b/sycl-jit/jit-compiler/include/RTC.h @@ -176,11 +176,10 @@ class RTCResult { /// Calculates a BLAKE3 hash of the pre-processed source string described by /// \p SourceFile (considering any additional \p IncludeFiles) and the -/// concatenation of the \p UserArgs, for a given \p Format. +/// concatenation of the \p UserArgs. JIT_EXPORT_SYMBOL RTCHashResult calculateHash(InMemoryFile SourceFile, View IncludeFiles, - View UserArgs, - BinaryFormat Format); + View UserArgs); /// Compiles, links against device libraries, and finalizes the device code in /// the source string described by \p SourceFile, considering any additional \p @@ -192,14 +191,10 @@ JIT_EXPORT_SYMBOL RTCHashResult calculateHash(InMemoryFile SourceFile, /// /// If \p SaveIR is true and \p CachedIR is empty, the LLVM module obtained from /// the frontend invocation is wrapped in bitcode format in the result object. -/// -/// \p BinaryFormat describes the desired format of the compilation - which -/// corresponds to the backend that is being targeted. JIT_EXPORT_SYMBOL RTCResult compileSYCL(InMemoryFile SourceFile, View IncludeFiles, View UserArgs, - View CachedIR, bool SaveIR, - BinaryFormat Format); + View CachedIR, bool SaveIR); /// Requests that the JIT binary referenced by \p Address is deleted from the /// `JITContext`. diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index ade0161991b2d..bc5d638a25255 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -8,17 +8,11 @@ #include "DeviceCompilation.h" #include "ESIMD.h" -#include "JITBinaryInfo.h" -#include "translation/Translation.h" -#include -#include -#include #include #include #include #include -#include #include #include #include @@ -184,8 +178,7 @@ class RTCToolActionBase : public ToolAction { assert(!hasExecuted() && "Action should only be invoked on a single file"); // Create a compiler instance to handle the actual work. - CompilerInstance Compiler(std::move(Invocation), - std::move(PCHContainerOps)); + CompilerInstance Compiler(std::move(Invocation), std::move(PCHContainerOps)); Compiler.setFileManager(Files); // Suppress summary with number of warnings and errors being printed to // stdout. @@ -319,7 +312,7 @@ class LLVMDiagnosticWrapper : public llvm::DiagnosticHandler { } // anonymous namespace static void adjustArgs(const InputArgList &UserArgList, - const std::string &DPCPPRoot, BinaryFormat Format, + const std::string &DPCPPRoot, SmallVectorImpl &CommandLine) { DerivedArgList DAL{UserArgList}; const auto &OptTable = getDriverOptTable(); @@ -332,23 +325,6 @@ static void adjustArgs(const InputArgList &UserArgList, // unused argument warning. DAL.AddFlagArg(nullptr, OptTable.getOption(OPT_Qunused_arguments)); - if (Format == BinaryFormat::PTX || Format == BinaryFormat::AMDGCN) { - auto [CPU, Features] = - Translator::getTargetCPUAndFeatureAttrs(nullptr, "", Format); - (void)Features; - if (Format == BinaryFormat::AMDGCN) { - DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_fsycl_targets_EQ), - "amdgcn-amd-amdhsa"); - DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_Xsycl_backend_EQ), - "amdgcn-amd-amdhsa"); - DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_offload_arch_EQ), CPU); - } else { - DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_fsycl_targets_EQ), - "nvptx64-nvidia-cuda"); - DAL.AddFlagArg(nullptr, OptTable.getOption(OPT_Xsycl_backend)); - DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_cuda_gpu_arch_EQ), CPU); - } - } ArgStringList ASL; for_each(DAL, [&DAL, &ASL](Arg *A) { A->render(DAL, ASL); }); for_each(UserArgList, @@ -385,9 +361,10 @@ static void setupTool(ClangTool &Tool, const std::string &DPCPPRoot, }); } -Expected jit_compiler::calculateHash( - InMemoryFile SourceFile, View IncludeFiles, - const InputArgList &UserArgList, BinaryFormat Format) { +Expected +jit_compiler::calculateHash(InMemoryFile SourceFile, + View IncludeFiles, + const InputArgList &UserArgList) { TimeTraceScope TTS{"calculateHash"}; const std::string &DPCPPRoot = getDPCPPRoot(); @@ -396,7 +373,7 @@ Expected jit_compiler::calculateHash( } SmallVector CommandLine; - adjustArgs(UserArgList, DPCPPRoot, Format, CommandLine); + adjustArgs(UserArgList, DPCPPRoot, CommandLine); FixedCompilationDatabase DB{".", CommandLine}; ClangTool Tool{DB, {SourceFile.Path}}; @@ -422,10 +399,11 @@ Expected jit_compiler::calculateHash( return createStringError("Calculating source hash failed"); } -Expected jit_compiler::compileDeviceCode( - InMemoryFile SourceFile, View IncludeFiles, - const InputArgList &UserArgList, std::string &BuildLog, - LLVMContext &Context, BinaryFormat Format) { +Expected +jit_compiler::compileDeviceCode(InMemoryFile SourceFile, + View IncludeFiles, + const InputArgList &UserArgList, + std::string &BuildLog, LLVMContext &Context) { TimeTraceScope TTS{"compileDeviceCode"}; const std::string &DPCPPRoot = getDPCPPRoot(); @@ -434,7 +412,7 @@ Expected jit_compiler::compileDeviceCode( } SmallVector CommandLine; - adjustArgs(UserArgList, DPCPPRoot, Format, CommandLine); + adjustArgs(UserArgList, DPCPPRoot, CommandLine); FixedCompilationDatabase DB{".", CommandLine}; ClangTool Tool{DB, {SourceFile.Path}}; @@ -452,22 +430,12 @@ Expected jit_compiler::compileDeviceCode( return createStringError(BuildLog); } -// This function is a simplified copy of the device library selection process -// in `clang::driver::tools::SYCL::getDeviceLibraries`, assuming a SPIR-V, or -// GPU targets (no AoT, no native CPU). Keep in sync! +// This function is a simplified copy of the device library selection process in +// `clang::driver::tools::SYCL::getDeviceLibraries`, assuming a SPIR-V target +// (no AoT, no third-party GPUs, no native CPU). Keep in sync! static bool getDeviceLibraries(const ArgList &Args, SmallVectorImpl &LibraryList, - DiagnosticsEngine &Diags, BinaryFormat Format) { - // For CUDA/HIP we only need devicelib, early exit here. - if (Format == BinaryFormat::PTX) { - LibraryList.push_back( - Args.MakeArgString("devicelib-nvptx64-nvidia-cuda.bc")); - return false; - } else if (Format == BinaryFormat::AMDGCN) { - LibraryList.push_back(Args.MakeArgString("devicelib-amdgcn-amd-amdhsa.bc")); - return false; - } - + DiagnosticsEngine &Diags) { struct DeviceLibOptInfo { StringRef DeviceLibName; StringRef DeviceLibOption; @@ -572,8 +540,7 @@ static Expected loadBitcodeLibrary(StringRef LibPath, Error jit_compiler::linkDeviceLibraries(llvm::Module &Module, const InputArgList &UserArgList, - std::string &BuildLog, - BinaryFormat Format) { + std::string &BuildLog) { TimeTraceScope TTS{"linkDeviceLibraries"}; const std::string &DPCPPRoot = getDPCPPRoot(); @@ -588,29 +555,11 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module, /* ShouldOwnClient=*/false); SmallVector LibNames; - const bool FoundUnknownLib = - getDeviceLibraries(UserArgList, LibNames, Diags, Format); + bool FoundUnknownLib = getDeviceLibraries(UserArgList, LibNames, Diags); if (FoundUnknownLib) { return createStringError("Could not determine list of device libraries: %s", BuildLog.c_str()); } - const bool IsCudaHIP = - Format == BinaryFormat::PTX || Format == BinaryFormat::AMDGCN; - if (IsCudaHIP) { - // Based on the OS and the format decide on the version of libspirv. - // NOTE: this will be problematic if cross-compiling between OSes. - std::string Libclc{"clc/"}; - Libclc.append( -#ifdef _WIN32 - "remangled-l32-signed_char.libspirv-" -#else - "remangled-l64-signed_char.libspirv-" -#endif - ); - Libclc.append(Format == BinaryFormat::PTX ? "nvptx64-nvidia-cuda.bc" - : "amdgcn-amd-amdhsa.bc"); - LibNames.push_back(Libclc); - } LLVMContext &Context = Module.getContext(); for (const std::string &LibName : LibNames) { @@ -628,58 +577,6 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module, } } - // For GPU targets we need to link against vendor provided libdevice. - if (IsCudaHIP) { - Triple T{Module.getTargetTriple()}; - Driver D{(Twine(DPCPPRoot) + "/bin/clang++").str(), T.getTriple(), Diags}; - auto [CPU, Features] = - Translator::getTargetCPUAndFeatureAttrs(&Module, "", Format); - (void)Features; - // Helper lambda to link modules. - auto LinkInLib = [&](const StringRef LibDevice) -> Error { - ModuleUPtr LibDeviceModule; - if (auto Error = loadBitcodeLibrary(LibDevice, Context) - .moveInto(LibDeviceModule)) { - return Error; - } - if (Linker::linkModules(Module, std::move(LibDeviceModule), - Linker::LinkOnlyNeeded)) { - return createStringError("Unable to link libdevice: %s", - BuildLog.c_str()); - } - return Error::success(); - }; - SmallVector LibDeviceFiles; - if (Format == BinaryFormat::PTX) { - // For NVPTX we can get away with CudaInstallationDetector. - LazyDetector CudaInstallation{D, T, - UserArgList}; - auto LibDevice = CudaInstallation->getLibDeviceFile(CPU); - if (LibDevice.empty()) { - return createStringError("Unable to find Cuda libdevice"); - } - LibDeviceFiles.push_back(LibDevice); - } else { - // AMDGPU requires entire toolchain in order to provide all common bitcode - // libraries. - clang::driver::toolchains::ROCMToolChain TC(D, T, UserArgList); - auto CommonDeviceLibs = TC.getCommonDeviceLibNames( - UserArgList, CPU, Action::OffloadKind::OFK_SYCL, false); - if (CommonDeviceLibs.empty()) { - return createStringError("Unable to find ROCm common device libraries"); - } - for (auto &Lib : CommonDeviceLibs) { - LibDeviceFiles.push_back(Lib.Path); - } - } - for (auto &LibDeviceFile : LibDeviceFiles) { - // llvm::Error converts to false on success. - if (auto Error = LinkInLib(LibDeviceFile)) { - return Error; - } - } - } - return Error::success(); } diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h index aa1b19df0cc8f..62da2cdb54bf1 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h @@ -8,7 +8,6 @@ #pragma once -#include "JITBinaryInfo.h" #include "RTC.h" #include @@ -25,17 +24,16 @@ using ModuleUPtr = std::unique_ptr; llvm::Expected calculateHash(InMemoryFile SourceFile, View IncludeFiles, - const llvm::opt::InputArgList &UserArgList, BinaryFormat Format); + const llvm::opt::InputArgList &UserArgList); llvm::Expected compileDeviceCode(InMemoryFile SourceFile, View IncludeFiles, const llvm::opt::InputArgList &UserArgList, - std::string &BuildLog, llvm::LLVMContext &Context, - BinaryFormat Format); + std::string &BuildLog, llvm::LLVMContext &Context); llvm::Error linkDeviceLibraries(llvm::Module &Module, const llvm::opt::InputArgList &UserArgList, - std::string &BuildLog, BinaryFormat Format); + std::string &BuildLog); using PostLinkResult = std::pair>; llvm::Expected diff --git a/sycl-jit/jit-compiler/lib/rtc/RTC.cpp b/sycl-jit/jit-compiler/lib/rtc/RTC.cpp index 2e42bca3a49a5..e9307ccb27eed 100644 --- a/sycl-jit/jit-compiler/lib/rtc/RTC.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/RTC.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "RTC.h" -#include "JITBinaryInfo.h" #include "helper/ErrorHelper.h" #include "rtc/DeviceCompilation.h" #include "translation/SPIRVLLVMTranslation.h" @@ -27,8 +26,7 @@ using namespace jit_compiler; JIT_EXPORT_SYMBOL RTCHashResult calculateHash(InMemoryFile SourceFile, View IncludeFiles, - View UserArgs, - BinaryFormat Format) { + View UserArgs) { llvm::opt::InputArgList UserArgList; if (auto Error = parseUserArgs(UserArgs).moveInto(UserArgList)) { return errorTo(std::move(Error), @@ -38,8 +36,8 @@ JIT_EXPORT_SYMBOL RTCHashResult calculateHash(InMemoryFile SourceFile, auto Start = std::chrono::high_resolution_clock::now(); std::string Hash; - if (auto Error = calculateHash(SourceFile, IncludeFiles, UserArgList, Format) - .moveInto(Hash)) { + if (auto Error = + calculateHash(SourceFile, IncludeFiles, UserArgList).moveInto(Hash)) { return errorTo(std::move(Error), "Hashing failed", /*IsHash=*/false); } @@ -57,8 +55,7 @@ JIT_EXPORT_SYMBOL RTCHashResult calculateHash(InMemoryFile SourceFile, JIT_EXPORT_SYMBOL RTCResult compileSYCL(InMemoryFile SourceFile, View IncludeFiles, View UserArgs, - View CachedIR, bool SaveIR, - BinaryFormat Format) { + View CachedIR, bool SaveIR) { llvm::LLVMContext Context; std::string BuildLog; configureDiagnostics(Context, BuildLog); @@ -107,7 +104,7 @@ JIT_EXPORT_SYMBOL RTCResult compileSYCL(InMemoryFile SourceFile, bool FromSource = !Module; if (FromSource) { if (auto Error = compileDeviceCode(SourceFile, IncludeFiles, UserArgList, - BuildLog, Context, Format) + BuildLog, Context) .moveInto(Module)) { return errorTo(std::move(Error), "Device compilation failed"); } @@ -121,8 +118,7 @@ JIT_EXPORT_SYMBOL RTCResult compileSYCL(InMemoryFile SourceFile, IR = RTCDeviceCodeIR{BCString.data(), BCString.data() + BCString.size()}; } - if (auto Error = - linkDeviceLibraries(*Module, UserArgList, BuildLog, Format)) { + if (auto Error = linkDeviceLibraries(*Module, UserArgList, BuildLog)) { return errorTo(std::move(Error), "Device linking failed"); } @@ -135,9 +131,9 @@ JIT_EXPORT_SYMBOL RTCResult compileSYCL(InMemoryFile SourceFile, for (auto [DevImgInfo, Module] : llvm::zip_equal(BundleInfo.DevImgInfos, Modules)) { - if (auto Error = - Translator::translate(*Module, JITContext::getInstance(), Format) - .moveInto(DevImgInfo.BinaryInfo)) { + if (auto Error = Translator::translate(*Module, JITContext::getInstance(), + BinaryFormat::SPIRV) + .moveInto(DevImgInfo.BinaryInfo)) { return errorTo(std::move(Error), "SPIR-V translation failed"); } } diff --git a/sycl-jit/jit-compiler/lib/translation/Translation.cpp b/sycl-jit/jit-compiler/lib/translation/Translation.cpp index 99eaffd3384d1..457b6bfc0e637 100644 --- a/sycl-jit/jit-compiler/lib/translation/Translation.cpp +++ b/sycl-jit/jit-compiler/lib/translation/Translation.cpp @@ -83,6 +83,9 @@ llvm::Expected Translator::translateToPTX(llvm::Module &Mod, LLVMInitializeNVPTXAsmPrinter(); LLVMInitializeNVPTXTargetMC(); + static const char *TARGET_CPU_ATTRIBUTE = "target-cpu"; + static const char *TARGET_FEATURE_ATTRIBUTE = "target-features"; + std::string TargetTriple{"nvptx64-nvidia-cuda"}; std::string ErrorMessage; @@ -96,11 +99,32 @@ llvm::Expected Translator::translateToPTX(llvm::Module &Mod, ErrorMessage.c_str()); } - auto [CPU, Features] = - getTargetCPUAndFeatureAttrs(&Mod, KernelName, BinaryFormat::PTX); + // Give priority to user specified values (through environment variables: + // SYCL_JIT_AMDGCN_PTX_TARGET_CPU and SYCL_JIT_AMDGCN_PTX_TARGET_FEATURES). + auto CPUVal = ConfigHelper::get(); + auto FeaturesVal = ConfigHelper::get(); + llvm::StringRef CPU = CPUVal.begin(); + llvm::StringRef Features = FeaturesVal.begin(); + + auto *KernelFunc = KernelName ? Mod.getFunction(KernelName) : nullptr; + // If they were not set, use default and consult the module for alternatives + // (if present). + if (CPU.empty()) { + CPU = "sm_50"; + if (KernelFunc && KernelFunc->hasFnAttribute(TARGET_CPU_ATTRIBUTE)) { + CPU = KernelFunc->getFnAttribute(TARGET_CPU_ATTRIBUTE).getValueAsString(); + } + } + if (Features.empty()) { + Features = "+sm_50,+ptx76"; + if (KernelFunc && KernelFunc->hasFnAttribute(TARGET_FEATURE_ATTRIBUTE)) { + Features = KernelFunc->getFnAttribute(TARGET_FEATURE_ATTRIBUTE) + .getValueAsString(); + } + } std::unique_ptr TargetMachine(Target->createTargetMachine( - Triple{TargetTriple}, CPU, Features, {}, llvm::Reloc::PIC_, std::nullopt, + Mod.getTargetTriple(), CPU, Features, {}, llvm::Reloc::PIC_, std::nullopt, llvm::CodeGenOptLevel::Default)); llvm::legacy::PassManager PM; @@ -142,6 +166,9 @@ Translator::translateToAMDGCN(llvm::Module &Mod, JITContext &JITCtx, LLVMInitializeAMDGPUAsmPrinter(); LLVMInitializeAMDGPUTargetMC(); + static const char *TARGET_CPU_ATTRIBUTE = "target-cpu"; + static const char *TARGET_FEATURE_ATTRIBUTE = "target-features"; + std::string TargetTriple{"amdgcn-amd-amdhsa"}; std::string ErrorMessage; @@ -154,10 +181,29 @@ Translator::translateToAMDGCN(llvm::Module &Mod, JITContext &JITCtx, "Failed to load and translate AMDGCN LLVM IR module with error %s", ErrorMessage.c_str()); - auto [CPU, Features] = - getTargetCPUAndFeatureAttrs(&Mod, KernelName, BinaryFormat::AMDGCN); + auto CPUVal = ConfigHelper::get(); + auto FeaturesVal = ConfigHelper::get(); + llvm::StringRef CPU = CPUVal.begin(); + llvm::StringRef Features = FeaturesVal.begin(); + + auto *KernelFunc = KernelName ? Mod.getFunction(KernelName) : nullptr; + if (CPU.empty()) { + // Set to the lowest tested target according to the GetStartedGuide, section + // "Build DPC++ toolchain with support for HIP AMD" + CPU = "gfx906"; + if (KernelFunc && KernelFunc->hasFnAttribute(TARGET_CPU_ATTRIBUTE)) { + CPU = KernelFunc->getFnAttribute(TARGET_CPU_ATTRIBUTE).getValueAsString(); + } + } + if (Features.empty()) { + if (KernelFunc && KernelFunc->hasFnAttribute(TARGET_FEATURE_ATTRIBUTE)) { + Features = KernelFunc->getFnAttribute(TARGET_FEATURE_ATTRIBUTE) + .getValueAsString(); + } + } + std::unique_ptr TargetMachine(Target->createTargetMachine( - Triple{TargetTriple}, CPU, Features, {}, llvm::Reloc::PIC_, std::nullopt, + Mod.getTargetTriple(), CPU, Features, {}, llvm::Reloc::PIC_, std::nullopt, llvm::CodeGenOptLevel::Default)); std::string AMDObj; @@ -180,34 +226,3 @@ Translator::translateToAMDGCN(llvm::Module &Mod, JITContext &JITCtx, return &JITCtx.emplaceBinary(std::move(AMDObj), BinaryFormat::AMDGCN); #endif // JIT_SUPPORT_AMDGCN } - -std::pair Translator::getTargetCPUAndFeatureAttrs( - const llvm::Module *M, const char *KernelName, BinaryFormat Format) { - assert((Format == BinaryFormat::AMDGCN || Format == BinaryFormat::PTX) && - "Unexpected format found"); - static const char *TARGET_CPU_ATTRIBUTE = "target-cpu"; - static const char *TARGET_FEATURE_ATTRIBUTE = "target-features"; - // Give priority to user specified values (through environment variables: - // SYCL_JIT_AMDGCN_PTX_TARGET_CPU and SYCL_JIT_AMDGCN_PTX_TARGET_FEATURES). - auto *KernelFunc = (M && KernelName) ? M->getFunction(KernelName) : nullptr; - auto CPUVal = ConfigHelper::get(); - auto FeaturesVal = ConfigHelper::get(); - llvm::StringRef CPU = CPUVal.begin(); - llvm::StringRef Features = FeaturesVal.begin(); - if (CPU.empty()) { - // Set to the lowest tested target according to the GetStartedGuide, section - // "Build DPC++ toolchain with support for HIP AMD" - CPU = Format == BinaryFormat::AMDGCN ? "gfx90a" : "sm_50"; - if (KernelFunc && KernelFunc->hasFnAttribute(TARGET_CPU_ATTRIBUTE)) { - CPU = KernelFunc->getFnAttribute(TARGET_CPU_ATTRIBUTE).getValueAsString(); - } - } - if (Features.empty()) { - Features = Format == BinaryFormat::PTX ? "+sm_50,+ptx76" : ""; - if (KernelFunc && KernelFunc->hasFnAttribute(TARGET_FEATURE_ATTRIBUTE)) { - Features = KernelFunc->getFnAttribute(TARGET_FEATURE_ATTRIBUTE) - .getValueAsString(); - } - } - return std::make_pair(std::string{CPU}, std::string{Features}); -} diff --git a/sycl-jit/jit-compiler/lib/translation/Translation.h b/sycl-jit/jit-compiler/lib/translation/Translation.h index 78bf4ef8ac416..9c8dfdda5f9f5 100644 --- a/sycl-jit/jit-compiler/lib/translation/Translation.h +++ b/sycl-jit/jit-compiler/lib/translation/Translation.h @@ -12,6 +12,7 @@ #include "JITContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/Error.h" +#include namespace jit_compiler { @@ -25,9 +26,6 @@ class Translator { translate(llvm::Module &Mod, JITContext &JITCtx, BinaryFormat Format, const char *KernelName = nullptr); - std::pair static getTargetCPUAndFeatureAttrs( - const llvm::Module *M, const char *KernelName, BinaryFormat Format); - private: /// Pair of address and size to represent a binary blob. using BinaryBlob = std::pair; diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index c49409561f123..bab3b9291d44f 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -8,7 +8,6 @@ #pragma once -#include "JITBinaryInfo.h" #include #include #include @@ -700,22 +699,6 @@ class device_image_impl return MRTCBinInfo && MRTCBinInfo->MLanguage == Lang; } - static ::jit_compiler::BinaryFormat getTargetFormat(const backend Backend) { - switch (Backend) { - case backend::ext_oneapi_level_zero: - case backend::opencl: - return ::jit_compiler::BinaryFormat::SPIRV; - case backend::ext_oneapi_cuda: - return ::jit_compiler::BinaryFormat::PTX; - case backend::ext_oneapi_hip: - return ::jit_compiler::BinaryFormat::AMDGCN; - default: - throw sycl::exception( - sycl::make_error_code(sycl::errc::invalid), - "Backend does not support kernel_compiler extension"); - } - } - std::vector> buildFromSource( const std::vector &Devices, const std::vector &BuildOptions, @@ -746,12 +729,9 @@ class device_image_impl } } - if (MRTCBinInfo->MLanguage == syclex::source_language::sycl) { - const auto Format = getTargetFormat(MContext.get_backend()); + if (MRTCBinInfo->MLanguage == syclex::source_language::sycl) return createSYCLImages(Devices, bundle_state::executable, BuildOptions, - LogPtr, RegisteredKernelNames, OutDeviceBins, - Format); - } + LogPtr, RegisteredKernelNames, OutDeviceBins); std::vector DeviceVec; DeviceVec.reserve(Devices.size()); @@ -839,10 +819,8 @@ class device_image_impl "device does not support source language"); } } - const auto Format = getTargetFormat(MContext.get_backend()); return createSYCLImages(Devices, bundle_state::object, CompileOptions, - LogPtr, RegisteredKernelNames, OutDeviceBins, - Format); + LogPtr, RegisteredKernelNames, OutDeviceBins); } private: @@ -1014,8 +992,8 @@ class device_image_impl const std::vector &Options, std::string *LogPtr, const std::vector &RegisteredKernelNames, - std::vector> &OutDeviceBins, - ::jit_compiler::BinaryFormat Format) const { + std::vector> &OutDeviceBins) + const { assert(MRTCBinInfo); assert(MRTCBinInfo->MLanguage == syclex::source_language::sycl); assert(std::holds_alternative(MBinImage)); @@ -1045,7 +1023,7 @@ class device_image_impl auto [Binaries, Prefix] = syclex::detail::SYCL_JIT_Compile( RegisteredKernelNames.empty() ? SourceStr : SourceExt.str(), - MRTCBinInfo->MIncludePairs, Options, LogPtr, Format); + MRTCBinInfo->MIncludePairs, Options, LogPtr); auto &PM = detail::ProgramManager::getInstance(); diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index 2884e25399e5f..189bd3d145309 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -263,7 +263,6 @@ sycl_device_binaries jit_compiler::createDeviceBinaries( Binary.addOffloadEntry(std::move(Entry)); } - bool FinalizationTagAdded = false; for (const auto &FPS : DevImgInfo.Properties) { bool IsDeviceGlobalsPropSet = FPS.Name == __SYCL_PROPERTY_SET_SYCL_DEVICE_GLOBALS; @@ -280,50 +279,18 @@ sycl_device_binaries jit_compiler::createDeviceBinaries( sycl_property_type::SYCL_PROPERTY_TYPE_BYTE_ARRAY}); } } - if (FPS.Name == __SYCL_PROPERTY_SET_PROGRAM_METADATA && - DevImgInfo.BinaryInfo.Format == - ::jit_compiler::BinaryFormat::AMDGCN) { - PropSet.addProperty(PropertyContainer{ - __SYCL_PROGRAM_METADATA_TAG_NEED_FINALIZATION, 1}); - FinalizationTagAdded = true; - } Binary.addProperty(std::move(PropSet)); Binary.setCompileOptions(BundleInfo.CompileOptions.c_str()); } - auto BinaryTarget = ""; - auto Format = SYCL_DEVICE_BINARY_TYPE_NONE; - switch (DevImgInfo.BinaryInfo.Format) { - case ::jit_compiler::BinaryFormat::SPIRV: - BinaryTarget = DevImgInfo.BinaryInfo.AddressBits == 64 - ? __SYCL_DEVICE_BINARY_TARGET_SPIRV64 - : __SYCL_DEVICE_BINARY_TARGET_SPIRV32; - Format = SYCL_DEVICE_BINARY_TYPE_SPIRV; - break; - case ::jit_compiler::BinaryFormat::PTX: - BinaryTarget = __SYCL_DEVICE_BINARY_TARGET_LLVM_NVPTX64; - Format = SYCL_DEVICE_BINARY_TYPE_NONE; - break; - case ::jit_compiler::BinaryFormat::AMDGCN: { - BinaryTarget = __SYCL_DEVICE_BINARY_TARGET_LLVM_AMDGCN; - Format = SYCL_DEVICE_BINARY_TYPE_NONE; - // If the program had no properties, the tag needs to be added now. - if (!FinalizationTagAdded) { - PropertySetContainer ProgramMetadata{ - __SYCL_PROPERTY_SET_PROGRAM_METADATA}; - ProgramMetadata.addProperty(PropertyContainer{ - __SYCL_PROGRAM_METADATA_TAG_NEED_FINALIZATION, 1}); - Binary.addProperty(std::move(ProgramMetadata)); - } - break; - } - default: - assert(false && "Unsupported format"); - }; - Collection->addDeviceBinary( - std::move(Binary), DevImgInfo.BinaryInfo.BinaryStart, - DevImgInfo.BinaryInfo.BinarySize, BinaryTarget, Format); + Collection->addDeviceBinary(std::move(Binary), + DevImgInfo.BinaryInfo.BinaryStart, + DevImgInfo.BinaryInfo.BinarySize, + (DevImgInfo.BinaryInfo.AddressBits == 64) + ? __SYCL_DEVICE_BINARY_TARGET_SPIRV64 + : __SYCL_DEVICE_BINARY_TARGET_SPIRV32, + SYCL_DEVICE_BINARY_TYPE_SPIRV); } sycl_device_binaries Binaries = Collection->getPIDeviceStruct(); @@ -344,26 +311,7 @@ void jit_compiler::destroyDeviceBinaries(sycl_device_binaries Binaries) { std::pair jit_compiler::compileSYCL( const std::string &CompilationID, const std::string &SYCLSource, const std::vector> &IncludePairs, - const std::vector &UserArgs, std::string *LogPtr, - ::jit_compiler::BinaryFormat Format) { - if (Format == ::jit_compiler::BinaryFormat::PTX || - Format == ::jit_compiler::BinaryFormat::AMDGCN) { - // If present, set-up the config with env variables describing CPU and - // features. - auto SetUpOption = [](const std::string &Value) { - ::jit_compiler::JITEnvVar Option(Value.begin(), Value.end()); - return Option; - }; - ::jit_compiler::JITEnvVar TargetCPUOpt = SetUpOption( - detail::SYCLConfig::get()); - this->AddToConfigHandle( - ::jit_compiler::option::JITTargetCPU::set(TargetCPUOpt)); - ::jit_compiler::JITEnvVar TargetFeaturesOpt = SetUpOption( - detail::SYCLConfig::get()); - this->AddToConfigHandle( - ::jit_compiler::option::JITTargetFeatures::set(TargetFeaturesOpt)); - } - + const std::vector &UserArgs, std::string *LogPtr) { auto appendToLog = [LogPtr](const char *Msg) { if (LogPtr) { LogPtr->append(Msg); @@ -391,7 +339,7 @@ std::pair jit_compiler::compileSYCL( std::vector CachedIR; if (PersistentDeviceCodeCache::isEnabled()) { auto Result = - CalculateHashHandle(SourceFile, IncludeFilesView, UserArgsView, Format); + CalculateHashHandle(SourceFile, IncludeFilesView, UserArgsView); if (Result.failed()) { appendToLog(Result.getPreprocLog()); @@ -401,9 +349,8 @@ std::pair jit_compiler::compileSYCL( } } - auto Result = - CompileSYCLHandle(SourceFile, IncludeFilesView, UserArgsView, CachedIR, - /*SaveIR=*/!CacheKey.empty(), Format); + auto Result = CompileSYCLHandle(SourceFile, IncludeFilesView, UserArgsView, + CachedIR, /*SaveIR=*/!CacheKey.empty()); const char *BuildLog = Result.getBuildLog(); appendToLog(BuildLog); diff --git a/sycl/source/detail/jit_compiler.hpp b/sycl/source/detail/jit_compiler.hpp index e0aefd1deae5d..0f0a3f3f5738c 100644 --- a/sycl/source/detail/jit_compiler.hpp +++ b/sycl/source/detail/jit_compiler.hpp @@ -8,7 +8,6 @@ #pragma once -#include #include #include #include @@ -46,8 +45,7 @@ class jit_compiler { std::pair compileSYCL( const std::string &CompilationID, const std::string &SYCLSource, const std::vector> &IncludePairs, - const std::vector &UserArgs, std::string *LogPtr, - ::jit_compiler::BinaryFormat Format); + const std::vector &UserArgs, std::string *LogPtr); void destroyDeviceBinaries(sycl_device_binaries Binaries); diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp index 418842fdc48ec..93ec94a8ac328 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp @@ -44,8 +44,7 @@ std::pair SYCL_JIT_Compile( [[maybe_unused]] const std::string &SYCLSource, [[maybe_unused]] const include_pairs_t &IncludePairs, [[maybe_unused]] const std::vector &UserArgs, - [[maybe_unused]] std::string *LogPtr, - [[maybe_unused]] ::jit_compiler::BinaryFormat Format) { + [[maybe_unused]] std::string *LogPtr) { #if SYCL_EXT_JIT_ENABLE static std::atomic_uintptr_t CompilationCounter; std::string CompilationID = "rtc_" + std::to_string(CompilationCounter++); @@ -53,7 +52,7 @@ std::pair SYCL_JIT_Compile( for (const sycl::detail::string_view UserArg : UserArgs) UserArgStrings.push_back(UserArg.data()); return sycl::detail::jit_compiler::get_instance().compileSYCL( - CompilationID, SYCLSource, IncludePairs, UserArgStrings, LogPtr, Format); + CompilationID, SYCLSource, IncludePairs, UserArgStrings, LogPtr); #else throw sycl::exception(sycl::errc::build, "kernel_compiler via sycl-jit is not available"); diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp index 007dfae9aff22..047a10a061e1f 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp @@ -8,7 +8,6 @@ #pragma once -#include #include #include // __SYCL_EXPORT #include @@ -36,7 +35,7 @@ userArgsAsString(const std::vector &UserArguments); std::pair SYCL_JIT_Compile(const std::string &Source, const include_pairs_t &IncludePairs, const std::vector &UserArgs, - std::string *LogPtr, ::jit_compiler::BinaryFormat Format); + std::string *LogPtr); void SYCL_JIT_Destroy(sycl_device_binaries Binaries); diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index 8fbeb5e98bc57..a7e0f9289aac0 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -409,10 +409,8 @@ bool is_source_kernel_bundle_supported( const std::vector &DeviceImplVec) { backend BE = DeviceImplVec[0]->getBackend(); // Support is limited to the opencl and level_zero backends. - bool BE_Acceptable = - (BE == sycl::backend::ext_oneapi_level_zero) || - (BE == sycl::backend::opencl || BE == sycl::backend::ext_oneapi_hip || - BE == sycl::backend::ext_oneapi_cuda); + bool BE_Acceptable = (BE == sycl::backend::ext_oneapi_level_zero) || + (BE == sycl::backend::opencl); if (!BE_Acceptable) return false; diff --git a/sycl/test-e2e/KernelCompiler/sycl.cpp b/sycl/test-e2e/KernelCompiler/sycl.cpp index 13003bea98a62..3095d570be946 100644 --- a/sycl/test-e2e/KernelCompiler/sycl.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl.cpp @@ -6,10 +6,11 @@ // //===----------------------------------------------------------------------===// +// REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_device_allocations // RUN: %{build} -o %t.out -// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{l0_leak_check} %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out // XFAIL: preview-mode && run-mode // XFAIL-TRACKER: https://github.com/intel/llvm/issues/18390 @@ -107,14 +108,15 @@ auto constexpr DeviceLibrariesSource = R"===( #include #include #include +#include extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(sycl::ext::oneapi::experimental::single_task_kernel) void device_libs_kernel(float *ptr) { // Extension list: llvm/lib/SYCLLowerIR/SYCLDeviceLibReqMask.cpp - // cl_intel_devicelib_assert is not available for opencl:gpu; skip testing - // it. Only test the fp32 variants of complex and math to keep this test + // cl_intel_devicelib_assert is not available for opencl:gpu; skip testing it. + // Only test the fp32 variants of complex, math and imf to keep this test // device-agnostic. // cl_intel_devicelib_math @@ -126,8 +128,14 @@ void device_libs_kernel(float *ptr) { // cl_intel_devicelib_cstring ptr[2] = memcmp(ptr + 2, ptr + 2, sizeof(float)); + // cl_intel_devicelib_imf + ptr[3] = sycl::ext::intel::math::sqrt(ptr[3] * 2); + + // cl_intel_devicelib_imf_bf16 + ptr[4] = sycl::ext::intel::math::float2bfloat16(ptr[4] * 0.5f); + // cl_intel_devicelib_bfloat16 - ptr[3] = sycl::ext::oneapi::bfloat16{ptr[3] / 0.25f}; + ptr[5] = sycl::ext::oneapi::bfloat16{ptr[5] / 0.25f}; } )==="; @@ -348,7 +356,7 @@ int test_device_libraries(sycl::queue q) { exe_kb kbExe = syclex::build(kbSrc); sycl::kernel k = kbExe.ext_oneapi_get_kernel("device_libs_kernel"); - constexpr size_t nElem = 4; + constexpr size_t nElem = 6; float *ptr = sycl::malloc_shared(nElem, q); for (int i = 0; i < nElem; ++i) ptr[i] = 1.0f; diff --git a/sycl/test-e2e/KernelCompiler/sycl_basic.cpp b/sycl/test-e2e/KernelCompiler/sycl_basic.cpp index fbb7768fd8ac8..5832d5ff50685 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_basic.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_basic.cpp @@ -6,10 +6,11 @@ // //===----------------------------------------------------------------------===// +// REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_device_allocations // RUN: %{build} -o %t.out -// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{l0_leak_check} %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out #include #include diff --git a/sycl/test-e2e/KernelCompiler/sycl_cache.cpp b/sycl/test-e2e/KernelCompiler/sycl_cache.cpp index 0cffec11ffa47..405d97ce15e96 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_cache.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_cache.cpp @@ -7,11 +7,6 @@ //===----------------------------------------------------------------------===// // REQUIRES: (opencl || level_zero) -// Unlike other RTC tests, don't run this one on Cuda/HIP. Eviction mechanism -// is based on the size of compiled kernels, which in turns depends on the -// target. Don't run eviction check for CUDA/HIP, so that we don't have to find -// a magic number that works for all binaries (and by definition is flaky). - // REQUIRES: aspect-usm_device_allocations // DEFINE: %{cache_vars} = env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_TRACE=7 SYCL_CACHE_DIR=%t/cache_dir diff --git a/sycl/test-e2e/KernelCompiler/sycl_context_error.cpp b/sycl/test-e2e/KernelCompiler/sycl_context_error.cpp index 7d41f3a3f79d0..e72c733fdbf9f 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_context_error.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_context_error.cpp @@ -6,8 +6,10 @@ // //===----------------------------------------------------------------------===// +// REQUIRES: (opencl || level_zero) + // RUN: %{build} -o %t.out -// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run-unfiltered-devices} %t.out +// RUN: %{run-unfiltered-devices} %t.out #include #include diff --git a/sycl/test-e2e/KernelCompiler/sycl_device_globals.cpp b/sycl/test-e2e/KernelCompiler/sycl_device_globals.cpp index ba8b08584df81..2296aba1d9504 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_device_globals.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_device_globals.cpp @@ -6,13 +6,14 @@ // //===----------------------------------------------------------------------===// +// REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_device_allocations // UNSUPPORTED: opencl && gpu // UNSUPPORTED-TRACKER: GSD-4287 // RUN: %{build} -o %t.out -// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{l0_leak_check} %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out #include #include diff --git a/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp b/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp index 4ca1f5296dc7b..410f4f3e8d753 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_export_registration.cpp @@ -8,8 +8,6 @@ // REQUIRES: (opencl || level_zero) -// Note linking is not supported on CUDA/HIP. - // -- Test for a case where a kernel bundle is built that exports a symbol and // -- other kernel bundles that uses it are compiled/linked without it. These // -- cases should fail due to unresolved symbols, rather than picking up the diff --git a/sycl/test-e2e/KernelCompiler/sycl_imf.cpp b/sycl/test-e2e/KernelCompiler/sycl_imf.cpp deleted file mode 100644 index 142cc2160ac52..0000000000000 --- a/sycl/test-e2e/KernelCompiler/sycl_imf.cpp +++ /dev/null @@ -1,79 +0,0 @@ -//==--- sycl_imf.cpp --- kernel_compiler extension imf tests ---------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// REQUIRES: aspect-usm_device_allocations -// REQUIRES: (opencl || level_zero) - -// UNSUPPORTED: accelerator -// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. - -// RUN: %{build} -o %t.out -// RUN: %{l0_leak_check} %{run} %t.out - -// XFAIL: preview-mode && run-mode -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/18390 - -#include -#include -#include - -auto constexpr IMFSources = R"===( -#include -#include -#include - -extern "C" SYCL_EXTERNAL -SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(sycl::ext::oneapi::experimental::single_task_kernel) -void imf_kernel(float *ptr) { - // cl_intel_devicelib_imf - ptr[0] = sycl::ext::intel::math::sqrt(ptr[0] * 2); - - // cl_intel_devicelib_imf_bf16 - ptr[1] = sycl::ext::intel::math::float2bfloat16(ptr[1] * 0.5f); -} -)==="; - -int main() { -#ifdef SYCL_EXT_ONEAPI_KERNEL_COMPILER - sycl::queue q; - namespace syclex = sycl::ext::oneapi::experimental; - using source_kb = sycl::kernel_bundle; - using exe_kb = sycl::kernel_bundle; - - sycl::context ctx = q.get_context(); - - source_kb kbSrc = syclex::create_kernel_bundle_from_source( - ctx, syclex::source_language::sycl, IMFSources); - exe_kb kbExe = syclex::build(kbSrc); - - sycl::kernel k = kbExe.ext_oneapi_get_kernel("imf_kernel"); - constexpr size_t nElem = 2; - float *ptr = sycl::malloc_shared(nElem, q); - for (int i = 0; i < nElem; ++i) - ptr[i] = 1.0f; - - q.submit([&](sycl::handler &cgh) { - cgh.set_arg(0, ptr); - cgh.single_task(k); - }); - q.wait_and_throw(); - - // Check that the kernel was executed. Given the {1.0, 1.0} input, - // the expected result is approximately {1.41, 0.5}. - for (unsigned i = 0; i < nElem; ++i) { - std::cout << ptr[i] << ' '; - assert(ptr[i] != 1.0f); - } - std::cout << std::endl; - - sycl::free(ptr, q); -#else - static_assert(false, "Kernel Compiler feature test macro undefined"); -#endif - return 0; -} diff --git a/sycl/test-e2e/KernelCompiler/sycl_include_paths.cpp b/sycl/test-e2e/KernelCompiler/sycl_include_paths.cpp index d073390a3d6a9..0cc50413dbad4 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_include_paths.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_include_paths.cpp @@ -6,14 +6,15 @@ // //===----------------------------------------------------------------------===// +// REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_device_allocations // RUN: %{build} -o %t.out -// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out %S | FileCheck %s --check-prefixes=CHECK,CHECK-NOCWD +// RUN: %{run} %t.out %S | FileCheck %s --check-prefixes=CHECK,CHECK-NOCWD // COM: Run test again in a directory that contains a different version of // `header1.hpp` -// RUN: cd %S/include/C ; %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out %S | FileCheck %s --check-prefixes=CHECK,CHECK-CWD +// RUN: cd %S/include/C ; %{run} %t.out %S | FileCheck %s --check-prefixes=CHECK,CHECK-CWD #include #include diff --git a/sycl/test-e2e/KernelCompiler/sycl_join.cpp b/sycl/test-e2e/KernelCompiler/sycl_join.cpp index 1cc0c8e8d2d8e..33cf81dd6bdcb 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_join.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_join.cpp @@ -6,11 +6,12 @@ // //===----------------------------------------------------------------------===// +// REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_shared_allocations // RUN: %{build} -o %t.out -// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out -// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{l0_leak_check} %{run} %t.out +// RUN: %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out // XFAIL: preview-mode && run-mode // XFAIL-TRACKER: https://github.com/intel/llvm/issues/18390 diff --git a/sycl/test-e2e/KernelCompiler/sycl_lifetimes.cpp b/sycl/test-e2e/KernelCompiler/sycl_lifetimes.cpp index 7d38e44cab6c2..94864926af4c4 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_lifetimes.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_lifetimes.cpp @@ -6,10 +6,11 @@ // //===----------------------------------------------------------------------===// +// REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_device_allocations // RUN: %{build} -o %t.out -// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} env SYCL_UR_TRACE=-1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s +// RUN: env SYCL_UR_TRACE=-1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s #include #include @@ -47,15 +48,14 @@ int test_lifetimes() { ctx, syclex::source_language::sycl, SYCLSource); exe_kb kbExe1 = syclex::build(kbSrc); - // Cuda/Hip programs will be created with Binary, spirv IL. - // CHECK: urProgramCreateWith{{IL|Binary}}{{.*}}phProgram{{.*}}([[PROG1:.*]])) + // CHECK: urProgramCreateWithIL{{.*}}phProgram{{.*}}([[PROG1:.*]])) { std::cout << "Scope1\n"; // CHECK: Scope1 exe_kb kbExe2 = syclex::build(kbSrc); // kbExe2 goes out of scope; its kernels are removed from program mananager. - // CHECK: urProgramCreateWith{{IL|Binary}}{{.*}}phProgram{{.*}}([[PROG2:.*]])) + // CHECK: urProgramCreateWithIL{{.*}}phProgram{{.*}}([[PROG2:.*]])) // CHECK: urProgramRelease{{.*}}[[PROG2]] } std::cout << "End Scope1\n"; diff --git a/sycl/test-e2e/KernelCompiler/sycl_link.cpp b/sycl/test-e2e/KernelCompiler/sycl_link.cpp index 1453e02b773dc..7187488d0f0dc 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_link.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_link.cpp @@ -12,8 +12,6 @@ // -- Test for a simple linking case with source files compiled from SYCL source // -- at runtime. -// Note linking is not supported on CUDA/HIP. - // RUN: %{build} -o %t.out // RUN: %{run} %t.out // RUN: %{l0_leak_check} %{run} %t.out diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp index f639a9bb3a337..b1d66710c2fa2 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep.cpp @@ -9,8 +9,6 @@ // REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_shared_allocations -// Note linking is not supported on CUDA/HIP. - // -- Test for linking where two kernels use the same imported symbols. // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp index 5183a927881e1..4976a9066bc3e 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_link_common_dep_optional_feature.cpp @@ -9,8 +9,6 @@ // REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_shared_allocations -// Note linking is not supported on CUDA/HIP. - // -- Test for linking where two kernels use the same imported symbols, but one // -- may not be supported on the device. diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_export_conflict.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_export_conflict.cpp index 0739a5d9da00a..4f81eeb684fe4 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_link_export_conflict.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_link_export_conflict.cpp @@ -9,8 +9,6 @@ // REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_shared_allocations -// Note linking is not supported on CUDA/HIP. - // -- Test for a case where a kernel bundle with an exported symbol is compiled // -- before another kernel bundle using a different variant of the symbol. diff --git a/sycl/test-e2e/KernelCompiler/sycl_link_kernel_conflict.cpp b/sycl/test-e2e/KernelCompiler/sycl_link_kernel_conflict.cpp index 0636fbff37bf7..41b8c99c137a3 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_link_kernel_conflict.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_link_kernel_conflict.cpp @@ -9,8 +9,6 @@ // REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_shared_allocations -// Note linking is not supported on CUDA/HIP. - // -- Test for the linking of two kernels with conflicting definitions of // -- kernels with the same name. diff --git a/sycl/test-e2e/KernelCompiler/sycl_namespaces.cpp b/sycl/test-e2e/KernelCompiler/sycl_namespaces.cpp index 29c54161506f9..89ad5f5e67c40 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_namespaces.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_namespaces.cpp @@ -6,10 +6,11 @@ // //===----------------------------------------------------------------------===// +// REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_device_allocations // RUN: %{build} -o %t.out -// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{l0_leak_check} %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out #include #include diff --git a/sycl/test-e2e/KernelCompiler/sycl_overload.cpp b/sycl/test-e2e/KernelCompiler/sycl_overload.cpp index a1b6cb787d030..2e7b5fee432a5 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_overload.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_overload.cpp @@ -6,10 +6,11 @@ // //===----------------------------------------------------------------------===// +// REQUIRES: (opencl || level_zero) // REQUIRES: aspect-usm_device_allocations // RUN: %{build} -o %t.out -// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{l0_leak_check} %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out #include #include diff --git a/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp b/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp index 4b1a04d1bcb8d..861256f5f0ed1 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // RUN: %{build} -o %t.out -// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out | FileCheck %s +// RUN: %{run} %t.out | FileCheck %s #include #include diff --git a/sycl/test-e2e/format.py b/sycl/test-e2e/format.py index ff2f8e36ecfb7..44da0ef1c52c4 100644 --- a/sycl/test-e2e/format.py +++ b/sycl/test-e2e/format.py @@ -244,7 +244,6 @@ def execute(self, test, litConfig): ) sycl_target_opts += hip_arch_opts substitutions.append(("%{hip_arch_opts}", hip_arch_opts)) - substitutions.append(("%{amd_arch}", test.config.amd_arch)) if ( "target-spir" in build_targets and "spirv-backend" in test.config.available_features diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index 7f3757fc70624..13f0fc51bc037 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 29 +// CHECK-NUM-MATCHES: 27 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see From 479601aa8ea20afe4240fd43bd8320cb2a5cd82b Mon Sep 17 00:00:00 2001 From: "Pirog, Mikolaj Maciej" Date: Fri, 4 Jul 2025 05:47:28 -0700 Subject: [PATCH 2/5] Formatting --- sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index bc5d638a25255..20891e7615b5c 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -178,7 +178,8 @@ class RTCToolActionBase : public ToolAction { assert(!hasExecuted() && "Action should only be invoked on a single file"); // Create a compiler instance to handle the actual work. - CompilerInstance Compiler(std::move(Invocation), std::move(PCHContainerOps)); + CompilerInstance Compiler(std::move(Invocation), + std::move(PCHContainerOps)); Compiler.setFileManager(Files); // Suppress summary with number of warnings and errors being printed to // stdout. From 91e9369fd4e43774da7301744e6f16496c72bdc1 Mon Sep 17 00:00:00 2001 From: "Pirog, Mikolaj Maciej" Date: Fri, 4 Jul 2025 06:13:16 -0700 Subject: [PATCH 3/5] Fix test --- sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index 13f0fc51bc037..4292342068187 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 27 +// CHECK-NUM-MATCHES: 28 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see From 60c0d05d233fdd9d34d1c4eafabf512532ddbbf2 Mon Sep 17 00:00:00 2001 From: "Pirog, Mikolaj Maciej" Date: Fri, 4 Jul 2025 06:58:37 -0700 Subject: [PATCH 4/5] Revert "[SYCL] Fix compilation when JIT is disabled (#19302)" This reverts commit d82acd7745392f7afa64c43e607345f64555674a. --- sycl/source/detail/jit_compiler.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/source/detail/jit_compiler.hpp b/sycl/source/detail/jit_compiler.hpp index 0f0a3f3f5738c..31484af10c8b6 100644 --- a/sycl/source/detail/jit_compiler.hpp +++ b/sycl/source/detail/jit_compiler.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include From debfa35dd40dd11822ea7b3ec226f0e69bb06fc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Pir=C3=B3g?= Date: Fri, 4 Jul 2025 20:54:25 +0200 Subject: [PATCH 5/5] Update sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp Co-authored-by: Nicolas Miller --- sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp b/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp index 861256f5f0ed1..d3916dfc3803b 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_time_trace.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - +// REQUIRES: (opencl || level_zero) // RUN: %{build} -o %t.out // RUN: %{run} %t.out | FileCheck %s