-
Notifications
You must be signed in to change notification settings - Fork 794
[SYCL] Bring back RTC support for AMD and Nvidia GPU targets #19342
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e68f896
90d9736
ba63ac4
306ce76
53e4655
3c2a6b1
e232425
37dd687
8a10225
66986e2
f9693ea
f00d2a7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1162,6 +1162,20 @@ void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args, | |
HostTC.AddIAMCUIncludeArgs(Args, CC1Args); | ||
} | ||
|
||
// Returns the bitcode libraries this CUDA toolchain wants linked into device | ||
// code. The GPU architecture is read from the last `-march=` value in | ||
// DriverArgs and passed to `CudaInstallation.getLibDeviceFile`. If that lookup | ||
// yields an empty path, `err_drv_no_cuda_libdevice` is reported for the | ||
// architecture and an empty list is returned; otherwise the result is a | ||
// single-entry list holding the libdevice path. | ||
// DeviceOffloadingKind is not consulted in this body. | ||
// NOTE(review): when no `-march=` is present GpuArch is presumably empty -- | ||
// confirm callers always supply it. | ||
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> | ||
CudaToolChain::getDeviceLibs( | ||
const llvm::opt::ArgList &DriverArgs, | ||
const Action::OffloadKind DeviceOffloadingKind) const { | ||
StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ); | ||
std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch); | ||
if (LibDeviceFile.empty()) { | ||
getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, this PR also adds the first and only callsite for this method, and we unconditionally set |
||
return {}; | ||
} | ||
|
||
return {BitCodeLibraryInfo{LibDeviceFile}}; | ||
} | ||
|
||
SanitizerMask CudaToolChain::getSupportedSanitizers() const { | ||
// The CudaToolChain only supports sanitizers in the sense that it allows | ||
// sanitizer arguments on the command line if they are supported by the host | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,12 +8,16 @@ | |
|
||
#include "DeviceCompilation.h" | ||
#include "ESIMD.h" | ||
#include "JITBinaryInfo.h" | ||
#include "translation/Translation.h" | ||
|
||
#include <clang/Basic/DiagnosticDriver.h> | ||
#include <clang/Basic/Version.h> | ||
#include <clang/CodeGen/CodeGenAction.h> | ||
#include <clang/Driver/Compilation.h> | ||
#include <clang/Driver/Driver.h> | ||
#include <clang/Driver/Options.h> | ||
#include <clang/Driver/ToolChain.h> | ||
#include <clang/Frontend/ChainedDiagnosticConsumer.h> | ||
#include <clang/Frontend/CompilerInstance.h> | ||
#include <clang/Frontend/FrontendActions.h> | ||
|
@@ -52,6 +56,7 @@ using namespace llvm::opt; | |
using namespace llvm::sycl; | ||
using namespace llvm::module_split; | ||
using namespace llvm::util; | ||
using namespace llvm::vfs; | ||
using namespace jit_compiler; | ||
|
||
#ifdef _GNU_SOURCE | ||
|
@@ -313,7 +318,7 @@ class LLVMDiagnosticWrapper : public llvm::DiagnosticHandler { | |
} // anonymous namespace | ||
|
||
static void adjustArgs(const InputArgList &UserArgList, | ||
const std::string &DPCPPRoot, | ||
const std::string &DPCPPRoot, BinaryFormat Format, | ||
SmallVectorImpl<std::string> &CommandLine) { | ||
DerivedArgList DAL{UserArgList}; | ||
const auto &OptTable = getDriverOptTable(); | ||
|
@@ -326,6 +331,17 @@ static void adjustArgs(const InputArgList &UserArgList, | |
// unused argument warning. | ||
DAL.AddFlagArg(nullptr, OptTable.getOption(OPT_Qunused_arguments)); | ||
|
||
if (Format == BinaryFormat::PTX || Format == BinaryFormat::AMDGCN) { | ||
auto [CPU, Features] = | ||
Translator::getTargetCPUAndFeatureAttrs(nullptr, "", Format); | ||
(void)Features; | ||
StringRef OT = Format == BinaryFormat::PTX ? "nvptx64-nvidia-cuda" | ||
: "amdgcn-amd-amdhsa"; | ||
DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_fsycl_targets_EQ), OT); | ||
DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_Xsycl_backend_EQ), OT); | ||
DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_offload_arch_EQ), CPU); | ||
} | ||
|
||
ArgStringList ASL; | ||
for_each(DAL, [&DAL, &ASL](Arg *A) { A->render(DAL, ASL); }); | ||
for_each(UserArgList, | ||
|
@@ -362,10 +378,9 @@ static void setupTool(ClangTool &Tool, const std::string &DPCPPRoot, | |
}); | ||
} | ||
|
||
Expected<std::string> | ||
jit_compiler::calculateHash(InMemoryFile SourceFile, | ||
View<InMemoryFile> IncludeFiles, | ||
const InputArgList &UserArgList) { | ||
Expected<std::string> jit_compiler::calculateHash( | ||
InMemoryFile SourceFile, View<InMemoryFile> IncludeFiles, | ||
const InputArgList &UserArgList, BinaryFormat Format) { | ||
TimeTraceScope TTS{"calculateHash"}; | ||
|
||
const std::string &DPCPPRoot = getDPCPPRoot(); | ||
|
@@ -374,7 +389,7 @@ jit_compiler::calculateHash(InMemoryFile SourceFile, | |
} | ||
|
||
SmallVector<std::string> CommandLine; | ||
adjustArgs(UserArgList, DPCPPRoot, CommandLine); | ||
adjustArgs(UserArgList, DPCPPRoot, Format, CommandLine); | ||
|
||
FixedCompilationDatabase DB{".", CommandLine}; | ||
ClangTool Tool{DB, {SourceFile.Path}}; | ||
|
@@ -400,11 +415,10 @@ jit_compiler::calculateHash(InMemoryFile SourceFile, | |
return createStringError("Calculating source hash failed"); | ||
} | ||
|
||
Expected<ModuleUPtr> | ||
jit_compiler::compileDeviceCode(InMemoryFile SourceFile, | ||
View<InMemoryFile> IncludeFiles, | ||
const InputArgList &UserArgList, | ||
std::string &BuildLog, LLVMContext &Context) { | ||
Expected<ModuleUPtr> jit_compiler::compileDeviceCode( | ||
InMemoryFile SourceFile, View<InMemoryFile> IncludeFiles, | ||
const InputArgList &UserArgList, std::string &BuildLog, | ||
LLVMContext &Context, BinaryFormat Format) { | ||
TimeTraceScope TTS{"compileDeviceCode"}; | ||
|
||
const std::string &DPCPPRoot = getDPCPPRoot(); | ||
|
@@ -413,7 +427,7 @@ jit_compiler::compileDeviceCode(InMemoryFile SourceFile, | |
} | ||
|
||
SmallVector<std::string> CommandLine; | ||
adjustArgs(UserArgList, DPCPPRoot, CommandLine); | ||
adjustArgs(UserArgList, DPCPPRoot, Format, CommandLine); | ||
|
||
FixedCompilationDatabase DB{".", CommandLine}; | ||
ClangTool Tool{DB, {SourceFile.Path}}; | ||
|
@@ -431,12 +445,22 @@ jit_compiler::compileDeviceCode(InMemoryFile SourceFile, | |
return createStringError(BuildLog); | ||
} | ||
|
||
// This function is a simplified copy of the device library selection process in | ||
// `clang::driver::tools::SYCL::getDeviceLibraries`, assuming a SPIR-V target | ||
// (no AoT, no third-party GPUs, no native CPU). Keep in sync! | ||
// This function is a simplified copy of the device library selection process | ||
// in `clang::driver::tools::SYCL::getDeviceLibraries`, assuming a SPIR-V or | ||
// GPU target (no AoT, no native CPU). Keep in sync! | ||
static bool getDeviceLibraries(const ArgList &Args, | ||
SmallVectorImpl<std::string> &LibraryList, | ||
DiagnosticsEngine &Diags) { | ||
DiagnosticsEngine &Diags, BinaryFormat Format) { | ||
// For CUDA/HIP we only need devicelib, early exit here. | ||
if (Format == BinaryFormat::PTX) { | ||
LibraryList.push_back( | ||
Args.MakeArgString("devicelib-nvptx64-nvidia-cuda.bc")); | ||
return false; | ||
} else if (Format == BinaryFormat::AMDGCN) { | ||
LibraryList.push_back(Args.MakeArgString("devicelib-amdgcn-amd-amdhsa.bc")); | ||
return false; | ||
} | ||
|
||
struct DeviceLibOptInfo { | ||
StringRef DeviceLibName; | ||
StringRef DeviceLibOption; | ||
|
@@ -541,7 +565,8 @@ static Expected<ModuleUPtr> loadBitcodeLibrary(StringRef LibPath, | |
|
||
Error jit_compiler::linkDeviceLibraries(llvm::Module &Module, | ||
const InputArgList &UserArgList, | ||
std::string &BuildLog) { | ||
std::string &BuildLog, | ||
BinaryFormat Format) { | ||
TimeTraceScope TTS{"linkDeviceLibraries"}; | ||
|
||
const std::string &DPCPPRoot = getDPCPPRoot(); | ||
|
@@ -556,11 +581,29 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module, | |
/* ShouldOwnClient=*/false); | ||
|
||
SmallVector<std::string> LibNames; | ||
bool FoundUnknownLib = getDeviceLibraries(UserArgList, LibNames, Diags); | ||
const bool FoundUnknownLib = | ||
getDeviceLibraries(UserArgList, LibNames, Diags, Format); | ||
if (FoundUnknownLib) { | ||
return createStringError("Could not determine list of device libraries: %s", | ||
BuildLog.c_str()); | ||
} | ||
const bool IsCudaHIP = | ||
Format == BinaryFormat::PTX || Format == BinaryFormat::AMDGCN; | ||
if (IsCudaHIP) { | ||
// Based on the OS and the format decide on the version of libspirv. | ||
// NOTE: this will be problematic if cross-compiling between OSes. | ||
std::string Libclc{"clc/"}; | ||
Libclc.append( | ||
#ifdef _WIN32 | ||
"remangled-l32-signed_char.libspirv-" | ||
#else | ||
"remangled-l64-signed_char.libspirv-" | ||
#endif | ||
); | ||
Libclc.append(Format == BinaryFormat::PTX ? "nvptx64-nvidia-cuda.bc" | ||
: "amdgcn-amd-amdhsa.bc"); | ||
LibNames.push_back(Libclc); | ||
} | ||
|
||
LLVMContext &Context = Module.getContext(); | ||
for (const std::string &LibName : LibNames) { | ||
|
@@ -578,6 +621,72 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module, | |
} | ||
} | ||
|
||
// For GPU targets we need to link against vendor provided libdevice. | ||
if (IsCudaHIP) { | ||
std::string Argv0 = DPCPPRoot + "/bin/clang++"; | ||
Triple T{Module.getTargetTriple()}; | ||
IntrusiveRefCntPtr<OverlayFileSystem> OFS{ | ||
new OverlayFileSystem{getRealFileSystem()}}; | ||
IntrusiveRefCntPtr<InMemoryFileSystem> VFS{new InMemoryFileSystem}; | ||
std::string CppFileName{"a.cpp"}; | ||
VFS->addFile(CppFileName, /*ModificationTime=*/0, | ||
MemoryBuffer::getMemBuffer("", "")); | ||
OFS->pushOverlay(VFS); | ||
Driver D{Argv0, T.getTriple(), Diags, "dpcpp compiler driver", OFS}; | ||
|
||
SmallVector<std::string> CommandLine; | ||
CommandLine.push_back(Argv0); | ||
adjustArgs(UserArgList, DPCPPRoot, Format, CommandLine); | ||
CommandLine.push_back(CppFileName); | ||
SmallVector<const char *> CommandLineCStr(CommandLine.size()); | ||
llvm::transform(CommandLine, CommandLineCStr.begin(), | ||
[](const auto &S) { return S.c_str(); }); | ||
|
||
Compilation *C = D.BuildCompilation(CommandLineCStr); | ||
if (!C) { | ||
return createStringError("Unable to construct driver for CUDA/HIP"); | ||
} | ||
|
||
const ToolChain *OffloadTC = | ||
C->getSingleOffloadToolChain<Action::OFK_SYCL>(); | ||
InputArgList EmptyArgList; | ||
auto Archs = | ||
D.getOffloadArchs(*C, EmptyArgList, Action::OFK_SYCL, OffloadTC); | ||
assert(Archs.size() == 1 && | ||
"Offload toolchain should be configured to single architecture"); | ||
StringRef CPU = *Archs.begin(); | ||
|
||
// Pass only `-march=` or `-mcpu=` with the GPU arch determined by the | ||
// driver to `getDeviceLibs`. | ||
DerivedArgList CPUArgList{EmptyArgList}; | ||
if (Format == BinaryFormat::PTX) { | ||
CPUArgList.AddJoinedArg(nullptr, D.getOpts().getOption(OPT_march_EQ), | ||
CPU); | ||
} else { | ||
CPUArgList.AddJoinedArg(nullptr, D.getOpts().getOption(OPT_mcpu_EQ), CPU); | ||
} | ||
|
||
SmallVector<ToolChain::BitCodeLibraryInfo, 12> CommonDeviceLibs = | ||
OffloadTC->getDeviceLibs(CPUArgList, Action::OffloadKind::OFK_SYCL); | ||
Comment on lines
+669
to
+670
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note to reviewers: This (and the setup of the |
||
if (CommonDeviceLibs.empty()) { | ||
return createStringError("Unable to find common device libraries"); | ||
} | ||
|
||
for (auto &Lib : CommonDeviceLibs) { | ||
ModuleUPtr LibModule; | ||
if (auto Error = | ||
loadBitcodeLibrary(Lib.Path, Context).moveInto(LibModule)) { | ||
return Error; | ||
} | ||
|
||
if (Linker::linkModules(Module, std::move(LibModule), | ||
Linker::LinkOnlyNeeded)) { | ||
return createStringError("Unable to link device library %s: %s", | ||
Lib.Path.c_str(), BuildLog.c_str()); | ||
} | ||
} | ||
} | ||
|
||
return Error::success(); | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Kind ping @intel/dpcpp-clang-driver-reviewers -- any concerns about adding this method?