8
8
9
9
#include " DeviceCompilation.h"
10
10
#include " ESIMD.h"
11
- #include " JITBinaryInfo.h"
12
- #include " translation/Translation.h"
13
11
14
- #include < Driver/ToolChains/AMDGPU.h>
15
- #include < Driver/ToolChains/Cuda.h>
16
- #include < Driver/ToolChains/LazyDetector.h>
17
12
#include < clang/Basic/DiagnosticDriver.h>
18
13
#include < clang/Basic/Version.h>
19
14
#include < clang/CodeGen/CodeGenAction.h>
20
15
#include < clang/Driver/Compilation.h>
21
- #include < clang/Driver/Driver.h>
22
16
#include < clang/Driver/Options.h>
23
17
#include < clang/Frontend/ChainedDiagnosticConsumer.h>
24
18
#include < clang/Frontend/CompilerInstance.h>
@@ -184,8 +178,7 @@ class RTCToolActionBase : public ToolAction {
184
178
assert (!hasExecuted () && " Action should only be invoked on a single file" );
185
179
186
180
// Create a compiler instance to handle the actual work.
187
- CompilerInstance Compiler (std::move (Invocation),
188
- std::move (PCHContainerOps));
181
+ CompilerInstance Compiler (std::move (Invocation), std::move (PCHContainerOps));
189
182
Compiler.setFileManager (Files);
190
183
// Suppress summary with number of warnings and errors being printed to
191
184
// stdout.
@@ -319,7 +312,7 @@ class LLVMDiagnosticWrapper : public llvm::DiagnosticHandler {
319
312
} // anonymous namespace
320
313
321
314
static void adjustArgs (const InputArgList &UserArgList,
322
- const std::string &DPCPPRoot, BinaryFormat Format,
315
+ const std::string &DPCPPRoot,
323
316
SmallVectorImpl<std::string> &CommandLine) {
324
317
DerivedArgList DAL{UserArgList};
325
318
const auto &OptTable = getDriverOptTable ();
@@ -332,23 +325,6 @@ static void adjustArgs(const InputArgList &UserArgList,
332
325
// unused argument warning.
333
326
DAL.AddFlagArg (nullptr , OptTable.getOption (OPT_Qunused_arguments));
334
327
335
- if (Format == BinaryFormat::PTX || Format == BinaryFormat::AMDGCN) {
336
- auto [CPU, Features] =
337
- Translator::getTargetCPUAndFeatureAttrs (nullptr , " " , Format);
338
- (void )Features;
339
- if (Format == BinaryFormat::AMDGCN) {
340
- DAL.AddJoinedArg (nullptr , OptTable.getOption (OPT_fsycl_targets_EQ),
341
- " amdgcn-amd-amdhsa" );
342
- DAL.AddJoinedArg (nullptr , OptTable.getOption (OPT_Xsycl_backend_EQ),
343
- " amdgcn-amd-amdhsa" );
344
- DAL.AddJoinedArg (nullptr , OptTable.getOption (OPT_offload_arch_EQ), CPU);
345
- } else {
346
- DAL.AddJoinedArg (nullptr , OptTable.getOption (OPT_fsycl_targets_EQ),
347
- " nvptx64-nvidia-cuda" );
348
- DAL.AddFlagArg (nullptr , OptTable.getOption (OPT_Xsycl_backend));
349
- DAL.AddJoinedArg (nullptr , OptTable.getOption (OPT_cuda_gpu_arch_EQ), CPU);
350
- }
351
- }
352
328
ArgStringList ASL;
353
329
for_each (DAL, [&DAL, &ASL](Arg *A) { A->render (DAL, ASL); });
354
330
for_each (UserArgList,
@@ -385,9 +361,10 @@ static void setupTool(ClangTool &Tool, const std::string &DPCPPRoot,
385
361
});
386
362
}
387
363
388
- Expected<std::string> jit_compiler::calculateHash (
389
- InMemoryFile SourceFile, View<InMemoryFile> IncludeFiles,
390
- const InputArgList &UserArgList, BinaryFormat Format) {
364
+ Expected<std::string>
365
+ jit_compiler::calculateHash (InMemoryFile SourceFile,
366
+ View<InMemoryFile> IncludeFiles,
367
+ const InputArgList &UserArgList) {
391
368
TimeTraceScope TTS{" calculateHash" };
392
369
393
370
const std::string &DPCPPRoot = getDPCPPRoot ();
@@ -396,7 +373,7 @@ Expected<std::string> jit_compiler::calculateHash(
396
373
}
397
374
398
375
SmallVector<std::string> CommandLine;
399
- adjustArgs (UserArgList, DPCPPRoot, Format, CommandLine);
376
+ adjustArgs (UserArgList, DPCPPRoot, CommandLine);
400
377
401
378
FixedCompilationDatabase DB{" ." , CommandLine};
402
379
ClangTool Tool{DB, {SourceFile.Path }};
@@ -422,10 +399,11 @@ Expected<std::string> jit_compiler::calculateHash(
422
399
return createStringError (" Calculating source hash failed" );
423
400
}
424
401
425
- Expected<ModuleUPtr> jit_compiler::compileDeviceCode (
426
- InMemoryFile SourceFile, View<InMemoryFile> IncludeFiles,
427
- const InputArgList &UserArgList, std::string &BuildLog,
428
- LLVMContext &Context, BinaryFormat Format) {
402
+ Expected<ModuleUPtr>
403
+ jit_compiler::compileDeviceCode (InMemoryFile SourceFile,
404
+ View<InMemoryFile> IncludeFiles,
405
+ const InputArgList &UserArgList,
406
+ std::string &BuildLog, LLVMContext &Context) {
429
407
TimeTraceScope TTS{" compileDeviceCode" };
430
408
431
409
const std::string &DPCPPRoot = getDPCPPRoot ();
@@ -434,7 +412,7 @@ Expected<ModuleUPtr> jit_compiler::compileDeviceCode(
434
412
}
435
413
436
414
SmallVector<std::string> CommandLine;
437
- adjustArgs (UserArgList, DPCPPRoot, Format, CommandLine);
415
+ adjustArgs (UserArgList, DPCPPRoot, CommandLine);
438
416
439
417
FixedCompilationDatabase DB{" ." , CommandLine};
440
418
ClangTool Tool{DB, {SourceFile.Path }};
@@ -452,22 +430,12 @@ Expected<ModuleUPtr> jit_compiler::compileDeviceCode(
452
430
return createStringError (BuildLog);
453
431
}
454
432
455
- // This function is a simplified copy of the device library selection process
456
- // in `clang::driver::tools::SYCL::getDeviceLibraries`, assuming a SPIR-V, or
457
- // GPU targets (no AoT, no native CPU). Keep in sync!
433
+ // This function is a simplified copy of the device library selection process in
434
+ // `clang::driver::tools::SYCL::getDeviceLibraries`, assuming a SPIR-V target
435
+ // (no AoT, no third-party GPUs, no native CPU). Keep in sync!
458
436
static bool getDeviceLibraries (const ArgList &Args,
459
437
SmallVectorImpl<std::string> &LibraryList,
460
- DiagnosticsEngine &Diags, BinaryFormat Format) {
461
- // For CUDA/HIP we only need devicelib, early exit here.
462
- if (Format == BinaryFormat::PTX) {
463
- LibraryList.push_back (
464
- Args.MakeArgString (" devicelib-nvptx64-nvidia-cuda.bc" ));
465
- return false ;
466
- } else if (Format == BinaryFormat::AMDGCN) {
467
- LibraryList.push_back (Args.MakeArgString (" devicelib-amdgcn-amd-amdhsa.bc" ));
468
- return false ;
469
- }
470
-
438
+ DiagnosticsEngine &Diags) {
471
439
struct DeviceLibOptInfo {
472
440
StringRef DeviceLibName;
473
441
StringRef DeviceLibOption;
@@ -572,8 +540,7 @@ static Expected<ModuleUPtr> loadBitcodeLibrary(StringRef LibPath,
572
540
573
541
Error jit_compiler::linkDeviceLibraries (llvm::Module &Module,
574
542
const InputArgList &UserArgList,
575
- std::string &BuildLog,
576
- BinaryFormat Format) {
543
+ std::string &BuildLog) {
577
544
TimeTraceScope TTS{" linkDeviceLibraries" };
578
545
579
546
const std::string &DPCPPRoot = getDPCPPRoot ();
@@ -588,29 +555,11 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module,
588
555
/* ShouldOwnClient=*/ false );
589
556
590
557
SmallVector<std::string> LibNames;
591
- const bool FoundUnknownLib =
592
- getDeviceLibraries (UserArgList, LibNames, Diags, Format);
558
+ bool FoundUnknownLib = getDeviceLibraries (UserArgList, LibNames, Diags);
593
559
if (FoundUnknownLib) {
594
560
return createStringError (" Could not determine list of device libraries: %s" ,
595
561
BuildLog.c_str ());
596
562
}
597
- const bool IsCudaHIP =
598
- Format == BinaryFormat::PTX || Format == BinaryFormat::AMDGCN;
599
- if (IsCudaHIP) {
600
- // Based on the OS and the format decide on the version of libspirv.
601
- // NOTE: this will be problematic if cross-compiling between OSes.
602
- std::string Libclc{" clc/" };
603
- Libclc.append (
604
- #ifdef _WIN32
605
- " remangled-l32-signed_char.libspirv-"
606
- #else
607
- " remangled-l64-signed_char.libspirv-"
608
- #endif
609
- );
610
- Libclc.append (Format == BinaryFormat::PTX ? " nvptx64-nvidia-cuda.bc"
611
- : " amdgcn-amd-amdhsa.bc" );
612
- LibNames.push_back (Libclc);
613
- }
614
563
615
564
LLVMContext &Context = Module.getContext ();
616
565
for (const std::string &LibName : LibNames) {
@@ -628,58 +577,6 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module,
628
577
}
629
578
}
630
579
631
- // For GPU targets we need to link against vendor provided libdevice.
632
- if (IsCudaHIP) {
633
- Triple T{Module.getTargetTriple ()};
634
- Driver D{(Twine (DPCPPRoot) + " /bin/clang++" ).str (), T.getTriple (), Diags};
635
- auto [CPU, Features] =
636
- Translator::getTargetCPUAndFeatureAttrs (&Module, " " , Format);
637
- (void )Features;
638
- // Helper lambda to link modules.
639
- auto LinkInLib = [&](const StringRef LibDevice) -> Error {
640
- ModuleUPtr LibDeviceModule;
641
- if (auto Error = loadBitcodeLibrary (LibDevice, Context)
642
- .moveInto (LibDeviceModule)) {
643
- return Error;
644
- }
645
- if (Linker::linkModules (Module, std::move (LibDeviceModule),
646
- Linker::LinkOnlyNeeded)) {
647
- return createStringError (" Unable to link libdevice: %s" ,
648
- BuildLog.c_str ());
649
- }
650
- return Error::success ();
651
- };
652
- SmallVector<std::string, 12 > LibDeviceFiles;
653
- if (Format == BinaryFormat::PTX) {
654
- // For NVPTX we can get away with CudaInstallationDetector.
655
- LazyDetector<CudaInstallationDetector> CudaInstallation{D, T,
656
- UserArgList};
657
- auto LibDevice = CudaInstallation->getLibDeviceFile (CPU);
658
- if (LibDevice.empty ()) {
659
- return createStringError (" Unable to find Cuda libdevice" );
660
- }
661
- LibDeviceFiles.push_back (LibDevice);
662
- } else {
663
- // AMDGPU requires entire toolchain in order to provide all common bitcode
664
- // libraries.
665
- clang::driver::toolchains::ROCMToolChain TC (D, T, UserArgList);
666
- auto CommonDeviceLibs = TC.getCommonDeviceLibNames (
667
- UserArgList, CPU, Action::OffloadKind::OFK_SYCL, false );
668
- if (CommonDeviceLibs.empty ()) {
669
- return createStringError (" Unable to find ROCm common device libraries" );
670
- }
671
- for (auto &Lib : CommonDeviceLibs) {
672
- LibDeviceFiles.push_back (Lib.Path );
673
- }
674
- }
675
- for (auto &LibDeviceFile : LibDeviceFiles) {
676
- // llvm::Error converts to false on success.
677
- if (auto Error = LinkInLib (LibDeviceFile)) {
678
- return Error;
679
- }
680
- }
681
- }
682
-
683
580
return Error::success ();
684
581
}
685
582
0 commit comments