Skip to content

Commit 2f0abc6

Browse files
author
Martin Wehking
authored
Create one bitcode library for AMD (#15055)
Enable compilation of libdevice for AMD by adding AMDGCN to the macro-guarded code parts in libdevice, enabling, e.g., the standard library math functions. Add a compilation workflow for AMD to SYCLLibdevice.cmake. Follow the compilation mechanism used for NVPTX (56a6ae2) and create a single bitcode library file. Do not select builtin LLVM intrinsics for AMDGCN by default, to ensure that stdlib functions can be found when linking against libdevice. Ensure that the clang tests check the correctness of the new clang driver actions and check that the driver still links the device code against the itt device libraries when device library linkage has been excluded. Fix a compilation error of the Intel math function libraries for MSVC when targeting AMD: include "device.h" before including "device_imf.hpp" to avoid the inclusion of <type_traits>, which failed with a symbol redefinition error.
1 parent 340e133 commit 2f0abc6

40 files changed

+175
-67
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2738,7 +2738,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
27382738
ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
27392739
}
27402740
if (GenerateIntrinsics &&
2741-
!(getLangOpts().SYCLIsDevice && getTarget().getTriple().isNVPTX())) {
2741+
!(getLangOpts().SYCLIsDevice && (getTarget().getTriple().isNVPTX() ||
2742+
getTarget().getTriple().isAMDGCN()))) {
27422743
switch (BuiltinIDIfNoAsmLabel) {
27432744
case Builtin::BIacos:
27442745
case Builtin::BIacosf:

clang/lib/Driver/Driver.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5555,7 +5555,7 @@ class OffloadingActionBuilder final {
55555555
// AOT compilation.
55565556
bool SYCLDeviceLibLinked = false;
55575557
Action *NativeCPULib = nullptr;
5558-
if (IsSPIR || IsNVPTX || IsSYCLNativeCPU) {
5558+
if (IsSPIR || IsNVPTX || IsAMDGCN || IsSYCLNativeCPU) {
55595559
bool UseJitLink =
55605560
IsSPIR &&
55615561
Args.hasFlag(options::OPT_fsycl_device_lib_jit_link,

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,9 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C,
165165

166166
// spir64 target is actually JIT compilation, so we defer selection of
167167
// bfloat16 libraries to runtime. For AOT we need libraries, but skip
168-
// for Nvidia.
169-
NeedLibs =
170-
Triple.getSubArch() != llvm::Triple::NoSubArch && !Triple.isNVPTX();
168+
// for Nvidia and AMD.
169+
NeedLibs = Triple.getSubArch() != llvm::Triple::NoSubArch &&
170+
!Triple.isNVPTX() && !Triple.isAMDGCN();
171171
UseNative = false;
172172
if (NeedLibs && Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen &&
173173
C.hasOffloadToolChain<Action::OFK_SYCL>()) {
@@ -212,9 +212,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
212212
SmallVector<std::string, 8> LibraryList;
213213
const llvm::opt::ArgList &Args = C.getArgs();
214214

215-
// For NVPTX we only use one single bitcode library and ignore
215+
// For NVPTX and AMDGCN we only use one single bitcode library and ignore
216216
// manually specified SYCL device libraries.
217-
bool IgnoreSingleLibs = TargetTriple.isNVPTX();
217+
bool IgnoreSingleLibs = TargetTriple.isNVPTX() || TargetTriple.isAMDGCN();
218218

219219
struct DeviceLibOptInfo {
220220
StringRef DeviceLibName;
@@ -278,6 +278,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
278278
if (TargetTriple.isNVPTX() && IgnoreSingleLibs)
279279
LibraryList.push_back(Args.MakeArgString("devicelib--cuda.bc"));
280280

281+
if (TargetTriple.isAMDGCN() && IgnoreSingleLibs)
282+
LibraryList.push_back(Args.MakeArgString("devicelib--amd.bc"));
283+
281284
if (IgnoreSingleLibs)
282285
return LibraryList;
283286

clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s
99
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -ffast-math -emit-llvm -o - | FileCheck %s
10+
// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck %s
11+
// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -ffast-math -emit-llvm -o - | FileCheck %s
1012

1113
#include "Inputs/sycl.hpp"
1214

clang/test/Driver/Inputs/SYCL/lib/devicelib--amd.bc

Whitespace-only changes.
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Tests specific to `-fsycl-targets=amdgcn-amd-amdhsa`
2+
// Verify that the correct devicelib linking actions are spawned by the driver.
3+
// Check also if the correct warnings are generated.
4+
5+
// UNSUPPORTED: system-windows
6+
7+
// Check if internal libraries are still linked against when linkage of all
8+
// device libs is manually excluded.
9+
// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fno-sycl-device-lib=all --sysroot=%S/Inputs/SYCL \
10+
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
11+
// RUN: | FileCheck -check-prefix=CHK-NO-DEVLIB %s
12+
13+
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
14+
// CHK-NO-DEVLIB: [[LIB1:[0-9]+]]: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, gfx906)
15+
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
16+
// CHK-NO-DEVLIB: [[LIB2:[0-9]+]]: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, gfx906)
17+
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
18+
// CHK-NO-DEVLIB: [[LIB3:[0-9]+]]: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, gfx906)
19+
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
20+
// CHK-NO-DEVLIB: {{[0-9]+}}: linker, {{{.*}}[[LIB1]], [[LIB2]], [[LIB3]]{{.*}}}, ir, (device-sycl, gfx906)
21+
22+
// Check that the -fsycl-device-lib flag has no effect when "all" is specified.
23+
// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fsycl-device-lib=all --sysroot=%S/Inputs/SYCL \
24+
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
25+
// RUN: | FileCheck -check-prefix=CHK-ALL %s
26+
27+
// Check that the -fsycl-device-lib flag has no effect when subsets of libs
28+
// are specified.
29+
// RUN: %clangxx -ccc-print-phases -std=c++11 --sysroot=%S/Inputs/SYCL \
30+
// RUN: -fsycl -fsycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \
31+
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
32+
// RUN: | FileCheck -check-prefix=CHK-ALL %s
33+
34+
// Check that -fno-sycl-device-lib is ignored when it does not contain "all".
35+
// A warning should be printed that the flag got ignored.
36+
// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl --sysroot=%S/Inputs/SYCL \
37+
// RUN: -fno-sycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \
38+
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
39+
// RUN: | FileCheck -check-prefixes=CHK-UNUSED-WARN,CHK-ALL %s
40+
41+
// CHK-UNUSED-WARN: warning: argument unused during compilation: '-fno-sycl-device-lib='
42+
// CHK-ALL: [[DEVLIB:[0-9]+]]: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
43+
// CHK-ALL: {{[0-9]+}}: linker, {{{.*}}[[DEVLIB]]{{.*}}}, ir, (device-sycl, gfx906)
44+

clang/test/Driver/sycl-offload-amdgcn.cpp

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626
/// Check phases w/out specifying a compute capability.
2727
// RUN: %clangxx -ccc-print-phases -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \
28-
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
28+
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -fsycl-device-lib=all -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
2929
// RUN: | FileCheck -check-prefix=CHK-PHASES-NO-CC %s
3030
// CHK-PHASES-NO-CC: 0: input, "{{.*}}", c++, (host-sycl)
3131
// CHK-PHASES-NO-CC: 1: preprocessor, {0}, c++-cpp-output, (host-sycl)
@@ -37,17 +37,19 @@
3737
// CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl)
3838
// CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl)
3939
// CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, gfx906)
40-
// CHK-PHASES-NO-CC: 10: sycl-post-link, {9}, ir, (device-sycl, gfx906)
41-
// CHK-PHASES-NO-CC: 11: file-table-tform, {10}, ir, (device-sycl, gfx906)
42-
// CHK-PHASES-NO-CC: 12: backend, {11}, assembler, (device-sycl, gfx906)
43-
// CHK-PHASES-NO-CC: 13: assembler, {12}, object, (device-sycl, gfx906)
44-
// CHK-PHASES-NO-CC: 14: linker, {13}, image, (device-sycl, gfx906)
45-
// CHK-PHASES-NO-CC: 15: linker, {14}, hip-fatbin, (device-sycl, gfx906)
46-
// CHK-PHASES-NO-CC: 16: foreach, {11, 15}, hip-fatbin, (device-sycl, gfx906)
47-
// CHK-PHASES-NO-CC: 17: file-table-tform, {10, 16}, tempfiletable, (device-sycl, gfx906)
48-
// CHK-PHASES-NO-CC: 18: clang-offload-wrapper, {17}, object, (device-sycl, gfx906)
49-
// CHK-PHASES-NO-CC: 19: offload, "device-sycl (amdgcn-amd-amdhsa:gfx906)" {18}, object
50-
// CHK-PHASES-NO-CC: 20: linker, {8, 19}, image, (host-sycl)
40+
// CHK-PHASES-NO-CC: 10: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
41+
// CHK-PHASES-NO-CC: 11: linker, {9, 10}, ir, (device-sycl, gfx906)
42+
// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl, gfx906)
43+
// CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl, gfx906)
44+
// CHK-PHASES-NO-CC: 14: backend, {13}, assembler, (device-sycl, gfx906)
45+
// CHK-PHASES-NO-CC: 15: assembler, {14}, object, (device-sycl, gfx906)
46+
// CHK-PHASES-NO-CC: 16: linker, {15}, image, (device-sycl, gfx906)
47+
// CHK-PHASES-NO-CC: 17: linker, {16}, hip-fatbin, (device-sycl, gfx906)
48+
// CHK-PHASES-NO-CC: 18: foreach, {13, 17}, hip-fatbin, (device-sycl, gfx906)
49+
// CHK-PHASES-NO-CC: 19: file-table-tform, {12, 18}, tempfiletable, (device-sycl, gfx906)
50+
// CHK-PHASES-NO-CC: 20: clang-offload-wrapper, {19}, object, (device-sycl, gfx906)
51+
// CHK-PHASES-NO-CC: 21: offload, "device-sycl (amdgcn-amd-amdhsa:gfx906)" {20}, object
52+
// CHK-PHASES-NO-CC: 22: linker, {8, 21}, image, (host-sycl)
5153

5254
/// Check that we only unbundle an archive once.
5355
// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -nogpulib \

libdevice/cmake/modules/SYCLLibdevice.cmake

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ foreach(filetype IN LISTS filetypes)
6565
add_dependencies(libsycldevice libsycldevice-${filetype})
6666
endforeach()
6767

68-
# For NVPTX each device libary is compiled into a single bitcode
68+
# For NVPTX and AMDGCN each device library is compiled into a single bitcode
6969
# file and all files created this way are linked into one large bitcode
7070
# library.
7171
# Additional compilation options are needed for compiling each device library.
@@ -76,6 +76,13 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
7676
"-Xsycl-target-backend" "--cuda-gpu-arch=sm_50" "-nocudalib")
7777
set(opt_flags_cuda "-O3" "--nvvm-reflect-enable=false")
7878
endif()
79+
if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)
80+
list(APPEND devicelib_arch amd)
81+
set(compile_opts_amd "-nogpulib" "-fsycl-targets=amdgcn-amd-amdhsa"
82+
"-Xsycl-target-backend" "--offload-arch=gfx940")
83+
set(opt_flags_amd "-O3" "--amdgpu-oclc-reflect-enable=false")
84+
endif()
85+
7986

8087
set(spv_device_compile_opts -fsycl-device-only -fsycl-device-obj=spirv)
8188
set(bc_device_compile_opts -fsycl-device-only -fsycl-device-obj=llvmir)
@@ -444,7 +451,7 @@ foreach(dtype IN ITEMS bf16 fp32 fp64)
444451
endforeach()
445452
endforeach()
446453

447-
# Add device fallback imf libraries for the CUDA target.
454+
# Add device fallback imf libraries for the NVPTX and AMD targets.
448455
# The output files are bitcode.
449456
foreach(arch IN LISTS devicelib_arch)
450457
foreach(dtype IN ITEMS bf16 fp32 fp64)
@@ -464,7 +471,7 @@ foreach(arch IN LISTS devicelib_arch)
464471
endforeach()
465472
endforeach()
466473

467-
# Create one large bitcode file for the CUDA targets.
474+
# Create one large bitcode file for the CUDA and AMD targets.
468475
# Use all the files collected in the respective global properties.
469476
foreach(arch IN LISTS devicelib_arch)
470477
get_property(BC_DEVICE_LIBS_${arch} GLOBAL PROPERTY BC_DEVICE_LIBS_${arch})

libdevice/cmath_wrapper.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88

99
#include "device_math.h"
1010

11-
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
11+
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
12+
defined(__AMDGCN__)
1213

1314
DEVICE_EXTERN_C_INLINE
1415
int abs(int x) { return __devicelib_abs(x); }
@@ -199,4 +200,4 @@ DEVICE_EXTERN_C_INLINE
199200
float rintf(float x) { return __nv_rintf(x); }
200201
#endif // __NVPTX__
201202

202-
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
203+
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__

libdevice/cmath_wrapper_fp64.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99

1010
#include "device_math.h"
1111

12-
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
12+
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
13+
defined(__AMDGCN__)
1314

1415
// All exported functions in math and complex device libraries are weak
1516
// references. If users provide their own math or complex functions (with
@@ -496,4 +497,4 @@ double _Sinh(double x, double y) { // compute y * sinh(x), |y| <= 1
496497
}
497498
}
498499
#endif // defined(_WIN32)
499-
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
500+
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__

0 commit comments

Comments
 (0)