Skip to content

Commit 2dc3c06

Browse files
authored
[SYCL] Generalize GlobalOffset and enable it for AMDGPU (#5855)
The purpose of this patch is to generalize SYCL global offset pass and enable it for AMDGPU. * enable global offset in AMD's HIP * decorate SYCL kernel with dedicated MDNode: This removes the need for command line options added by the SYCL driver, discussed here: [SYCL] Generalize local accessor to shared mem pass #5149 (comment) * extract common helpers for local accessor and global offset passes * generalize the pass * introduce builtin_amdgcn_implicit_offset and enable the pass for ADMGPU * implement spirv_GlobalOffset_[x,y,z] * update the docs The main deviation from the NVPTX is the need for supporting address spaces. For AMD kernel arguments reside in constant address space, which for the case with offset forces a copy to private AS, in order to keep the call-graph interface coherent (we can't allocate const address space for the case without offset). Corresponding test-suit PR: intel/llvm-test-suite#941
1 parent 05fe5ae commit 2dc3c06

File tree

45 files changed

+843
-294
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+843
-294
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,5 +316,10 @@ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x16_bf16, "V16fV4sV8sV16fiIiIi",
316316
TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_16x16x64_i8, "V4iV2iV4iV4iiIiIi", "nc", "mai-insts")
317317
TARGET_BUILTIN(__builtin_amdgcn_smfmac_i32_32x32x32_i8, "V16iV2iV4iV16iiIiIi", "nc", "mai-insts")
318318

319+
//===----------------------------------------------------------------------===//
320+
// SYCL builtin.
321+
//===----------------------------------------------------------------------===//
322+
BUILTIN(__builtin_amdgcn_implicit_offset, "Ui*5", "nc")
323+
319324
#undef BUILTIN
320325
#undef TARGET_BUILTIN

clang/lib/CodeGen/TargetInfo.cpp

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,26 @@ static bool occupiesMoreThan(CodeGenTypes &cgt,
151151
return (intCount + fpCount > maxAllRegisters);
152152
}
153153

154+
/// Helper function for AMDGCN and NVVM targets, adds a NamedMDNode with GV,
155+
/// Name, and Operand as operands, and adds the resulting MDNode to the
156+
/// AnnotationName MDNode.
157+
static void addAMDGCOrNVVMMetadata(const char *AnnotationName,
158+
llvm::GlobalValue *GV, StringRef Name,
159+
int Operand) {
160+
llvm::Module *M = GV->getParent();
161+
llvm::LLVMContext &Ctx = M->getContext();
162+
163+
// Get annotations metadata node.
164+
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata(AnnotationName);
165+
166+
llvm::Metadata *MDVals[] = {
167+
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
168+
llvm::ConstantAsMetadata::get(
169+
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
170+
// Append metadata to annotations node.
171+
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
172+
}
173+
154174
bool SwiftABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize,
155175
llvm::Type *eltTy,
156176
unsigned numElts) const {
@@ -7327,18 +7347,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
73277347

73287348
void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
73297349
StringRef Name, int Operand) {
7330-
llvm::Module *M = GV->getParent();
7331-
llvm::LLVMContext &Ctx = M->getContext();
7332-
7333-
// Get "nvvm.annotations" metadata node
7334-
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
7335-
7336-
llvm::Metadata *MDVals[] = {
7337-
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
7338-
llvm::ConstantAsMetadata::get(
7339-
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
7340-
// Append metadata to nvvm.annotations
7341-
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
7350+
addAMDGCOrNVVMMetadata("nvvm.annotations", GV, Name, Operand);
73427351
}
73437352

73447353
bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
@@ -9308,6 +9317,12 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
93089317
llvm::Type *BlockTy) const override;
93099318
bool shouldEmitStaticExternCAliases() const override;
93109319
void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
9320+
9321+
private:
9322+
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
9323+
// resulting MDNode to the amdgcn.annotations MDNode.
9324+
static void addAMDGCNMetadata(llvm::GlobalValue *GV, StringRef Name,
9325+
int Operand);
93119326
};
93129327
}
93139328

@@ -9324,6 +9339,11 @@ static bool requiresAMDGPUProtectedVisibility(const Decl *D,
93249339
cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType()));
93259340
}
93269341

9342+
void AMDGPUTargetCodeGenInfo::addAMDGCNMetadata(llvm::GlobalValue *GV,
9343+
StringRef Name, int Operand) {
9344+
addAMDGCOrNVVMMetadata("amdgcn.annotations", GV, Name, Operand);
9345+
}
9346+
93279347
void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
93289348
const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const {
93299349
const auto *ReqdWGS =
@@ -9425,10 +9445,15 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
94259445

94269446
const bool IsHIPKernel =
94279447
M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>();
9428-
94299448
if (IsHIPKernel)
94309449
F->addFnAttr("uniform-work-group-size", "true");
94319450

9451+
// Create !{<func-ref>, metadata !"kernel", i32 1} node for SYCL kernels.
9452+
const bool IsSYCLKernel =
9453+
FD && M.getLangOpts().SYCLIsDevice && FD->hasAttr<SYCLKernelAttr>();
9454+
if (IsSYCLKernel)
9455+
addAMDGCNMetadata(F, "kernel", 1);
9456+
94329457
if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
94339458
F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");
94349459

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5842,12 +5842,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
58425842
CmdArgs.push_back("-treat-scalable-fixed-error-as-warning");
58435843
}
58445844

5845-
// Enable local accessor to shared memory pass for SYCL.
5846-
if (isa<BackendJobAction>(JA) && IsSYCLOffloadDevice &&
5847-
(Triple.isNVPTX() || Triple.isAMDGCN())) {
5848-
CmdArgs.push_back("-mllvm");
5849-
CmdArgs.push_back("-sycl-enable-local-accessor");
5850-
}
58515845
// These two are potentially updated by AddClangCLArgs.
58525846
codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo;
58535847
bool EmitCodeView = false;

clang/lib/Driver/ToolChains/HIPAMD.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,12 +78,8 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
7878
const llvm::opt::ArgList &Args) const {
7979
// Construct lld command.
8080
// The output from ld.lld is an HSA code object file.
81-
ArgStringList LldArgs{"-flavor",
82-
"gnu",
83-
"--no-undefined",
84-
"-shared",
85-
"-plugin-opt=-amdgpu-internalize-symbols",
86-
"-plugin-opt=-sycl-enable-local-accessor"};
81+
ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined", "-shared",
82+
"-plugin-opt=-amdgpu-internalize-symbols"};
8783

8884
auto &TC = getToolChain();
8985
auto &D = TC.getDriver();
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// RUN: %clang_cc1 -triple spir64-unknown-unknown -fsycl-is-device \
2+
// RUN: -S -emit-llvm %s -o %temp.ll
3+
// RUN: FileCheck -check-prefix=CHECK-SPIR --input-file %temp.ll %s
4+
5+
// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fsycl-is-device \
6+
// RUN: -S -emit-llvm %s -o %temp.ll
7+
// RUN: FileCheck -check-prefix=CHECK-NVPTX --input-file %temp.ll %s
8+
9+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -fsycl-is-device \
10+
// RUN: -S -emit-llvm %s -o %temp.ll
11+
// RUN: FileCheck -check-prefix=CHECK-AMDGCN --input-file %temp.ll %s
12+
13+
// The test makes sure that `[nnvm|amdgcn].annotations are correctly generated
14+
// only for their respective targets.
15+
16+
#include "Inputs/sycl.hpp"
17+
18+
sycl::handler H;
19+
20+
class Functor {
21+
public:
22+
void operator()() const {}
23+
};
24+
25+
// CHECK-SPIR-NOT: annotations =
26+
27+
// CHECK-NVPTX: nvvm.annotations = !{[[FIRST:![0-9]]], [[SECOND:![0-9]]]}
28+
// CHECK-NVPTX: [[FIRST]] = !{void ()* @_ZTS7Functor, !"kernel", i32 1}
29+
// CHECK-NVPTX: [[SECOND]] = !{void ()* @_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE0_clES2_E5foo_2, !"kernel", i32 1}
30+
31+
// CHECK-AMDGCN: amdgcn.annotations = !{[[FIRST:![0-9]]], [[SECOND:![0-9]]]}
32+
// CHECK-AMDGCN: [[FIRST]] = !{void ()* @_ZTS7Functor, !"kernel", i32 1}
33+
// CHECK-AMDGCN: [[SECOND]] = !{void ()* @_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE0_clES2_E5foo_2, !"kernel", i32 1}
34+
35+
int main() {
36+
sycl::queue q;
37+
q.submit([&](sycl::handler &cgh) {
38+
Functor foo{};
39+
cgh.single_task(foo);
40+
});
41+
42+
q.submit([&](cl::sycl::handler &cgh) {
43+
cgh.parallel_for<class foo_2>(cl::sycl::range<1>(1),
44+
[=](cl::sycl::item<1> item) {
45+
});
46+
});
47+
return 0;
48+
}

clang/test/Driver/sycl-local-accessor-opt.cpp

Lines changed: 0 additions & 14 deletions
This file was deleted.

libclc/amdgcn-amdhsa/libspirv/SOURCES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11

2+
workitem/get_global_offset.ll
23
group/collectives.cl
34
group/collectives_helpers.ll
45
atomic/loadstore_helpers.ll
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
;;===----------------------------------------------------------------------===//
2+
;;
3+
;; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
;; See https://llvm.org/LICENSE.txt for license information.
5+
;; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
;;
7+
;;===----------------------------------------------------------------------===//
8+
9+
#if __clang_major__ >= 7
10+
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
11+
#else
12+
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
13+
#endif
14+
15+
; Function Attrs: nounwind readnone speculatable
16+
declare i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
17+
18+
define hidden i64 @_Z22__spirv_GlobalOffset_xv() nounwind alwaysinline {
19+
entry:
20+
%0 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
21+
%1 = load i32, i32 addrspace(5)* %0, align 4
22+
%zext = zext i32 %1 to i64
23+
ret i64 %zext
24+
}
25+
26+
define hidden i64 @_Z22__spirv_GlobalOffset_yv() nounwind alwaysinline {
27+
entry:
28+
%0 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
29+
%arrayidx = getelementptr inbounds i32, i32 addrspace(5)* %0, i64 1
30+
%1 = load i32, i32 addrspace(5)* %arrayidx, align 4
31+
%zext = zext i32 %1 to i64
32+
ret i64 %zext
33+
}
34+
35+
define hidden i64 @_Z22__spirv_GlobalOffset_zv() nounwind alwaysinline {
36+
entry:
37+
%0 = tail call i32 addrspace(5)* @llvm.amdgcn.implicit.offset()
38+
%arrayidx = getelementptr inbounds i32, i32 addrspace(5)* %0, i64 2
39+
%1 = load i32, i32 addrspace(5)* %arrayidx, align 4
40+
%zext = zext i32 %1 to i64
41+
ret i64 %zext
42+
}

libclc/amdgcn/libspirv/SOURCES

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
workitem/get_global_offset.cl
21
workitem/get_group_id.cl
32
workitem/get_global_size.cl
43
workitem/get_local_id.cl

libclc/amdgcn/libspirv/workitem/get_global_offset.cl

Lines changed: 0 additions & 25 deletions
This file was deleted.

0 commit comments

Comments
 (0)