Skip to content

Commit 856bd99

Browse files
committed
[AMDGPU] Defaults for missing dimensions in SYCL required wg size
SYCL allows the required work-group size to be partially specified (i.e. with fewer than all 3 dimensions), see https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:kernel.attributes. Such partially specified metadata fails AMDGPU's attribute verification. This patch provides the default value (1) for the missing dimensions when dealing with SYCL kernels.
1 parent f3b20cb commit 856bd99

File tree

2 files changed

+106
-0
lines changed

2 files changed

+106
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,10 +317,38 @@ static bool processUse(CallInst *CI, bool IsV5OrAbove) {
317317
return MadeChange;
318318
}
319319

320+
// SYCL allows required work-group size attribute to be partially specified
321+
// (not all three dimensions), provide a default value (1) for the missing
322+
// dimensions.
323+
static void updateSYCLreqdWorkGroupMD(Function &F) {
324+
auto *Node = F.getMetadata("reqd_work_group_size");
325+
if (!Node || Node->getNumOperands() == 3)
326+
return;
327+
328+
auto &Context = F.getContext();
329+
SmallVector<uint64_t, 3> RWGS;
330+
for (auto &Op : Node->operands())
331+
RWGS.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
332+
while (RWGS.size() != 3)
333+
RWGS.push_back(1);
334+
335+
llvm::Metadata *RWGSArgs[] = {
336+
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
337+
llvm::IntegerType::get(Context, 32), llvm::APInt(32, RWGS[0]))),
338+
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
339+
llvm::IntegerType::get(Context, 32), llvm::APInt(32, RWGS[1]))),
340+
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
341+
llvm::IntegerType::get(Context, 32), llvm::APInt(32, RWGS[2])))};
342+
F.setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, RWGSArgs));
343+
}
320344

321345
// TODO: Move makeLIDRangeMetadata usage into here. Seem to not get
322346
// TargetPassConfig for subtarget.
323347
bool AMDGPULowerKernelAttributes::runOnModule(Module &M) {
348+
for (auto &F : M)
349+
if (F.hasFnAttribute("sycl-module-id"))
350+
updateSYCLreqdWorkGroupMD(F);
351+
324352
bool MadeChange = false;
325353
bool IsV5OrAbove = AMDGPU::getCodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5;
326354
Function *BasePtr = getBasePtrIntrinsic(M, IsV5OrAbove);
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass amdgpu-lower-kernel-attributes -verify-machineinstrs -o - %s | FileCheck %s
2+
3+
# As SYCL allows for the required work group to be specified partially, we need
4+
# to patch it up to 3 dimensions. Make sure that it only happens when dealing
5+
# with SYCL kernels.
6+
7+
--- |
8+
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
9+
10+
; CHECK-LABEL: sycl_kernel_3dim
11+
; CHECK: !reqd_work_group_size [[SYCL_3DIM:![0-9]+]]
12+
define amdgpu_kernel void @sycl_kernel_3dim() #0 !reqd_work_group_size !0 {
13+
entry:
14+
ret void
15+
}
16+
17+
; CHECK-LABEL: sycl_kernel_2dim
18+
; CHECK: !reqd_work_group_size [[SYCL_2DIM:![0-9]+]]
19+
define amdgpu_kernel void @sycl_kernel_2dim() #0 !reqd_work_group_size !1 {
20+
entry:
21+
ret void
22+
}
23+
24+
; CHECK-LABEL: non_sycl_kernel_3dim
25+
; CHECK: !reqd_work_group_size [[NON_SYCL_3DIM:![0-9]+]]
26+
define amdgpu_kernel void @non_sycl_kernel_3dim() #1 !reqd_work_group_size !2 {
27+
entry:
28+
ret void
29+
}
30+
31+
; CHECK-LABEL: non_sycl_kernel_2dim
32+
; CHECK: !reqd_work_group_size [[NON_SYCL_2DIM:![0-9]+]]
33+
define amdgpu_kernel void @non_sycl_kernel_2dim() #1 !reqd_work_group_size !3 {
34+
entry:
35+
ret void
36+
}
37+
38+
attributes #0 = { "sycl-module-id"="sycl-reqd-work-group-size.cpp" "target-cpu"="gfx90a" }
39+
attributes #1 = { "target-cpu"="gfx90a" }
40+
41+
; CHECK: [[SYCL_3DIM]] = !{i32 8, i32 16, i32 2}
42+
!0 = !{i32 8, i32 16, i32 2}
43+
; CHECK: [[SYCL_2DIM]] = !{i32 8, i32 16, i32 1}
44+
!1 = !{i32 8, i32 16}
45+
; CHECK: [[NON_SYCL_3DIM]] = !{i32 4, i32 8, i32 4}
46+
!2 = !{i32 4, i32 8, i32 4}
47+
; CHECK: [[NON_SYCL_2DIM]] = !{i32 4, i32 8}
48+
!3 = !{i32 4, i32 8}
49+
50+
...
51+
---
52+
name: sycl_kernel_3dim
53+
body: |
54+
bb.0.entry:
55+
S_ENDPGM 0
56+
57+
...
58+
---
59+
name: sycl_kernel_2dim
60+
body: |
61+
bb.0.entry:
62+
S_ENDPGM 0
63+
64+
...
65+
---
66+
name: non_sycl_kernel_3dim
67+
body: |
68+
bb.0.entry:
69+
S_ENDPGM 0
70+
71+
...
72+
---
73+
name: non_sycl_kernel_2dim
74+
body: |
75+
bb.0.entry:
76+
S_ENDPGM 0
77+
78+
...

0 commit comments

Comments
 (0)