Skip to content

Commit 53a4adc

Browse files
committed
[AMDGPU] Fix crash with 160-bit p7's by manually defining getPointerTy
While pointers in address space 7 (128 bit rsrc + 32 bit offset) should be rewritten out of the code before IR translation on AMDGPU, higher-level analyses may still call MVT getPointerTy() and the like on the target machine. Currently, since there is no MVT::i160, this operation ends up causing crashes. The changes to the data layout that caused such crashes were D149776. This patch causes getPointerTy() to return the type MVT::v5i32 and getPointerMemTy() to be MVT::v8i32. These are accurate types, but mean that we can't use vectors of address space 7 pointers during codegen. This is mostly OK, since vectors of buffers aren't supported in LLPC anyway, but it's a noticeable limitation. Potential alternative solutions include adjusting getPointerTy() to return an EVT or adding MVT::i160 and MVT::i256, both of which are rather disruptive to the rest of the compiler. Reviewed By: foad Differential Revision: https://reviews.llvm.org/D150002
1 parent 3f6e4e5 commit 53a4adc

File tree

5 files changed

+120
-0
lines changed

5 files changed

+120
-0
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -979,6 +979,26 @@ static EVT memVTFromLoadIntrReturn(Type *Ty, unsigned MaxNumLanes) {
979979
return memVTFromLoadIntrData(ST->getContainedType(0), MaxNumLanes);
980980
}
981981

982+
/// Map address space 7 to MVT::v5i32 because that's its in-memory
983+
/// representation. This return value is vector-typed because there is no
984+
/// MVT::i160 and it is not clear if one can be added. While this could
985+
/// cause issues during codegen, these address space 7 pointers will be
986+
/// rewritten away by then. Therefore, we can return MVT::v5i32 in order
987+
/// to allow pre-codegen passes that query TargetTransformInfo, often for cost
988+
/// modeling, to work.
989+
MVT SITargetLowering::getPointerTy(const DataLayout &DL, unsigned AS) const {
990+
if (AMDGPUAS::BUFFER_FAT_POINTER == AS && DL.getPointerSizeInBits(AS) == 160)
991+
return MVT::v5i32;
992+
return AMDGPUTargetLowering::getPointerTy(DL, AS);
993+
}
994+
/// Similarly, the in-memory representation of a p7 is {p8, i32}, aka
995+
/// v8i32 when padding is added.
996+
MVT SITargetLowering::getPointerMemTy(const DataLayout &DL, unsigned AS) const {
997+
if (AMDGPUAS::BUFFER_FAT_POINTER == AS && DL.getPointerSizeInBits(AS) == 160)
998+
return MVT::v8i32;
999+
return AMDGPUTargetLowering::getPointerMemTy(DL, AS);
1000+
}
1001+
9821002
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
9831003
const CallInst &CI,
9841004
MachineFunction &MF,

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,12 @@ class SITargetLowering final : public AMDGPUTargetLowering {
273273

274274
bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const override;
275275

276+
// Address space 7 pointers (160-bit buffer fat pointers) are rewritten away
277+
// before codegen, but pre-codegen analyses may still query their MVT; since
278+
// there is no MVT::i160, report v5i32 (and v8i32 as the in-memory type).
279+
MVT getPointerTy(const DataLayout &DL, unsigned AS) const override;
280+
MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override;
281+
276282
bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
277283
MachineFunction &MF,
278284
unsigned IntrinsicID) const override;
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
; RUN: not --crash llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - -stop-after=irtranslator < %s
2+
3+
; Confirm that no one's gotten vectors of addrspace(7) pointers to go through the
4+
; IR translator incidentally.
5+
6+
; A GEP over a vector of p7 with a zeroinitializer base: per the
; `not --crash` RUN line, IR translation of vectors of addrspace(7)
; pointers is expected to fail rather than be silently constant-folded.
define <2 x ptr addrspace(7)> @no_auto_constfold_gep_vector() {
7+
%gep = getelementptr i8, <2 x ptr addrspace(7)> zeroinitializer, <2 x i32> <i32 123, i32 123>
8+
ret <2 x ptr addrspace(7)> %gep
9+
}
10+
11+
; A splat-index GEP on non-constant vector-of-p7 arguments is likewise
; expected to crash during IR translation (see the `not --crash` RUN line).
define <2 x ptr addrspace(7)> @gep_vector_splat(<2 x ptr addrspace(7)> %ptrs, i64 %idx) {
12+
%gep = getelementptr i8, <2 x ptr addrspace(7)> %ptrs, i64 %idx
13+
ret <2 x ptr addrspace(7)> %gep
14+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - -stop-after=irtranslator %s | FileCheck %s
3+
4+
; Check that the CSEMIRBuilder doesn't fold away the getelementptr during IRTranslator
5+
; Scalar p7 GEP: per the CHECK lines, the offset survives as a G_PTR_ADD on
; a p7 (not pre-folded by the CSEMIRBuilder), and the 160-bit result is
; unmerged into five s32 pieces returned in $vgpr0-$vgpr4.
define ptr addrspace(7) @no_auto_constfold_gep() {
6+
; CHECK-LABEL: name: no_auto_constfold_gep
7+
; CHECK: bb.1 (%ir-block.0):
8+
; CHECK-NEXT: [[C:%[0-9]+]]:_(p7) = G_CONSTANT i160 0
9+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s160) = G_CONSTANT i160 123
10+
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p7) = G_PTR_ADD [[C]], [[C1]](s160)
11+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTR_ADD]](p7)
12+
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
13+
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
14+
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
15+
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
16+
; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32)
17+
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4
18+
%gep = getelementptr i8, ptr addrspace(7) null, i32 123
19+
ret ptr addrspace(7) %gep
20+
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2+
; RUN: opt -passes=indvars -S < %s | FileCheck %s
3+
4+
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
5+
target triple = "amdgcn--amdpal"
6+
7+
; The i32 GEP at index 2 is loop-invariant; per the CHECK lines, indvars/SCEV
; expansion hoists it out of the loop as a byte GEP at offset 8 ([[SCEVGEP]]).
; NOTE(review): this presumably exercises getPointerTy on the 160-bit p7 via
; cost modeling — confirm against the commit description.
define void @f(ptr addrspace(7) %arg) {
8+
; CHECK-LABEL: define void @f
9+
; CHECK-SAME: (ptr addrspace(7) [[ARG:%.*]]) {
10+
; CHECK-NEXT: bb:
11+
; CHECK-NEXT: br label [[BB1:%.*]]
12+
; CHECK: bb1:
13+
; CHECK-NEXT: br i1 false, label [[BB2:%.*]], label [[BB1]]
14+
; CHECK: bb2:
15+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr addrspace(7) [[ARG]], i32 8
16+
; CHECK-NEXT: br label [[BB3:%.*]]
17+
; CHECK: bb3:
18+
; CHECK-NEXT: [[I4:%.*]] = load i32, ptr addrspace(7) [[SCEVGEP]], align 4
19+
; CHECK-NEXT: br label [[BB3]]
20+
;
21+
bb:
22+
br label %bb1
23+
bb1:
24+
%i = getelementptr i32, ptr addrspace(7) %arg, i32 2
25+
br i1 false, label %bb2, label %bb1
26+
bb2:
27+
br label %bb3
28+
bb3:
29+
%i4 = load i32, ptr addrspace(7) %i, align 4
30+
br label %bb3
31+
}
32+
33+
; Same shape but with a <2 x p7> argument: per the CHECK lines, the scalar GEP
; on the extracted element stays inside the loop (only an LCSSA phi is added),
; and indvars completes without crashing on the vector-of-p7 type.
define void @f2(<2 x ptr addrspace(7)> %arg) {
34+
; CHECK-LABEL: define void @f2
35+
; CHECK-SAME: (<2 x ptr addrspace(7)> [[ARG:%.*]]) {
36+
; CHECK-NEXT: bb:
37+
; CHECK-NEXT: br label [[BB1:%.*]]
38+
; CHECK: bb1:
39+
; CHECK-NEXT: [[P:%.*]] = extractelement <2 x ptr addrspace(7)> [[ARG]], i32 0
40+
; CHECK-NEXT: [[I:%.*]] = getelementptr i32, ptr addrspace(7) [[P]], i32 2
41+
; CHECK-NEXT: br i1 false, label [[BB2:%.*]], label [[BB1]]
42+
; CHECK: bb2:
43+
; CHECK-NEXT: [[I_LCSSA:%.*]] = phi ptr addrspace(7) [ [[I]], [[BB1]] ]
44+
; CHECK-NEXT: br label [[BB3:%.*]]
45+
; CHECK: bb3:
46+
; CHECK-NEXT: [[I4:%.*]] = load i32, ptr addrspace(7) [[I_LCSSA]], align 4
47+
; CHECK-NEXT: br label [[BB3]]
48+
;
49+
bb:
50+
br label %bb1
51+
bb1:
52+
%p = extractelement <2 x ptr addrspace(7)> %arg, i32 0
53+
%i = getelementptr i32, ptr addrspace(7) %p, i32 2
54+
br i1 false, label %bb2, label %bb1
55+
bb2:
56+
br label %bb3
57+
bb3:
58+
%i4 = load i32, ptr addrspace(7) %i, align 4
59+
br label %bb3
60+
}

0 commit comments

Comments
 (0)