Skip to content

Commit 7ac532e

Browse files
authored
[AMDGPU] Introduce AMDGPU::SGPR_SPILL asm comment flag (#67091)
Use this flag to give more context to implicit-def comments in assembly. Reviewed on Phabricator: https://reviews.llvm.org/D153754
1 parent e594c45 commit 7ac532e

24 files changed

+73
-47
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,21 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
250250
Streamer.popSection();
251251
}
252252

253+
/// Emit the assembly comment for the implicit-def instruction \p MI.
///
/// The comment text is "implicit-def: <reg>", where <reg> is the register in
/// operand 0 of \p MI printed with the current function's register info. When
/// \p MI carries the AMDGPU::SGPR_SPILL asm printer flag, the suffix
/// " : SGPR spill to VGPR lane" is appended, producing text such as
/// "implicit-def: $vgpr8 : SGPR spill to VGPR lane".
///
/// \param MI the instruction to describe; its operand 0 is read as a register.
void AMDGPUAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
254+
Register RegNo = MI->getOperand(0).getReg();
255+
256+
SmallString<128> Str;
257+
raw_svector_ostream OS(Str);
258+
OS << "implicit-def: "
259+
<< printReg(RegNo, MF->getSubtarget().getRegisterInfo());
260+
261+
// Only instructions tagged with SGPR_SPILL get the extra explanatory suffix.
if (MI->getAsmPrinterFlags() & AMDGPU::SGPR_SPILL)
262+
OS << " : SGPR spill to VGPR lane";
263+
264+
// Hand the text to the streamer as a comment, then request a blank line
// (presumably so the comment is emitted on a line of its own -- confirm
// against MCStreamer).
OutStreamer->AddComment(OS.str());
265+
OutStreamer->addBlankLine();
266+
}
267+
253268
void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
254269
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
255270
AsmPrinter::emitFunctionEntryLabel();

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
116116

117117
void emitFunctionBodyEnd() override;
118118

119+
void emitImplicitDef(const MachineInstr *MI) const override;
120+
119121
void emitFunctionEntryLabel() override;
120122

121123
void emitBasicBlockStart(const MachineBasicBlock &MBB) override;

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1397,6 +1397,13 @@ namespace AMDGPU {
13971397

13981398
} // end namespace AMDGPU
13991399

1400+
namespace AMDGPU {
1401+
// AMDGPU-specific asm printer comment flags. Values are stored on a
// MachineInstr with setAsmPrinterFlag() and tested as a bit mask against
// getAsmPrinterFlags() by the asm printer.
// NOTE(review): numbering starts at MachineInstr::TAsmComments, presumably the
// first value reserved for target-specific flags -- confirm in MachineInstr.h.
enum AsmComments {
1402+
// Marks an instruction as belonging to an SGPR-to-VGPR spill; the asm printer
// appends " : SGPR spill to VGPR lane" to the implicit-def comment of
// instructions carrying this flag.
1403+
SGPR_SPILL = MachineInstr::TAsmComments
1404+
};
1405+
} // namespace AMDGPU
1406+
14001407
namespace SI {
14011408
namespace KernelInputOffsets {
14021409

llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,8 @@ void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF,
281281
auto MIB = BuildMI(*SaveBlock, *InsertBefore, InsertBefore->getDebugLoc(),
282282
TII->get(AMDGPU::IMPLICIT_DEF), Reg);
283283
MFI->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
284+
// Set SGPR_SPILL asm printer flag
285+
MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
284286
if (LIS) {
285287
LIS->InsertMachineInstrInMaps(*MIB);
286288
}

llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
1515
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
1616
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
1717
; CHECK-NEXT: s_mov_b32 exec_lo, s4
18-
; CHECK-NEXT: ; implicit-def: $vgpr8
18+
; CHECK-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
1919
; CHECK-NEXT: v_mov_b32_e32 v8, v0
2020
; CHECK-NEXT: s_or_saveexec_b32 s21, -1
2121
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload

llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ define amdgpu_kernel void @test_loop(ptr addrspace(3) %ptr, i32 %n) nounwind {
3636
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
3737
; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
3838
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
39-
; GCN_DBG-NEXT: ; implicit-def: $vgpr0
39+
; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
4040
; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
4141
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
4242
; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
@@ -144,7 +144,7 @@ define amdgpu_kernel void @loop_const_true(ptr addrspace(3) %ptr, i32 %n) nounwi
144144
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
145145
; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
146146
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
147-
; GCN_DBG-NEXT: ; implicit-def: $vgpr0
147+
; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
148148
; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
149149
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
150150
; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
@@ -232,7 +232,7 @@ define amdgpu_kernel void @loop_const_false(ptr addrspace(3) %ptr, i32 %n) nounw
232232
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
233233
; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
234234
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
235-
; GCN_DBG-NEXT: ; implicit-def: $vgpr0
235+
; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
236236
; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
237237
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
238238
; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
@@ -321,7 +321,7 @@ define amdgpu_kernel void @loop_const_undef(ptr addrspace(3) %ptr, i32 %n) nounw
321321
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
322322
; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
323323
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
324-
; GCN_DBG-NEXT: ; implicit-def: $vgpr0
324+
; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
325325
; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
326326
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
327327
; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
@@ -422,7 +422,7 @@ define amdgpu_kernel void @loop_arg_0(ptr addrspace(3) %ptr, i32 %n) nounwind {
422422
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
423423
; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
424424
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
425-
; GCN_DBG-NEXT: ; implicit-def: $vgpr0
425+
; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
426426
; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
427427
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
428428
; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0

llvm/test/CodeGen/AMDGPU/collapse-endcf.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
4848
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
4949
; GCN-O0-NEXT: s_add_u32 s12, s12, s11
5050
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
51-
; GCN-O0-NEXT: ; implicit-def: $vgpr1
51+
; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
5252
; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
5353
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
5454
; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -221,7 +221,7 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
221221
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
222222
; GCN-O0-NEXT: s_add_u32 s12, s12, s11
223223
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
224-
; GCN-O0-NEXT: ; implicit-def: $vgpr1
224+
; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
225225
; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
226226
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
227227
; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -430,7 +430,7 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
430430
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
431431
; GCN-O0-NEXT: s_add_u32 s12, s12, s11
432432
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
433-
; GCN-O0-NEXT: ; implicit-def: $vgpr1
433+
; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
434434
; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
435435
; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
436436
; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -676,7 +676,7 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
676676
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
677677
; GCN-O0-NEXT: s_add_u32 s12, s12, s11
678678
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
679-
; GCN-O0-NEXT: ; implicit-def: $vgpr1
679+
; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
680680
; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
681681
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
682682
; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -931,7 +931,7 @@ define amdgpu_kernel void @s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %a
931931
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
932932
; GCN-O0-NEXT: s_add_u32 s12, s12, s11
933933
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
934-
; GCN-O0-NEXT: ; implicit-def: $vgpr1
934+
; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
935935
; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
936936
; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
937937
; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -1080,7 +1080,7 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
10801080
; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
10811081
; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
10821082
; GCN-O0-NEXT: s_mov_b64 exec, s[4:5]
1083-
; GCN-O0-NEXT: ; implicit-def: $vgpr1
1083+
; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
10841084
; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
10851085
; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
10861086
; GCN-O0-NEXT: s_waitcnt expcnt(1)

llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
117117
; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
118118
; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
119119
; FLAT_SCR_OPT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
120-
; FLAT_SCR_OPT-NEXT: ; implicit-def: $vgpr0
120+
; FLAT_SCR_OPT-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
121121
; FLAT_SCR_OPT-NEXT: s_waitcnt lgkmcnt(0)
122122
; FLAT_SCR_OPT-NEXT: v_writelane_b32 v0, s2, 0
123123
; FLAT_SCR_OPT-NEXT: v_writelane_b32 v0, s3, 1
@@ -240,7 +240,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
240240
; FLAT_SCR_ARCH-LABEL: test:
241241
; FLAT_SCR_ARCH: ; %bb.0:
242242
; FLAT_SCR_ARCH-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
243-
; FLAT_SCR_ARCH-NEXT: ; implicit-def: $vgpr0
243+
; FLAT_SCR_ARCH-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
244244
; FLAT_SCR_ARCH-NEXT: s_waitcnt lgkmcnt(0)
245245
; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v0, s2, 0
246246
; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v0, s3, 1

llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
1313
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
1414
; CHECK-NEXT: s_add_u32 s0, s0, s17
1515
; CHECK-NEXT: s_addc_u32 s1, s1, 0
16-
; CHECK-NEXT: ; implicit-def: $vgpr3
16+
; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
1717
; CHECK-NEXT: v_writelane_b32 v3, s16, 0
1818
; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
1919
; CHECK-NEXT: s_add_i32 s12, s33, 0x100200

llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
144144
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
145145
; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
146146
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
147-
; W64-O0-NEXT: ; implicit-def: $vgpr5
147+
; W64-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane
148148
; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
149149
; W64-O0-NEXT: v_mov_b32_e32 v5, v3
150150
; W64-O0-NEXT: v_mov_b32_e32 v6, v2
@@ -497,7 +497,7 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt
497497
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
498498
; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
499499
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
500-
; W64-O0-NEXT: ; implicit-def: $vgpr13
500+
; W64-O0-NEXT: ; implicit-def: $vgpr13 : SGPR spill to VGPR lane
501501
; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
502502
; W64-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
503503
; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
@@ -1019,7 +1019,7 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad
10191019
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
10201020
; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
10211021
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
1022-
; W64-O0-NEXT: ; implicit-def: $vgpr8
1022+
; W64-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
10231023
; W64-O0-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
10241024
; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
10251025
; W64-O0-NEXT: v_mov_b32_e32 v8, v6

0 commit comments

Comments
 (0)