diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index b2360ce30fd6e..445279dcc0be1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -245,6 +245,21 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
   Streamer.popSection();
 }
 
+void AMDGPUAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
+  Register RegNo = MI->getOperand(0).getReg();
+
+  SmallString<128> Str;
+  raw_svector_ostream OS(Str);
+  OS << "implicit-def: "
+     << printReg(RegNo, MF->getSubtarget().getRegisterInfo());
+
+  if (MI->getAsmPrinterFlags() & AMDGPU::SGPR_SPILL)
+    OS << " : SGPR spill to VGPR lane";
+
+  OutStreamer->AddComment(OS.str());
+  OutStreamer->addBlankLine();
+}
+
 void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
   if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
     AsmPrinter::emitFunctionEntryLabel();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index d490209ce35ec..dc2fadc0f9352 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -116,6 +116,8 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
 
   void emitFunctionBodyEnd() override;
 
+  void emitImplicitDef(const MachineInstr *MI) const override;
+
   void emitFunctionEntryLabel() override;
 
   void emitBasicBlockStart(const MachineBasicBlock &MBB) override;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index e85917a4c0f32..a4f59fc3513d6 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1397,6 +1397,13 @@ namespace AMDGPU {
 
 } // end namespace AMDGPU
 
+namespace AMDGPU {
+enum AsmComments {
+  // For sgpr to vgpr spill instructions
+  SGPR_SPILL = MachineInstr::TAsmComments
+};
+} // namespace AMDGPU
+
 namespace SI {
 namespace KernelInputOffsets {
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 47d28d5d0eab5..b99d1ee75ef5a 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -281,6 +281,8 @@ void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF,
       auto MIB = BuildMI(*SaveBlock, *InsertBefore, InsertBefore->getDebugLoc(),
                          TII->get(AMDGPU::IMPLICIT_DEF), Reg);
       MFI->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
+      // Set SGPR_SPILL asm printer flag
+      MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
       if (LIS) {
         LIS->InsertMachineInstrInMaps(*MIB);
       }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
index 1d1ff4251c53a..b19230c2e876c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
@@ -15,7 +15,7 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
 ; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
 ; CHECK-NEXT: s_mov_b32 exec_lo, s4
-; CHECK-NEXT: ; implicit-def: $vgpr8
+; CHECK-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
 ; CHECK-NEXT: v_mov_b32_e32 v8, v0
 ; CHECK-NEXT: s_or_saveexec_b32 s21, -1
 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
index c9f9078281da2..c6b17b40ffb6a 100644
--- a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
@@ -36,7 +36,7 @@ define amdgpu_kernel void @test_loop(ptr addrspace(3) %ptr, i32 %n) nounwind {
 ; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
 ; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
 ; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT: ; implicit-def: $vgpr0
+; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
@@ -144,7 +144,7 @@ define amdgpu_kernel void @loop_const_true(ptr addrspace(3) %ptr, i32 %n) nounwi
 ; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
 ; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
 ; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT: ; implicit-def: $vgpr0
+; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
@@ -232,7 +232,7 @@ define amdgpu_kernel void @loop_const_false(ptr addrspace(3) %ptr, i32 %n) nounw
 ; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
 ; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
 ; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT: ; implicit-def: $vgpr0
+; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
@@ -321,7 +321,7 @@ define amdgpu_kernel void @loop_const_undef(ptr addrspace(3) %ptr, i32 %n) nounw
 ; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
 ; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
 ; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT: ; implicit-def: $vgpr0
+; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
@@ -422,7 +422,7 @@ define amdgpu_kernel void @loop_arg_0(ptr addrspace(3) %ptr, i32 %n) nounwind {
 ; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
 ; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
 ; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT: ; implicit-def: $vgpr0
+; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index 74d62ffc58029..73d5088141cdb 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -48,7 +48,7 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
 ; GCN-O0-NEXT: s_add_u32 s12, s12, s11
 ; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT: ; implicit-def: $vgpr1
+; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
 ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -221,7 +221,7 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
 ; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
 ; GCN-O0-NEXT: s_add_u32 s12, s12, s11
 ; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT: ; implicit-def: $vgpr1
+; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
 ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -430,7 +430,7 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
 ; GCN-O0-NEXT: s_add_u32 s12, s12, s11
 ; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT: ; implicit-def: $vgpr1
+; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
 ; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
 ; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -676,7 +676,7 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
 ; GCN-O0-NEXT: s_add_u32 s12, s12, s11
 ; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT: ; implicit-def: $vgpr1
+; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
 ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
 ; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -931,7 +931,7 @@ define amdgpu_kernel void @s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %a
 ; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
 ; GCN-O0-NEXT: s_add_u32 s12, s12, s11
 ; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT: ; implicit-def: $vgpr1
+; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
 ; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
 ; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -1080,7 +1080,7 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
 ; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
 ; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
 ; GCN-O0-NEXT: s_mov_b64 exec, s[4:5]
-; GCN-O0-NEXT: ; implicit-def: $vgpr1
+; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
 ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
 ; GCN-O0-NEXT: s_waitcnt expcnt(1)
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
index 8acc38eaf0170..b3cca5ff2429b 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
@@ -117,7 +117,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
 ; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
 ; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
 ; FLAT_SCR_OPT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; FLAT_SCR_OPT-NEXT: ; implicit-def: $vgpr0
+; FLAT_SCR_OPT-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; FLAT_SCR_OPT-NEXT: s_waitcnt lgkmcnt(0)
 ; FLAT_SCR_OPT-NEXT: v_writelane_b32 v0, s2, 0
 ; FLAT_SCR_OPT-NEXT: v_writelane_b32 v0, s3, 1
@@ -240,7 +240,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
 ; FLAT_SCR_ARCH-LABEL: test:
 ; FLAT_SCR_ARCH: ; %bb.0:
 ; FLAT_SCR_ARCH-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; FLAT_SCR_ARCH-NEXT: ; implicit-def: $vgpr0
+; FLAT_SCR_ARCH-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; FLAT_SCR_ARCH-NEXT: s_waitcnt lgkmcnt(0)
 ; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v0, s2, 0
 ; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v0, s3, 1
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
index 8607535dd849c..03c85b4470628 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
@@ -13,7 +13,7 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
 ; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
 ; CHECK-NEXT: s_add_u32 s0, s0, s17
 ; CHECK-NEXT: s_addc_u32 s1, s1, 0
-; CHECK-NEXT: ; implicit-def: $vgpr3
+; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
 ; CHECK-NEXT: v_writelane_b32 v3, s16, 0
 ; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; CHECK-NEXT: s_add_i32 s12, s33, 0x100200
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
index f660d8df84052..c877740c1baa9 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
@@ -144,7 +144,7 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
 ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
 ; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: ; implicit-def: $vgpr5
+; W64-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane
 ; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; W64-O0-NEXT: v_mov_b32_e32 v5, v3
 ; W64-O0-NEXT: v_mov_b32_e32 v6, v2
@@ -497,7 +497,7 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt
 ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
 ; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: ; implicit-def: $vgpr13
+; W64-O0-NEXT: ; implicit-def: $vgpr13 : SGPR spill to VGPR lane
 ; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
 ; W64-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
 ; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
@@ -1019,7 +1019,7 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad
 ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
 ; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: ; implicit-def: $vgpr8
+; W64-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
 ; W64-O0-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
 ; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
 ; W64-O0-NEXT: v_mov_b32_e32 v8, v6
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
index 7df4f98ddebfa..ac46f8ce20d60 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
@@ -143,7 +143,7 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 {
 ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
 ; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: ; implicit-def: $vgpr5
+; W64-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane
 ; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
 ; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
 ; W64-O0-NEXT: v_mov_b32_e32 v6, v2
@@ -511,7 +511,7 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8)
 ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
 ; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: ; implicit-def: $vgpr13
+; W64-O0-NEXT: ; implicit-def: $vgpr13 : SGPR spill to VGPR lane
 ; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
 ; W64-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
 ; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
@@ -1058,7 +1058,7 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j,
 ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
 ; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: ; implicit-def: $vgpr8
+; W64-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
 ; W64-O0-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
 ; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
 ; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
diff --git a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll
index b04069e5003a2..f4114a01e9b48 100644
--- a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll
@@ -71,7 +71,7 @@ define amdgpu_kernel void @kernel_call() {
 ; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
 ; CHECK-NEXT: s_add_u32 s0, s0, s17
 ; CHECK-NEXT: s_addc_u32 s1, s1, 0
-; CHECK-NEXT: ; implicit-def: $vgpr3
+; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
 ; CHECK-NEXT: v_writelane_b32 v3, s16, 0
 ; CHECK-NEXT: s_or_saveexec_b64 s[24:25], -1
 ; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
@@ -139,7 +139,7 @@ define amdgpu_kernel void @kernel_tailcall() {
 ; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
 ; CHECK-NEXT: s_add_u32 s0, s0, s17
 ; CHECK-NEXT: s_addc_u32 s1, s1, 0
-; CHECK-NEXT: ; implicit-def: $vgpr3
+; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
 ; CHECK-NEXT: v_writelane_b32 v3, s16, 0
 ; CHECK-NEXT: s_or_saveexec_b64 s[24:25], -1
 ; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
@@ -260,7 +260,7 @@ define protected amdgpu_kernel void @kernel() {
 ; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
 ; CHECK-NEXT: s_add_u32 s0, s0, s17
 ; CHECK-NEXT: s_addc_u32 s1, s1, 0
-; CHECK-NEXT: ; implicit-def: $vgpr3
+; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
 ; CHECK-NEXT: v_writelane_b32 v3, s16, 0
 ; CHECK-NEXT: s_or_saveexec_b64 s[24:25], -1
 ; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
index f223a44731811..454dc881f7bf2 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
@@ -17,9 +17,9 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
 ; GCN-NEXT: s_mov_b32 s95, 0xe8f000
 ; GCN-NEXT: s_add_u32 s92, s92, s11
 ; GCN-NEXT: s_addc_u32 s93, s93, 0
-; GCN-NEXT: ; implicit-def: $vgpr0
-; GCN-NEXT: ; implicit-def: $vgpr1
-; GCN-NEXT: ; implicit-def: $vgpr2
+; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
+; GCN-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
+; GCN-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
 ; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
 ; GCN-NEXT: ;;#ASMSTART
 ; GCN-NEXT: ; def s[4:11]
@@ -488,8 +488,8 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %
 ; GCN-NEXT: s_mov_b32 s55, 0xe8f000
 ; GCN-NEXT: s_add_u32 s52, s52, s11
 ; GCN-NEXT: s_addc_u32 s53, s53, 0
-; GCN-NEXT: ; implicit-def: $vgpr0
-; GCN-NEXT: ; implicit-def: $vgpr1
+; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
+; GCN-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
 ; GCN-NEXT: ;;#ASMSTART
 ; GCN-NEXT: ; def s[4:19]
@@ -738,8 +738,8 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %
 ; GCN-NEXT: s_mov_b32 s55, 0xe8f000
 ; GCN-NEXT: s_add_u32 s52, s52, s11
 ; GCN-NEXT: s_addc_u32 s53, s53, 0
-; GCN-NEXT: ; implicit-def: $vgpr0
-; GCN-NEXT: ; implicit-def: $vgpr0
+; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
+; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
 ; GCN-NEXT: ;;#ASMSTART
 ; GCN-NEXT: ;;#ASMEND
@@ -989,8 +989,8 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
 ; GCN-NEXT: s_mov_b32 s55, 0xe8f000
 ; GCN-NEXT: s_add_u32 s52, s52, s11
 ; GCN-NEXT: s_addc_u32 s53, s53, 0
-; GCN-NEXT: ; implicit-def: $vgpr0
-; GCN-NEXT: ; implicit-def: $vgpr0
+; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
+; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN-NEXT: s_load_dword s0, s[4:5], 0x9
 ; GCN-NEXT: ;;#ASMSTART
 ; GCN-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll b/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
index d19a9233e118b..5f291489848fe 100644
--- a/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
@@ -8,7 +8,7 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
 ; CHECK-LABEL: kernel0:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: ; implicit-def: $vgpr23
+; CHECK-NEXT: ; implicit-def: $vgpr23 : SGPR spill to VGPR lane
 ; CHECK-NEXT: ;;#ASMSTART
 ; CHECK-NEXT: ;;#ASMEND
 ; CHECK-NEXT: ;;#ASMSTART
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
index 90307f1c7905b..fbbcfd62f5c60 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
@@ -9,8 +9,8 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %ou
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_add_u32 s0, s0, s15
 ; GCN-NEXT: s_addc_u32 s1, s1, 0
-; GCN-NEXT: ; implicit-def: $vgpr0
-; GCN-NEXT: ; implicit-def: $vgpr0
+; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
+; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN-NEXT: s_load_dword s4, s[8:9], 0x2
 ; GCN-NEXT: ;;#ASMSTART
 ; GCN-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll
index 813e702288bb1..1f2c203459782 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll
@@ -13,7 +13,7 @@ define amdgpu_kernel void @kernel() {
 ; GCN-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
 ; GCN-NEXT: s_mov_b32 s38, -1
 ; GCN-NEXT: s_mov_b32 s39, 0xe00000
-; GCN-NEXT: ; implicit-def: $vgpr3
+; GCN-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
 ; GCN-NEXT: s_add_u32 s36, s36, s11
 ; GCN-NEXT: v_writelane_b32 v3, s4, 0
 ; GCN-NEXT: s_movk_i32 s32, 0x400
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
index f4ae5877d1063..16550fc9588ae 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
@@ -715,7 +715,7 @@ define void @spill_sgpr_with_sgpr_uses() #0 {
 ; GCN-NEXT: buffer_store_dword v251, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; GCN-NEXT: buffer_store_dword v252, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s32 ; 4-byte Folded Spill
-; GCN-NEXT: ; implicit-def: $vgpr0
+; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN-NEXT: v_mov_b32_e32 v0, 0
 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:440
 ; GCN-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
index 4639f28d68061..81dd2c4457b2f 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
@@ -13,7 +13,7 @@ define void @test() {
 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
 ; CHECK-NEXT: s_mov_b64 exec, s[4:5]
-; CHECK-NEXT: ; implicit-def: $vgpr0
+; CHECK-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; CHECK-NEXT: .LBB0_1: ; %bb.1
 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: s_cbranch_scc1 .LBB0_3
diff --git a/llvm/test/CodeGen/AMDGPU/swdev380865.ll b/llvm/test/CodeGen/AMDGPU/swdev380865.ll
index 7f989e30118b9..7201ffaf56166 100644
--- a/llvm/test/CodeGen/AMDGPU/swdev380865.ll
+++ b/llvm/test/CodeGen/AMDGPU/swdev380865.ll
@@ -16,7 +16,7 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce)
 ; CHECK: ; %bb.0: ; %entry
 ; CHECK-NEXT: s_mov_b64 s[0:1], 0
 ; CHECK-NEXT: s_load_dword s2, s[0:1], 0x0
-; CHECK-NEXT: ; implicit-def: $vgpr2
+; CHECK-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
 ; CHECK-NEXT: ; kill: killed $sgpr0_sgpr1
 ; CHECK-NEXT: s_mov_b32 s7, 0x401c0000
 ; CHECK-NEXT: s_mov_b32 s5, 0x40280000
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
index 2e1b86f69a521..c84eee4f9921d 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
@@ -13,7 +13,7 @@ define amdgpu_kernel void @__omp_offloading_16_dd2df_main_l9() {
 ; CHECK: ; %bb.0: ; %bb
 ; CHECK-NEXT: s_add_u32 s0, s0, s15
 ; CHECK-NEXT: s_addc_u32 s1, s1, 0
-; CHECK-NEXT: ; implicit-def: $vgpr1
+; CHECK-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
 ; CHECK-NEXT: v_mov_b32_e32 v2, v0
 ; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1
 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll b/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll
index c2b86a5135022..1f286b766102c 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll
@@ -16,7 +16,7 @@ define protected amdgpu_kernel void @kern(ptr %addr) !llvm.amdgcn.lds.kernel.id
 ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
 ; CHECK-NEXT: s_add_u32 s0, s0, s17
 ; CHECK-NEXT: s_addc_u32 s1, s1, 0
-; CHECK-NEXT: ; implicit-def: $vgpr3
+; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
 ; CHECK-NEXT: v_writelane_b32 v3, s16, 0
 ; CHECK-NEXT: s_or_saveexec_b32 s33, -1
 ; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll
index cb4e2e2bf5a47..cfb1cffdb5096 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll
@@ -19,7 +19,7 @@ define void @vector_reg_liverange_split() #0 {
 ; GFX90A-NEXT: buffer_store_dword a32, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; GFX90A-NEXT: s_mov_b64 exec, s[18:19]
 ; GFX90A-NEXT: v_writelane_b32 v40, s16, 2
-; GFX90A-NEXT: ; implicit-def: $vgpr0
+; GFX90A-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GFX90A-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX90A-NEXT: s_addk_i32 s32, 0x400
 ; GFX90A-NEXT: v_writelane_b32 v40, s31, 1
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
index f92ab5740fac1..7eabe982ff2bc 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
@@ -27,7 +27,7 @@ define void @test() #0 {
 ; GCN-NEXT: v_writelane_b32 v40, s28, 2
 ; GCN-NEXT: v_writelane_b32 v40, s29, 3
 ; GCN-NEXT: v_writelane_b32 v40, s16, 4
-; GCN-NEXT: ; implicit-def: $vgpr0
+; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN-NEXT: v_writelane_b32 v40, s30, 0
 ; GCN-NEXT: s_addk_i32 s32, 0x800
 ; GCN-NEXT: v_writelane_b32 v40, s31, 1
@@ -83,7 +83,7 @@ define void @test() #0 {
 ; GCN-O0-NEXT: v_writelane_b32 v40, s29, 3
 ; GCN-O0-NEXT: v_writelane_b32 v40, s16, 4
 ; GCN-O0-NEXT: s_add_i32 s32, s32, 0x400
-; GCN-O0-NEXT: ; implicit-def: $vgpr0
+; GCN-O0-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GCN-O0-NEXT: v_writelane_b32 v40, s30, 0
 ; GCN-O0-NEXT: v_writelane_b32 v40, s31, 1
 ; GCN-O0-NEXT: ;;#ASMSTART
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
index 7ce326359181b..b6e7da97e0089 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
@@ -145,7 +145,7 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
 ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
-; GFX9-O0-NEXT: ; implicit-def: $vgpr3
+; GFX9-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0
 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1
 ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
@@ -580,7 +580,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
 ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
 ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0x1000
-; GFX9-O0-NEXT: ; implicit-def: $vgpr0
+; GFX9-O0-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
 ; GFX9-O0-NEXT: v_writelane_b32 v10, s30, 0
 ; GFX9-O0-NEXT: v_writelane_b32 v10, s31, 1
 ; GFX9-O0-NEXT: s_mov_b32 s34, s8