diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp index 50addcbcca065..123d5cdc5bcc1 100644 --- a/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/llvm/lib/CodeGen/RegAllocBase.cpp @@ -65,6 +65,7 @@ void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis, Matrix = &mat; MRI->freezeReservedRegs(); RegClassInfo.runOnMachineFunction(vrm.getMachineFunction()); + FailedVRegs.clear(); } // Visit all the live registers. If they are already assigned to a physical @@ -128,6 +129,7 @@ void RegAllocBase::allocatePhysRegs() { // Keep going after reporting the error. VRM->assignVirt2Phys(VirtReg->reg(), AvailablePhysReg); + FailedVRegs.insert(VirtReg->reg()); } else if (AvailablePhysReg) Matrix->assign(*VirtReg, AvailablePhysReg); @@ -161,6 +163,40 @@ void RegAllocBase::postOptimization() { DeadRemats.clear(); } +void RegAllocBase::cleanupFailedVRegs() { + SmallSet JunkRegs; + + for (Register FailedReg : FailedVRegs) { + JunkRegs.insert(FailedReg); + + MCRegister PhysReg = VRM->getPhys(FailedReg); + LiveInterval &FailedInterval = LIS->getInterval(FailedReg); + + // The liveness information for the failed register and anything interfering + // with the physical register we arbitrarily chose is junk and needs to be + // deleted. + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(FailedInterval, *Units); + for (const LiveInterval *InterferingReg : Q.interferingVRegs()) + JunkRegs.insert(InterferingReg->reg()); + } + } + + // TODO: Probably need to set undef on any physreg uses not associated with + // a virtual register. + for (Register JunkReg : JunkRegs) { + // We still should produce valid IR. Kill all the uses and reduce the live + // ranges so that we don't think it's possible to introduce kill flags + // later which will fail the verifier. + for (MachineOperand &MO : MRI->reg_operands(JunkReg)) { + if (MO.readsReg()) + MO.setIsUndef(true); + } + + LIS->shrinkToUses(&LIS->getInterval(JunkReg)); + } +} + void RegAllocBase::enqueue(const LiveInterval *LI) { const Register Reg = LI->reg(); diff --git a/llvm/lib/CodeGen/RegAllocBase.h b/llvm/lib/CodeGen/RegAllocBase.h index 5bd52da61f2dc..1fdbab694bb0e 100644 --- a/llvm/lib/CodeGen/RegAllocBase.h +++ b/llvm/lib/CodeGen/RegAllocBase.h @@ -37,6 +37,7 @@ #define LLVM_LIB_CODEGEN_REGALLOCBASE_H #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegAllocCommon.h" #include "llvm/CodeGen/RegisterClassInfo.h" @@ -81,6 +82,7 @@ class RegAllocBase { /// always available for the remat of all the siblings of the original reg. SmallPtrSet DeadRemats; + SmallSet FailedVRegs; RegAllocBase(const RegAllocFilterFunc F = nullptr) : shouldAllocateRegisterImpl(F) {} @@ -104,6 +106,10 @@ class RegAllocBase { // rematerialization. virtual void postOptimization(); + /// Perform cleanups on registers that failed to allocate. This hacks on the + /// liveness in order to avoid spurious verifier errors in later passes. + void cleanupFailedVRegs(); + // Get a temporary reference to a Spiller instance. virtual Spiller &spiller() = 0; diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index 51e047b2fa3f0..d240bf916ac05 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -329,6 +329,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { allocatePhysRegs(); postOptimization(); + cleanupFailedVRegs(); // Diagnostic output before rewriting LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n"); diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index bd81d630f9d1f..8978439bf6ba3 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -2795,6 +2795,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { if (VerifyEnabled) MF->verify(this, "Before post optimization", &errs()); postOptimization(); + cleanupFailedVRegs(); reportStats(); releaseMemory(); diff --git a/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir b/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir index 99c27fa0bc95c..443d1e9d1bdf0 100644 --- a/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir +++ b/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir @@ -17,8 +17,8 @@ ... -# CHECK: S_NOP 0, implicit-def renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def renamable $vgpr0_vgpr1_vgpr2_vgpr3 -# CHECK: S_NOP 0, implicit killed renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3 +# CHECK: S_NOP 0, implicit-def renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def dead renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def dead renamable $vgpr0_vgpr1_vgpr2_vgpr3 +# CHECK: S_NOP 0, implicit killed renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed renamable $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed renamable $vgpr28_vgpr29_vgpr30_vgpr31, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3 --- name: foo diff --git a/llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir b/llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir index 503f27edf70df..6d63a4a1cc0ab 100644 --- a/llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir +++ b/llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir @@ -27,10 +27,10 @@ # CHECK-LABEL: name: inflated_reg_class_copy_use_after_free # CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3 # CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5) -# CHECK-NEXT: [[RESTORE0:%[0-9]+]]:vreg_512_align2 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5) -# CHECK-NEXT: early-clobber [[MFMA0:%[0-9]+]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, [[RESTORE0]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec -# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[MFMA0]].sub2_sub3 { -# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[MFMA0]].sub0 +# CHECK-NEXT: dead [[RESTORE0:%[0-9]+]]:vreg_512_align2 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5) +# CHECK-NEXT: dead early-clobber [[MFMA0:%[0-9]+]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, undef [[RESTORE0]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec +# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY undef [[MFMA0]].sub2_sub3 { +# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY undef [[MFMA0]].sub0 # CHECK-NEXT: } # CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 { # CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0 @@ -118,10 +118,10 @@ body: | # CHECK-LABEL: name: inflated_reg_class_copy_use_after_free_lane_subset # CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3 # CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5) -# CHECK-NEXT: [[RESTORE_0:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5) -# CHECK-NEXT: S_NOP 0, implicit-def early-clobber [[REG1:%[0-9]+]], implicit [[RESTORE_0]].sub0_sub1_sub2_sub3, implicit [[RESTORE_0]].sub4_sub5_sub6_sub7 -# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[REG1]].sub2_sub3 { -# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[REG1]].sub0 +# CHECK-NEXT: dead [[RESTORE_0:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5) +# CHECK-NEXT: S_NOP 0, implicit-def dead early-clobber [[REG1:%[0-9]+]], implicit undef [[RESTORE_0]].sub0_sub1_sub2_sub3, implicit undef [[RESTORE_0]].sub4_sub5_sub6_sub7 +# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY undef [[REG1]].sub2_sub3 { +# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY undef [[REG1]].sub0 # CHECK-NEXT: } # CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 { # CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0 diff --git a/llvm/test/CodeGen/AMDGPU/issue48473.mir b/llvm/test/CodeGen/AMDGPU/issue48473.mir index e272bd3480383..ba03855d01cd9 100644 --- a/llvm/test/CodeGen/AMDGPU/issue48473.mir +++ b/llvm/test/CodeGen/AMDGPU/issue48473.mir @@ -43,7 +43,8 @@ # %25 to $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 # CHECK-LABEL: name: issue48473 -# CHECK: S_NOP 0, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 +# CHECK: S_NOP 0, implicit undef renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit undef renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 + --- name: issue48473 diff --git a/llvm/test/CodeGen/AMDGPU/ran-out-of-registers-error-all-regs-reserved.ll b/llvm/test/CodeGen/AMDGPU/ran-out-of-registers-error-all-regs-reserved.ll index 388a8e804a889..06c862c7fb481 100644 --- a/llvm/test/CodeGen/AMDGPU/ran-out-of-registers-error-all-regs-reserved.ll +++ b/llvm/test/CodeGen/AMDGPU/ran-out-of-registers-error-all-regs-reserved.ll @@ -1,8 +1,6 @@ -; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=greedy -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s -; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=basic -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s -; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=fast -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s - -; FIXME: Should pass verifier after failure. +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=greedy -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=basic -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -vgpr-regalloc=fast -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s declare <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32, i32, <32 x i32>, i32 immarg, i32 immarg, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir b/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir index fe01728c00563..01139b6aeef77 100644 --- a/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir +++ b/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir @@ -1,10 +1,7 @@ -# RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs=0 -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s -# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -o /dev/null 2>&1 | FileCheck -check-prefixes=ERR,VERIFIER %s - -# FIXME: We should not produce a verifier error after erroring +# RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -filetype=null 2>&1 | FileCheck -check-prefix=ERR %s # ERR: error: inline assembly requires more registers than available -# VERIFIER: *** Bad machine code: Using an undefined physical register *** +# ERR-NOT: Bad machine code # This testcase cannot be compiled with the enforced register # budget. Previously, tryLastChanceRecoloring would assert here. It diff --git a/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure0.mir b/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure0.mir new file mode 100644 index 0000000000000..b7a8624f2dc9d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure0.mir @@ -0,0 +1,59 @@ +# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -vgpr-regalloc=basic -sgpr-regalloc=basic -start-before=regallocbasic,0 -stop-after=virtregrewriter,2 -verify-regalloc -o - %s 2> %t.basic.err | FileCheck -check-prefix=BASIC %s +# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=greedy,0 -stop-after=virtregrewriter,2 -verify-regalloc -o - %s 2> %t.greedy.err | FileCheck -check-prefix=GREEDY %s + +# RUN: FileCheck -check-prefix=ERR -implicit-check-not=error %s < %t.basic.err +# RUN: FileCheck -check-prefix=ERR -implicit-check-not=error %s < %t.greedy.err + +# This testcase must fail register allocation. It should also not +# produce a verifier error after doing so. Previously, it would not +# properly update the liveness for the dummy selected register. As a +# result, VirtRegRewriter would incorrectly add kill flags which +# combined with other uses of the physical register produced a +# verifier error. + +# ERR: error: :0:0: ran out of registers during register allocation + +# GREEDY: SI_SPILL_V256_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 +# GREEDY-NEXT: SI_SPILL_V512_SAVE undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19 +# GREEDY-NEXT: SI_SPILL_V128_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3 + +# GREEDY: dead renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19 = SI_SPILL_V512_RESTORE +# GREEDY: dead renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE +# GREEDY: S_NOP 0, implicit undef renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3 +# GREEDY: S_NOP 0, implicit killed renamable $vgpr20_vgpr21 + + +# BASIC: SI_SPILL_V128_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3 +# BASIC: SI_SPILL_V256_SAVE killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 +# BASIC: SI_SPILL_V512_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 +# BASIC: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.{{[0-9]+}}, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.{{[0-9]+}}, align 4, addrspace 5) +# BASIC: dead renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE +# BASIC: renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = SI_SPILL_V256_RESTORE +# BASIC: dead renamable $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE +# BASIC: S_NOP 0, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit killed renamable $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit undef renamable $vgpr0_vgpr1_vgpr2_vgpr3 +# BASIC: renamable $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE + +--- | + define void @killed_reg_after_regalloc_failure() #0 { + ret void + } + + attributes #0 = { "amdgpu-waves-per-eu"="10,10" } + +... +--- +name: killed_reg_after_regalloc_failure +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + S_NOP 0, implicit-def %0:vreg_512, implicit-def %1:vreg_256, implicit-def %2:vreg_128 + S_NOP 0, implicit-def %3:vreg_64 + S_NOP 0, implicit %0, implicit %1, implicit %2 + S_NOP 0, implicit %3 + S_ENDPGM 0 + +... diff --git a/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure1.ll b/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure1.ll new file mode 100644 index 0000000000000..5e466a9470fc5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/register-killed-error-after-alloc-failure1.ll @@ -0,0 +1,30 @@ +; RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR -implicit-check-not=error %s + +; ERR: error: inline assembly requires more registers than available +; ERR-NOT: ERROR +; ERR-NOT: Bad machine code + +; This test requires respecting undef on the spill source operand when +; expanding the pseudos to avoid all verifier errors + +%asm.output = type { <16 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, <3 x i32> } + +define void @foo(<32 x i32> addrspace(1)* %arg) #0 { + %agpr0 = call i32 asm sideeffect "; def $0","=${a0}"() + %asm = call %asm.output asm sideeffect "; def $0 $1 $2 $3 $4","=v,=v,=v,=v,=v"() + %vgpr0 = extractvalue %asm.output %asm, 0 + %vgpr1 = extractvalue %asm.output %asm, 1 + %vgpr2 = extractvalue %asm.output %asm, 2 + %vgpr3 = extractvalue %asm.output %asm, 3 + %vgpr4 = extractvalue %asm.output %asm, 4 + call void asm sideeffect "; clobber", "~{a[0:31]},~{v[0:31]}"() + call void asm sideeffect "; use $0","v"(<16 x i32> %vgpr0) + call void asm sideeffect "; use $0","v"(<8 x i32> %vgpr1) + call void asm sideeffect "; use $0","v"(<4 x i32> %vgpr2) + call void asm sideeffect "; use $0","v"(<3 x i32> %vgpr3) + call void asm sideeffect "; use $0","v"(<3 x i32> %vgpr4) + call void asm sideeffect "; use $0","{a1}"(i32 %agpr0) + ret void +} + +attributes #0 = { "amdgpu-waves-per-eu"="8,8" } diff --git a/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll b/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll index 8e3054cceb85b..d09d6608b216e 100644 --- a/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll +++ b/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll @@ -1,4 +1,4 @@ -; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s 2>&1 | FileCheck %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s ; This testcase fails register allocation at the same time it performs ; virtual register splitting (by introducing VGPR to AGPR copies). We @@ -11,7 +11,6 @@ ; it takes the first avialable register. ; CHECK: error: :0:0: ran out of registers during register allocation -; CHECK: Bad machine code: Using an undefined physical register define amdgpu_kernel void @alloc_failure_with_split_vregs(float %v0, float %v1) #0 { %agpr0 = call float asm sideeffect "; def $0", "=${a0}"() %agpr.vec = insertelement <16 x float> undef, float %agpr0, i32 0 diff --git a/llvm/test/CodeGen/X86/inline-asm-assertion.ll b/llvm/test/CodeGen/X86/inline-asm-assertion.ll index d5507a1c44170..c6f4093760b97 100644 --- a/llvm/test/CodeGen/X86/inline-asm-assertion.ll +++ b/llvm/test/CodeGen/X86/inline-asm-assertion.ll @@ -1,5 +1,5 @@ ; RUN: not llc -verify-machineinstrs -O0 < %s 2>&1 | FileCheck %s -; RUN: not --crash llc -verify-machineinstrs -O2 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-O2 +; RUN: not llc -verify-machineinstrs -O2 < %s 2>&1 | FileCheck %s --check-prefix=CHECK-O2 ; CHECK: error: inline assembly requires more registers than available ; CHECK: .size main, .Lfunc_end0-main ; CHECK-O2: error: inline assembly requires more registers than available