diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp index 5ebf834377f2c..f771a409f4d1a 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -183,18 +183,13 @@ collectVirtualRegUses(SmallVectorImpl &RegMaskPairs, })) continue; - LaneBitmask UseMask; - auto &LI = LIS.getInterval(Reg); - if (!LI.hasSubRanges()) - UseMask = MRI.getMaxLaneMaskForVReg(Reg); - else { - // For a tentative schedule LIS isn't updated yet but livemask should - // remain the same on any schedule. Subreg defs can be reordered but they - // all must dominate uses anyway. - if (!InstrSI) - InstrSI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex(); - UseMask = getLiveLaneMask(LI, InstrSI, MRI); - } + if (!InstrSI) + InstrSI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex(); + + // For a tentative schedule LIS isn't updated yet but livemask should + // remain the same on any schedule. Subreg defs can be reordered but they + // all must dominate uses anyway. + LaneBitmask UseMask = getLiveLaneMask(LIS.getInterval(Reg), InstrSI, MRI); RegMaskPairs.emplace_back(Reg, UseMask); } @@ -274,48 +269,48 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) { if (MI.isDebugInstr()) return; - auto DecrementDef = [this](const MachineOperand &MO) { + // Kill all defs. + GCNRegPressure DefPressure, ECDefPressure; + for (const MachineOperand &MO : MI.all_defs()) { + if (!MO.getReg().isVirtual()) + continue; + Register Reg = MO.getReg(); + LaneBitmask DefMask = getDefRegMask(MO, *MRI); + + // Treat a def as fully live at the moment of definition: keep a record. + (MO.isEarlyClobber() ? &ECDefPressure : &DefPressure) + ->inc(Reg, LaneBitmask::getNone(), DefMask, *MRI); + auto I = LiveRegs.find(Reg); if (I == LiveRegs.end()) - return; + continue; LaneBitmask &LiveMask = I->second; LaneBitmask PrevMask = LiveMask; - LiveMask &= ~getDefRegMask(MO, *MRI); + LiveMask &= ~DefMask; CurPressure.inc(Reg, PrevMask, LiveMask, *MRI); if (LiveMask.none()) LiveRegs.erase(I); - }; - - // Decrement non-early-clobber defs. - SmallVector EarlyClobberDefs; - for (const MachineOperand &MO : MI.all_defs()) { - if (!MO.getReg().isVirtual()) - continue; - if (!MO.isEarlyClobber()) - DecrementDef(MO); - else - EarlyClobberDefs.push_back(&MO); } - // Increment uses. + // Update MaxPressure with defs pressure. + MaxPressure = max(CurPressure + DefPressure + ECDefPressure, MaxPressure); + + // Make uses alive. SmallVector RegUses; collectVirtualRegUses(RegUses, MI, LIS, *MRI); - for (const RegisterMaskPair &U : RegUses) { - LaneBitmask &LiveMask = LiveRegs[U.RegUnit]; + for (auto [Reg, LaneMask] : RegUses) { + if (LaneMask.none()) + continue; + LaneBitmask &LiveMask = LiveRegs[Reg]; LaneBitmask PrevMask = LiveMask; - LiveMask |= U.LaneMask; - CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI); + LiveMask |= LaneMask; + CurPressure.inc(Reg, PrevMask, LiveMask, *MRI); } - // Point of maximum pressure: non-early-clobber defs are decremented and uses - // are incremented. - MaxPressure = max(CurPressure, MaxPressure); - - // Now decrement early clobber defs. - for (const MachineOperand *MO : EarlyClobberDefs) - DecrementDef(*MO); + // Update MaxPressure with all uses alive plus early-clobber defs pressure. + MaxPressure = max(CurPressure + ECDefPressure, MaxPressure); assert(CurPressure == getRegPressure(*MRI, LiveRegs)); } diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index e21bf10d795ba..4100970fe1a96 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -85,6 +85,18 @@ struct GCNRegPressure { return !(*this == O); } + GCNRegPressure &operator+=(const GCNRegPressure &RHS) { + for (unsigned I = 0; I < TOTAL_KINDS; ++I) + Value[I] += RHS.Value[I]; + return *this; + } + + GCNRegPressure &operator-=(const GCNRegPressure &RHS) { + for (unsigned I = 0; I < TOTAL_KINDS; ++I) + Value[I] -= RHS.Value[I]; + return *this; + } + void dump() const; private: @@ -105,6 +117,20 @@ inline GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2) { return Res; } +inline GCNRegPressure operator+(const GCNRegPressure &P1, + const GCNRegPressure &P2) { + GCNRegPressure Sum = P1; + Sum += P2; + return Sum; +} + +inline GCNRegPressure operator-(const GCNRegPressure &P1, + const GCNRegPressure &P2) { + GCNRegPressure Diff = P1; + Diff -= P2; + return Diff; +} + class GCNRPTracker { public: using LiveRegSet = DenseMap; diff --git a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir index a1722c42b189f..d5b62af4b02b0 100644 --- a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir +++ b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir @@ -47,87 +47,46 @@ body: | name: live_through_test tracksRegLiveness: true body: | - ; RPU-LABEL: name: live_through_test - ; RPU: bb.0: - ; RPU-NEXT: Live-in: - ; RPU-NEXT: SGPR VGPR - ; RPU-NEXT: 0 0 - ; RPU-NEXT: 3 0 %0:sgpr_128 = IMPLICIT_DEF - ; RPU-NEXT: 3 0 - ; RPU-NEXT: Live-out: %0:00000000000000F3 - ; RPU-NEXT: Live-thr: - ; RPU-NEXT: 0 0 - ; RPU-NEXT: bb.1: - ; RPU-NEXT: Live-in: %0:00000000000000F3 - ; RPU-NEXT: SGPR VGPR - ; RPU-NEXT: 3 0 - ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128 - ; RPU-NEXT: 2 0 - ; RPU-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF - ; RPU-NEXT: 3 0 - ; RPU-NEXT: 3 0 %0.sub1:sgpr_128 = IMPLICIT_DEF - ; RPU-NEXT: 3 0 - ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 - ; RPU-NEXT: 2 0 - ; RPU-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF - ; RPU-NEXT: 3 0 - ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 - ; RPU-NEXT: 2 0 - ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128 - ; RPU-NEXT: 2 0 - ; RPU-NEXT: Live-out: %0:00000000000000C3 - ; RPU-NEXT: Live-thr: %0:00000000000000C0 - ; RPU-NEXT: 1 0 - ; RPU-NEXT: bb.2: - ; RPU-NEXT: Live-in: %0:00000000000000C3 - ; RPU-NEXT: SGPR VGPR - ; RPU-NEXT: 2 0 - ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128 - ; RPU-NEXT: 0 0 - ; RPU-NEXT: Live-out: - ; RPU-NEXT: Live-thr: - ; RPU-NEXT: 0 0 - ; - ; RPD-LABEL: name: live_through_test - ; RPD: bb.0: - ; RPD-NEXT: Live-in: - ; RPD-NEXT: SGPR VGPR - ; RPD-NEXT: 0 0 - ; RPD-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF - ; RPD-NEXT: 3 0 - ; RPD-NEXT: Live-out: %0:00000000000000F3 - ; RPD-NEXT: Live-thr: - ; RPD-NEXT: 0 0 - ; RPD-NEXT: bb.1: - ; RPD-NEXT: Live-in: %0:00000000000000F3 - ; RPD-NEXT: SGPR VGPR - ; RPD-NEXT: 3 0 - ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128 - ; RPD-NEXT: 2 0 - ; RPD-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF - ; RPD-NEXT: 3 0 - ; RPD-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF - ; RPD-NEXT: 3 0 - ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 - ; RPD-NEXT: 2 0 - ; RPD-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF - ; RPD-NEXT: 3 0 - ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 - ; RPD-NEXT: 2 0 - ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128 - ; RPD-NEXT: 2 0 - ; RPD-NEXT: Live-out: %0:00000000000000C3 - ; RPD-NEXT: Live-thr: %0:00000000000000C0 - ; RPD-NEXT: 1 0 - ; RPD-NEXT: bb.2: - ; RPD-NEXT: Live-in: %0:00000000000000C3 - ; RPD-NEXT: SGPR VGPR - ; RPD-NEXT: 2 0 - ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128 - ; RPD-NEXT: 0 0 - ; RPD-NEXT: Live-out: - ; RPD-NEXT: Live-thr: - ; RPD-NEXT: 0 0 + ; RP-LABEL: name: live_through_test + ; RP: bb.0: + ; RP-NEXT: Live-in: + ; RP-NEXT: SGPR VGPR + ; RP-NEXT: 0 0 + ; RP-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF + ; RP-NEXT: 3 0 + ; RP-NEXT: Live-out: %0:00000000000000F3 + ; RP-NEXT: Live-thr: + ; RP-NEXT: 0 0 + ; RP-NEXT: bb.1: + ; RP-NEXT: Live-in: %0:00000000000000F3 + ; RP-NEXT: SGPR VGPR + ; RP-NEXT: 3 0 + ; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128 + ; RP-NEXT: 2 0 + ; RP-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF + ; RP-NEXT: 3 0 + ; RP-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF + ; RP-NEXT: 3 0 + ; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 + ; RP-NEXT: 2 0 + ; RP-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF + ; RP-NEXT: 3 0 + ; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 + ; RP-NEXT: 2 0 + ; RP-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128 + ; RP-NEXT: 2 0 + ; RP-NEXT: Live-out: %0:00000000000000C3 + ; RP-NEXT: Live-thr: %0:00000000000000C0 + ; RP-NEXT: 1 0 + ; RP-NEXT: bb.2: + ; RP-NEXT: Live-in: %0:00000000000000C3 + ; RP-NEXT: SGPR VGPR + ; RP-NEXT: 2 0 + ; RP-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128 + ; RP-NEXT: 0 0 + ; RP-NEXT: Live-out: + ; RP-NEXT: Live-thr: + ; RP-NEXT: 0 0 bb.0: %0:sgpr_128 = IMPLICIT_DEF bb.1: @@ -223,7 +182,7 @@ body: | ; RPU-NEXT: 0 7 ; RPU-NEXT: 0 7 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec ; RPU-NEXT: 0 6 - ; RPU-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF + ; RPU-NEXT: 0 8 %8:vreg_64 = IMPLICIT_DEF ; RPU-NEXT: 0 7 ; RPU-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF ; RPU-NEXT: 0 9 @@ -262,7 +221,7 @@ body: | ; RPU-NEXT: 0 12 ; RPU-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec ; RPU-NEXT: 0 10 - ; RPU-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec + ; RPU-NEXT: 0 11 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec ; RPU-NEXT: 0 10 ; RPU-NEXT: 0 10 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec ; RPU-NEXT: 0 9 @@ -550,7 +509,7 @@ body: | ; RPU-NEXT: 0 0 ; RPU-NEXT: 0 0 $sgpr0 = S_BUFFER_LOAD_DWORD_IMM $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0 ; RPU-NEXT: 0 0 - ; RPU-NEXT: 0 0 undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec + ; RPU-NEXT: 0 1 undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec ; RPU-NEXT: 0 0 ; RPU-NEXT: 0 0 S_CMP_GT_U32 $sgpr0, 15, implicit-def $scc ; RPU-NEXT: 0 0 @@ -569,7 +528,7 @@ body: | ; RPU-NEXT: 0 1 ; RPU-NEXT: 0 1 $m0 = S_MOV_B32 killed $sgpr0 ; RPU-NEXT: 0 1 - ; RPU-NEXT: 0 1 %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec + ; RPU-NEXT: 0 16 %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec ; RPU-NEXT: 0 1 ; RPU-NEXT: Live-out: %0:0000000000000C00 ; RPU-NEXT: Live-thr: @@ -666,3 +625,70 @@ body: | EXP_DONE 0, %49:vgpr_32, undef %51:vgpr_32, undef %53:vgpr_32, undef %55:vgpr_32, -1, 0, 1, implicit $exec S_ENDPGM 0 ... +--- +name: early_clobber_def_used_on_rhs +registers: + - { id: 0, class: vgpr_32 } +body: | + ; RPU-LABEL: name: early_clobber_def_used_on_rhs + ; RPU: bb.0: + ; RPU-NEXT: Live-in: + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 0 0 + ; RPU-NEXT: 0 1 dead %3:vgpr_32 = COPY $vgpr0 + ; RPU-NEXT: 0 0 + ; RPU-NEXT: 0 1 early-clobber %2:vgpr_32 = COPY %0:vgpr_32 + ; RPU-NEXT: 0 1 + ; RPU-NEXT: 0 1 S_NOP 0, implicit %2:vgpr_32 + ; RPU-NEXT: 0 0 + ; RPU-NEXT: Live-out: + ; RPU-NEXT: Live-thr: + ; RPU-NEXT: 0 0 + ; RPU-NEXT: bb.1: + ; RPU-NEXT: Live-in: + ; RPU-NEXT: SGPR VGPR + ; RPU-NEXT: 0 0 + ; RPU-NEXT: 0 1 dead %1:vgpr_32 = COPY $vgpr0 + ; RPU-NEXT: 0 0 + ; RPU-NEXT: 0 1 dead %0:vgpr_32 = COPY $vgpr0 + ; RPU-NEXT: 0 0 + ; RPU-NEXT: Live-out: + ; RPU-NEXT: Live-thr: + ; RPU-NEXT: 0 0 + ; + ; RPD-LABEL: name: early_clobber_def_used_on_rhs + ; RPD: bb.0: + ; RPD-NEXT: Live-in: + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 0 0 + ; RPD-NEXT: 0 1 dead %3:vgpr_32 = COPY $vgpr0 + ; RPD-NEXT: 0 0 + ; RPD-NEXT: 0 1 early-clobber %2:vgpr_32 = COPY %0:vgpr_32 + ; RPD-NEXT: 0 0 + ; RPD-NEXT: 0 0 S_NOP 0, implicit %2:vgpr_32 + ; RPD-NEXT: 0 -1 + ; RPD-NEXT: Live-out: + ; RPD-NEXT: mis LIS: + ; RPD-NEXT: Live-thr: + ; RPD-NEXT: 0 0 + ; RPD-NEXT: bb.1: + ; RPD-NEXT: Live-in: + ; RPD-NEXT: SGPR VGPR + ; RPD-NEXT: 0 0 + ; RPD-NEXT: 0 1 dead %1:vgpr_32 = COPY $vgpr0 + ; RPD-NEXT: 0 0 + ; RPD-NEXT: 0 1 dead %0:vgpr_32 = COPY $vgpr0 + ; RPD-NEXT: 0 0 + ; RPD-NEXT: Live-out: + ; RPD-NEXT: Live-thr: + ; RPD-NEXT: 0 0 + bb.0: + liveins: $vgpr0 + %0 = COPY $vgpr0 + early-clobber %0 = COPY %0 + S_NOP 0, implicit %0 + bb.1: + liveins: $vgpr0 + %0 = COPY $vgpr0 + %0 = COPY $vgpr0 ; Force isSSA = false +...