From 9ea29dce91f6fba3e5c60f7c2448eecd3193d37c Mon Sep 17 00:00:00 2001 From: Valery Pykhtin Date: Tue, 5 Dec 2023 07:36:40 +0100 Subject: [PATCH] [AMDGPU] Fix GCNUpwardRPTracker: max pressure on defs. --- llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 50 ++--- llvm/lib/Target/AMDGPU/GCNRegPressure.h | 26 +++ .../CodeGen/AMDGPU/regpressure_printer.mir | 171 ++++++------------ 3 files changed, 112 insertions(+), 135 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp index 5ebf834377f2c..fd8f0bebd3bec 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -274,32 +274,42 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) { if (MI.isDebugInstr()) return; - auto DecrementDef = [this](const MachineOperand &MO) { + // Kill all defs. + GCNRegPressure DefPressure, ECDefPressure; + bool HasECDefs = false; + for (const MachineOperand &MO : MI.all_defs()) { + if (!MO.getReg().isVirtual()) + continue; + Register Reg = MO.getReg(); + LaneBitmask DefMask = getDefRegMask(MO, *MRI); + + // Treat a def as fully live at the moment of definition: keep a record. + if (MO.isEarlyClobber()) { + ECDefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI); + HasECDefs = true; + } else + DefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI); + auto I = LiveRegs.find(Reg); if (I == LiveRegs.end()) - return; + continue; LaneBitmask &LiveMask = I->second; LaneBitmask PrevMask = LiveMask; - LiveMask &= ~getDefRegMask(MO, *MRI); + LiveMask &= ~DefMask; CurPressure.inc(Reg, PrevMask, LiveMask, *MRI); if (LiveMask.none()) LiveRegs.erase(I); - }; - - // Decrement non-early-clobber defs. - SmallVector EarlyClobberDefs; - for (const MachineOperand &MO : MI.all_defs()) { - if (!MO.getReg().isVirtual()) - continue; - if (!MO.isEarlyClobber()) - DecrementDef(MO); - else - EarlyClobberDefs.push_back(&MO); } - // Increment uses. + // Update MaxPressure with defs pressure. + DefPressure += CurPressure; + if (HasECDefs) + DefPressure += ECDefPressure; + MaxPressure = max(DefPressure, MaxPressure); + + // Make uses alive. SmallVector RegUses; collectVirtualRegUses(RegUses, MI, LIS, *MRI); for (const RegisterMaskPair &U : RegUses) { @@ -309,13 +319,9 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) { CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI); } - // Point of maximum pressure: non-early-clobber defs are decremented and uses - // are incremented. - MaxPressure = max(CurPressure, MaxPressure); - - // Now decrement early clobber defs. - for (const MachineOperand *MO : EarlyClobberDefs) - DecrementDef(*MO); + // Update MaxPressure with uses plus early-clobber defs pressure. + MaxPressure = HasECDefs ? max(CurPressure + ECDefPressure, MaxPressure) + : max(CurPressure, MaxPressure); assert(CurPressure == getRegPressure(*MRI, LiveRegs)); } diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index e21bf10d795ba..4100970fe1a96 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -85,6 +85,18 @@ struct GCNRegPressure { return !(*this == O); } + GCNRegPressure &operator+=(const GCNRegPressure &RHS) { + for (unsigned I = 0; I < TOTAL_KINDS; ++I) + Value[I] += RHS.Value[I]; + return *this; + } + + GCNRegPressure &operator-=(const GCNRegPressure &RHS) { + for (unsigned I = 0; I < TOTAL_KINDS; ++I) + Value[I] -= RHS.Value[I]; + return *this; + } + void dump() const; private: @@ -105,6 +117,20 @@ inline GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2) { return Res; } +inline GCNRegPressure operator+(const GCNRegPressure &P1, + const GCNRegPressure &P2) { + GCNRegPressure Sum = P1; + Sum += P2; + return Sum; +} + +inline GCNRegPressure operator-(const GCNRegPressure &P1, + const GCNRegPressure &P2) { + GCNRegPressure Diff = P1; + Diff -= P2; + return Diff; +} + class GCNRPTracker { public: using LiveRegSet = DenseMap; diff --git a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir index 83e85ccf7f8f1..f0c5ba489ef3d 100644 --- a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir +++ b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir @@ -47,87 +47,46 @@ body: | name: live_through_test tracksRegLiveness: true body: | - ; RPU-LABEL: name: live_through_test - ; RPU: bb.0: - ; RPU-NEXT: Live-in: - ; RPU-NEXT: SGPR VGPR - ; RPU-NEXT: 0 0 - ; RPU-NEXT: 3 0 %0:sgpr_128 = IMPLICIT_DEF - ; RPU-NEXT: 3 0 - ; RPU-NEXT: Live-out: %0:00000000000000F3 - ; RPU-NEXT: Live-thr: - ; RPU-NEXT: 0 0 - ; RPU-NEXT: bb.1: - ; RPU-NEXT: Live-in: %0:00000000000000F3 - ; RPU-NEXT: SGPR VGPR - ; RPU-NEXT: 3 0 - ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128 - ; RPU-NEXT: 2 0 - ; RPU-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF - ; RPU-NEXT: 3 0 - ; RPU-NEXT: 3 0 %0.sub1:sgpr_128 = IMPLICIT_DEF - ; RPU-NEXT: 3 0 - ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 - ; RPU-NEXT: 2 0 - ; RPU-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF - ; RPU-NEXT: 3 0 - ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 - ; RPU-NEXT: 2 0 - ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128 - ; RPU-NEXT: 2 0 - ; RPU-NEXT: Live-out: %0:00000000000000C3 - ; RPU-NEXT: Live-thr: %0:00000000000000C0 - ; RPU-NEXT: 1 0 - ; RPU-NEXT: bb.2: - ; RPU-NEXT: Live-in: %0:00000000000000C3 - ; RPU-NEXT: SGPR VGPR - ; RPU-NEXT: 2 0 - ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128 - ; RPU-NEXT: 0 0 - ; RPU-NEXT: Live-out: - ; RPU-NEXT: Live-thr: - ; RPU-NEXT: 0 0 - ; - ; RPD-LABEL: name: live_through_test - ; RPD: bb.0: - ; RPD-NEXT: Live-in: - ; RPD-NEXT: SGPR VGPR - ; RPD-NEXT: 0 0 - ; RPD-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF - ; RPD-NEXT: 3 0 - ; RPD-NEXT: Live-out: %0:00000000000000F3 - ; RPD-NEXT: Live-thr: - ; RPD-NEXT: 0 0 - ; RPD-NEXT: bb.1: - ; RPD-NEXT: Live-in: %0:00000000000000F3 - ; RPD-NEXT: SGPR VGPR - ; RPD-NEXT: 3 0 - ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128 - ; RPD-NEXT: 2 0 - ; RPD-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF - ; RPD-NEXT: 3 0 - ; RPD-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF - ; RPD-NEXT: 3 0 - ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 - ; RPD-NEXT: 2 0 - ; RPD-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF - ; RPD-NEXT: 3 0 - ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 - ; RPD-NEXT: 2 0 - ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128 - ; RPD-NEXT: 2 0 - ; RPD-NEXT: Live-out: %0:00000000000000C3 - ; RPD-NEXT: Live-thr: %0:00000000000000C0 - ; RPD-NEXT: 1 0 - ; RPD-NEXT: bb.2: - ; RPD-NEXT: Live-in: %0:00000000000000C3 - ; RPD-NEXT: SGPR VGPR - ; RPD-NEXT: 2 0 - ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128 - ; RPD-NEXT: 0 0 - ; RPD-NEXT: Live-out: - ; RPD-NEXT: Live-thr: - ; RPD-NEXT: 0 0 + ; RP-LABEL: name: live_through_test + ; RP: bb.0: + ; RP-NEXT: Live-in: + ; RP-NEXT: SGPR VGPR + ; RP-NEXT: 0 0 + ; RP-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF + ; RP-NEXT: 3 0 + ; RP-NEXT: Live-out: %0:00000000000000F3 + ; RP-NEXT: Live-thr: + ; RP-NEXT: 0 0 + ; RP-NEXT: bb.1: + ; RP-NEXT: Live-in: %0:00000000000000F3 + ; RP-NEXT: SGPR VGPR + ; RP-NEXT: 3 0 + ; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128 + ; RP-NEXT: 2 0 + ; RP-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF + ; RP-NEXT: 3 0 + ; RP-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF + ; RP-NEXT: 3 0 + ; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 + ; RP-NEXT: 2 0 + ; RP-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF + ; RP-NEXT: 3 0 + ; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128 + ; RP-NEXT: 2 0 + ; RP-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128 + ; RP-NEXT: 2 0 + ; RP-NEXT: Live-out: %0:00000000000000C3 + ; RP-NEXT: Live-thr: %0:00000000000000C0 + ; RP-NEXT: 1 0 + ; RP-NEXT: bb.2: + ; RP-NEXT: Live-in: %0:00000000000000C3 + ; RP-NEXT: SGPR VGPR + ; RP-NEXT: 2 0 + ; RP-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128 + ; RP-NEXT: 0 0 + ; RP-NEXT: Live-out: + ; RP-NEXT: Live-thr: + ; RP-NEXT: 0 0 bb.0: %0:sgpr_128 = IMPLICIT_DEF bb.1: @@ -223,7 +182,7 @@ body: | ; RPU-NEXT: 0 7 ; RPU-NEXT: 0 7 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec ; RPU-NEXT: 0 6 - ; RPU-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF + ; RPU-NEXT: 0 8 %8:vreg_64 = IMPLICIT_DEF ; RPU-NEXT: 0 7 ; RPU-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF ; RPU-NEXT: 0 9 @@ -262,7 +221,7 @@ body: | ; RPU-NEXT: 0 12 ; RPU-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec ; RPU-NEXT: 0 10 - ; RPU-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec + ; RPU-NEXT: 0 11 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec ; RPU-NEXT: 0 10 ; RPU-NEXT: 0 10 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec ; RPU-NEXT: 0 9 @@ -550,7 +509,7 @@ body: | ; RPU-NEXT: 0 0 ; RPU-NEXT: 0 0 $sgpr0 = S_BUFFER_LOAD_DWORD_IMM $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0 ; RPU-NEXT: 0 0 - ; RPU-NEXT: 0 0 undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec + ; RPU-NEXT: 0 1 undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec ; RPU-NEXT: 0 0 ; RPU-NEXT: 0 0 S_CMP_GT_U32 $sgpr0, 15, implicit-def $scc ; RPU-NEXT: 0 0 @@ -569,7 +528,7 @@ body: | ; RPU-NEXT: 0 1 ; RPU-NEXT: 0 1 $m0 = S_MOV_B32 killed $sgpr0 ; RPU-NEXT: 0 1 - ; RPU-NEXT: 0 1 %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec + ; RPU-NEXT: 0 16 %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec ; RPU-NEXT: 0 1 ; RPU-NEXT: Live-out: %0:0000000000000C00 ; RPU-NEXT: Live-thr: @@ -709,33 +668,19 @@ tracksRegLiveness: true body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 - ; RPU-LABEL: name: test_partially_used_early_clobber_def - ; RPU: Live-in: - ; RPU-NEXT: SGPR VGPR - ; RPU-NEXT: 0 0 - ; RPU-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; RPU-NEXT: 4 0 - ; RPU-NEXT: 5 0 early-clobber %1:sgpr_128 = COPY %0:sgpr_128 - ; RPU-NEXT: 1 0 - ; RPU-NEXT: 1 0 S_NOP 0, implicit %1.sub1:sgpr_128 - ; RPU-NEXT: 0 0 - ; RPU-NEXT: Live-out: - ; RPU-NEXT: Live-thr: - ; RPU-NEXT: 0 0 - ; - ; RPD-LABEL: name: test_partially_used_early_clobber_def - ; RPD: Live-in: - ; RPD-NEXT: SGPR VGPR - ; RPD-NEXT: 0 0 - ; RPD-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; RPD-NEXT: 4 0 - ; RPD-NEXT: 8 0 early-clobber %1:sgpr_128 = COPY %0:sgpr_128 - ; RPD-NEXT: 1 0 - ; RPD-NEXT: 1 0 S_NOP 0, implicit %1.sub1:sgpr_128 - ; RPD-NEXT: 0 0 - ; RPD-NEXT: Live-out: - ; RPD-NEXT: Live-thr: - ; RPD-NEXT: 0 0 + ; RP-LABEL: name: test_partially_used_early_clobber_def + ; RP: Live-in: + ; RP-NEXT: SGPR VGPR + ; RP-NEXT: 0 0 + ; RP-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; RP-NEXT: 4 0 + ; RP-NEXT: 8 0 early-clobber %1:sgpr_128 = COPY %0:sgpr_128 + ; RP-NEXT: 1 0 + ; RP-NEXT: 1 0 S_NOP 0, implicit %1.sub1:sgpr_128 + ; RP-NEXT: 0 0 + ; RP-NEXT: Live-out: + ; RP-NEXT: Live-thr: + ; RP-NEXT: 0 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 early-clobber %1:sgpr_128 = COPY %0:sgpr_128 S_NOP 0, implicit %1.sub1 @@ -752,7 +697,7 @@ body: | ; RPU-NEXT: 0 0 ; RPU-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; RPU-NEXT: 4 0 - ; RPU-NEXT: 7 0 %1:sgpr_128 = COPY %0:sgpr_128, implicit-def %2:sgpr_128, implicit-def early-clobber %3:sgpr_128, implicit-def dead early-clobber %4:sgpr_128 + ; RPU-NEXT: 16 0 %1:sgpr_128 = COPY %0:sgpr_128, implicit-def %2:sgpr_128, implicit-def early-clobber %3:sgpr_128, implicit-def dead early-clobber %4:sgpr_128 ; RPU-NEXT: 6 0 ; RPU-NEXT: 6 0 S_NOP 0, implicit %1.sub1:sgpr_128, implicit %2.sub0_sub1:sgpr_128, implicit %3.sub0_sub1_sub2:sgpr_128 ; RPU-NEXT: 0 0