Skip to content

Commit 328b537

Browse files
committed
[AMDGPU] Fix GCNUpwardRPTracker.
1 parent d0dd2e2 commit 328b537

File tree

3 files changed

+109
-131
lines changed

3 files changed

+109
-131
lines changed

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

Lines changed: 33 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -183,18 +183,13 @@ collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
183183
}))
184184
continue;
185185

186-
LaneBitmask UseMask;
187-
auto &LI = LIS.getInterval(Reg);
188-
if (!LI.hasSubRanges())
189-
UseMask = MRI.getMaxLaneMaskForVReg(Reg);
190-
else {
191-
// For a tentative schedule LIS isn't updated yet but livemask should
192-
// remain the same on any schedule. Subreg defs can be reordered but they
193-
// all must dominate uses anyway.
194-
if (!InstrSI)
195-
InstrSI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
196-
UseMask = getLiveLaneMask(LI, InstrSI, MRI);
197-
}
186+
if (!InstrSI)
187+
InstrSI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
188+
189+
// For a tentative schedule LIS isn't updated yet but livemask should
190+
// remain the same on any schedule. Subreg defs can be reordered but they
191+
// all must dominate uses anyway.
192+
LaneBitmask UseMask = getLiveLaneMask(LIS.getInterval(Reg), InstrSI, MRI);
198193

199194
RegMaskPairs.emplace_back(Reg, UseMask);
200195
}
@@ -274,48 +269,48 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
274269
if (MI.isDebugInstr())
275270
return;
276271

277-
auto DecrementDef = [this](const MachineOperand &MO) {
272+
// Kill all defs.
273+
GCNRegPressure DefPressure, ECDefPressure;
274+
for (const MachineOperand &MO : MI.all_defs()) {
275+
if (!MO.getReg().isVirtual())
276+
continue;
277+
278278
Register Reg = MO.getReg();
279+
LaneBitmask DefMask = getDefRegMask(MO, *MRI);
280+
281+
// Treat a def as fully live at the moment of definition: keep a record.
282+
(MO.isEarlyClobber() ? &ECDefPressure : &DefPressure)
283+
->inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
284+
279285
auto I = LiveRegs.find(Reg);
280286
if (I == LiveRegs.end())
281-
return;
287+
continue;
282288

283289
LaneBitmask &LiveMask = I->second;
284290
LaneBitmask PrevMask = LiveMask;
285-
LiveMask &= ~getDefRegMask(MO, *MRI);
291+
LiveMask &= ~DefMask;
286292
CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
287293
if (LiveMask.none())
288294
LiveRegs.erase(I);
289-
};
290-
291-
// Decrement non-early-clobber defs.
292-
SmallVector<const MachineOperand *, 2> EarlyClobberDefs;
293-
for (const MachineOperand &MO : MI.all_defs()) {
294-
if (!MO.getReg().isVirtual())
295-
continue;
296-
if (!MO.isEarlyClobber())
297-
DecrementDef(MO);
298-
else
299-
EarlyClobberDefs.push_back(&MO);
300295
}
301296

302-
// Increment uses.
297+
// Update MaxPressure with defs pressure.
298+
MaxPressure = max(CurPressure + DefPressure + ECDefPressure, MaxPressure);
299+
300+
// Make uses alive.
303301
SmallVector<RegisterMaskPair, 8> RegUses;
304302
collectVirtualRegUses(RegUses, MI, LIS, *MRI);
305-
for (const RegisterMaskPair &U : RegUses) {
306-
LaneBitmask &LiveMask = LiveRegs[U.RegUnit];
303+
for (auto [Reg, LaneMask] : RegUses) {
304+
if (LaneMask.none())
305+
continue;
306+
LaneBitmask &LiveMask = LiveRegs[Reg];
307307
LaneBitmask PrevMask = LiveMask;
308-
LiveMask |= U.LaneMask;
309-
CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI);
308+
LiveMask |= LaneMask;
309+
CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
310310
}
311311

312-
// Point of maximum pressure: non-early-clobber defs are decremented and uses
313-
// are incremented.
314-
MaxPressure = max(CurPressure, MaxPressure);
315-
316-
// Now decrement early clobber defs.
317-
for (const MachineOperand *MO : EarlyClobberDefs)
318-
DecrementDef(*MO);
312+
// Update MaxPressure with all uses alive plus early-clobber defs pressure.
313+
MaxPressure = max(CurPressure + ECDefPressure, MaxPressure);
319314

320315
assert(CurPressure == getRegPressure(*MRI, LiveRegs));
321316
}

llvm/lib/Target/AMDGPU/GCNRegPressure.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,18 @@ struct GCNRegPressure {
8585
return !(*this == O);
8686
}
8787

88+
GCNRegPressure &operator+=(const GCNRegPressure &RHS) {
89+
for (unsigned I = 0; I < TOTAL_KINDS; ++I)
90+
Value[I] += RHS.Value[I];
91+
return *this;
92+
}
93+
94+
GCNRegPressure &operator-=(const GCNRegPressure &RHS) {
95+
for (unsigned I = 0; I < TOTAL_KINDS; ++I)
96+
Value[I] -= RHS.Value[I];
97+
return *this;
98+
}
99+
88100
void dump() const;
89101

90102
private:
@@ -105,6 +117,20 @@ inline GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2) {
105117
return Res;
106118
}
107119

120+
inline GCNRegPressure operator+(const GCNRegPressure &P1,
121+
const GCNRegPressure &P2) {
122+
GCNRegPressure Sum = P1;
123+
Sum += P2;
124+
return Sum;
125+
}
126+
127+
inline GCNRegPressure operator-(const GCNRegPressure &P1,
128+
const GCNRegPressure &P2) {
129+
GCNRegPressure Diff = P1;
130+
Diff -= P2;
131+
return Diff;
132+
}
133+
108134
class GCNRPTracker {
109135
public:
110136
using LiveRegSet = DenseMap<unsigned, LaneBitmask>;

llvm/test/CodeGen/AMDGPU/regpressure_printer.mir

Lines changed: 50 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -47,87 +47,46 @@ body: |
4747
name: live_through_test
4848
tracksRegLiveness: true
4949
body: |
50-
; RPU-LABEL: name: live_through_test
51-
; RPU: bb.0:
52-
; RPU-NEXT: Live-in:
53-
; RPU-NEXT: SGPR VGPR
54-
; RPU-NEXT: 0 0
55-
; RPU-NEXT: 3 0 %0:sgpr_128 = IMPLICIT_DEF
56-
; RPU-NEXT: 3 0
57-
; RPU-NEXT: Live-out: %0:00000000000000F3
58-
; RPU-NEXT: Live-thr:
59-
; RPU-NEXT: 0 0
60-
; RPU-NEXT: bb.1:
61-
; RPU-NEXT: Live-in: %0:00000000000000F3
62-
; RPU-NEXT: SGPR VGPR
63-
; RPU-NEXT: 3 0
64-
; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
65-
; RPU-NEXT: 2 0
66-
; RPU-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
67-
; RPU-NEXT: 3 0
68-
; RPU-NEXT: 3 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
69-
; RPU-NEXT: 3 0
70-
; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
71-
; RPU-NEXT: 2 0
72-
; RPU-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
73-
; RPU-NEXT: 3 0
74-
; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
75-
; RPU-NEXT: 2 0
76-
; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
77-
; RPU-NEXT: 2 0
78-
; RPU-NEXT: Live-out: %0:00000000000000C3
79-
; RPU-NEXT: Live-thr: %0:00000000000000C0
80-
; RPU-NEXT: 1 0
81-
; RPU-NEXT: bb.2:
82-
; RPU-NEXT: Live-in: %0:00000000000000C3
83-
; RPU-NEXT: SGPR VGPR
84-
; RPU-NEXT: 2 0
85-
; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
86-
; RPU-NEXT: 0 0
87-
; RPU-NEXT: Live-out:
88-
; RPU-NEXT: Live-thr:
89-
; RPU-NEXT: 0 0
90-
;
91-
; RPD-LABEL: name: live_through_test
92-
; RPD: bb.0:
93-
; RPD-NEXT: Live-in:
94-
; RPD-NEXT: SGPR VGPR
95-
; RPD-NEXT: 0 0
96-
; RPD-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF
97-
; RPD-NEXT: 3 0
98-
; RPD-NEXT: Live-out: %0:00000000000000F3
99-
; RPD-NEXT: Live-thr:
100-
; RPD-NEXT: 0 0
101-
; RPD-NEXT: bb.1:
102-
; RPD-NEXT: Live-in: %0:00000000000000F3
103-
; RPD-NEXT: SGPR VGPR
104-
; RPD-NEXT: 3 0
105-
; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
106-
; RPD-NEXT: 2 0
107-
; RPD-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
108-
; RPD-NEXT: 3 0
109-
; RPD-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
110-
; RPD-NEXT: 3 0
111-
; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
112-
; RPD-NEXT: 2 0
113-
; RPD-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
114-
; RPD-NEXT: 3 0
115-
; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
116-
; RPD-NEXT: 2 0
117-
; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
118-
; RPD-NEXT: 2 0
119-
; RPD-NEXT: Live-out: %0:00000000000000C3
120-
; RPD-NEXT: Live-thr: %0:00000000000000C0
121-
; RPD-NEXT: 1 0
122-
; RPD-NEXT: bb.2:
123-
; RPD-NEXT: Live-in: %0:00000000000000C3
124-
; RPD-NEXT: SGPR VGPR
125-
; RPD-NEXT: 2 0
126-
; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
127-
; RPD-NEXT: 0 0
128-
; RPD-NEXT: Live-out:
129-
; RPD-NEXT: Live-thr:
130-
; RPD-NEXT: 0 0
50+
; RP-LABEL: name: live_through_test
51+
; RP: bb.0:
52+
; RP-NEXT: Live-in:
53+
; RP-NEXT: SGPR VGPR
54+
; RP-NEXT: 0 0
55+
; RP-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF
56+
; RP-NEXT: 3 0
57+
; RP-NEXT: Live-out: %0:00000000000000F3
58+
; RP-NEXT: Live-thr:
59+
; RP-NEXT: 0 0
60+
; RP-NEXT: bb.1:
61+
; RP-NEXT: Live-in: %0:00000000000000F3
62+
; RP-NEXT: SGPR VGPR
63+
; RP-NEXT: 3 0
64+
; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
65+
; RP-NEXT: 2 0
66+
; RP-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
67+
; RP-NEXT: 3 0
68+
; RP-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
69+
; RP-NEXT: 3 0
70+
; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
71+
; RP-NEXT: 2 0
72+
; RP-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
73+
; RP-NEXT: 3 0
74+
; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
75+
; RP-NEXT: 2 0
76+
; RP-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
77+
; RP-NEXT: 2 0
78+
; RP-NEXT: Live-out: %0:00000000000000C3
79+
; RP-NEXT: Live-thr: %0:00000000000000C0
80+
; RP-NEXT: 1 0
81+
; RP-NEXT: bb.2:
82+
; RP-NEXT: Live-in: %0:00000000000000C3
83+
; RP-NEXT: SGPR VGPR
84+
; RP-NEXT: 2 0
85+
; RP-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
86+
; RP-NEXT: 0 0
87+
; RP-NEXT: Live-out:
88+
; RP-NEXT: Live-thr:
89+
; RP-NEXT: 0 0
13190
bb.0:
13291
%0:sgpr_128 = IMPLICIT_DEF
13392
bb.1:
@@ -223,7 +182,7 @@ body: |
223182
; RPU-NEXT: 0 7
224183
; RPU-NEXT: 0 7 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
225184
; RPU-NEXT: 0 6
226-
; RPU-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF
185+
; RPU-NEXT: 0 8 %8:vreg_64 = IMPLICIT_DEF
227186
; RPU-NEXT: 0 7
228187
; RPU-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
229188
; RPU-NEXT: 0 9
@@ -262,7 +221,7 @@ body: |
262221
; RPU-NEXT: 0 12
263222
; RPU-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
264223
; RPU-NEXT: 0 10
265-
; RPU-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
224+
; RPU-NEXT: 0 11 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
266225
; RPU-NEXT: 0 10
267226
; RPU-NEXT: 0 10 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
268227
; RPU-NEXT: 0 9
@@ -550,7 +509,7 @@ body: |
550509
; RPU-NEXT: 0 0
551510
; RPU-NEXT: 0 0 $sgpr0 = S_BUFFER_LOAD_DWORD_IMM $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0
552511
; RPU-NEXT: 0 0
553-
; RPU-NEXT: 0 0 undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec
512+
; RPU-NEXT: 0 1 undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec
554513
; RPU-NEXT: 0 0
555514
; RPU-NEXT: 0 0 S_CMP_GT_U32 $sgpr0, 15, implicit-def $scc
556515
; RPU-NEXT: 0 0
@@ -569,7 +528,7 @@ body: |
569528
; RPU-NEXT: 0 1
570529
; RPU-NEXT: 0 1 $m0 = S_MOV_B32 killed $sgpr0
571530
; RPU-NEXT: 0 1
572-
; RPU-NEXT: 0 1 %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec
531+
; RPU-NEXT: 0 16 %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec
573532
; RPU-NEXT: 0 1
574533
; RPU-NEXT: Live-out: %0:0000000000000C00
575534
; RPU-NEXT: Live-thr:
@@ -673,14 +632,12 @@ registers:
673632
body: |
674633
; RPU-LABEL: name: early_clobber_def_used_on_rhs
675634
; RPU: bb.0:
676-
; RPU-NEXT: Live-in: %0:0000000000000003
677-
; RPU-NEXT: mis LIS:
678-
; RPU-NEXT: %0:L0000000000000003 isn't found in LIS reported set
635+
; RPU-NEXT: Live-in:
679636
; RPU-NEXT: SGPR VGPR
680-
; RPU-NEXT: 0 1
637+
; RPU-NEXT: 0 0
681638
; RPU-NEXT: 0 1 dead %3:vgpr_32 = COPY $vgpr0
682-
; RPU-NEXT: 0 1
683-
; RPU-NEXT: 0 2 early-clobber %2:vgpr_32 = COPY %0:vgpr_32
639+
; RPU-NEXT: 0 0
640+
; RPU-NEXT: 0 1 early-clobber %2:vgpr_32 = COPY %0:vgpr_32
684641
; RPU-NEXT: 0 1
685642
; RPU-NEXT: 0 1 S_NOP 0, implicit %2:vgpr_32
686643
; RPU-NEXT: 0 0
@@ -691,9 +648,9 @@ body: |
691648
; RPU-NEXT: Live-in:
692649
; RPU-NEXT: SGPR VGPR
693650
; RPU-NEXT: 0 0
694-
; RPU-NEXT: 0 0 dead %1:vgpr_32 = COPY $vgpr0
651+
; RPU-NEXT: 0 1 dead %1:vgpr_32 = COPY $vgpr0
695652
; RPU-NEXT: 0 0
696-
; RPU-NEXT: 0 0 dead %0:vgpr_32 = COPY $vgpr0
653+
; RPU-NEXT: 0 1 dead %0:vgpr_32 = COPY $vgpr0
697654
; RPU-NEXT: 0 0
698655
; RPU-NEXT: Live-out:
699656
; RPU-NEXT: Live-thr:

0 commit comments

Comments
 (0)