Skip to content

Commit b71cad1

Browse files
authored
JIT: Clean up and optimize StackLevelSetter (#112561)
The stack levels computed are not actually used for anything outside x86, so avoid computing them in that case. Also, the max stack level does not actually get used by the backend, even for x86, so no need to save it in `Compiler`.
1 parent 93ebf52 commit b71cad1

File tree

11 files changed

+87
-81
lines changed

11 files changed

+87
-81
lines changed

src/coreclr/jit/block.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1432,8 +1432,8 @@ bool BasicBlock::endsWithJmpMethod(Compiler* comp) const
14321432
//
14331433
bool BasicBlock::endsWithTailCallOrJmp(Compiler* comp, bool fastTailCallsOnly /*=false*/) const
14341434
{
1435-
GenTree* tailCall = nullptr;
1436-
bool tailCallsConvertibleToLoopOnly = false;
1435+
GenTreeCall* tailCall = nullptr;
1436+
bool tailCallsConvertibleToLoopOnly = false;
14371437
return endsWithJmpMethod(comp) ||
14381438
endsWithTailCall(comp, fastTailCallsOnly, tailCallsConvertibleToLoopOnly, &tailCall);
14391439
}
@@ -1454,10 +1454,10 @@ bool BasicBlock::endsWithTailCallOrJmp(Compiler* comp, bool fastTailCallsOnly /*
14541454
// Notes:
14551455
// At most one of fastTailCallsOnly and tailCallsConvertibleToLoopOnly flags can be true.
14561456
//
1457-
bool BasicBlock::endsWithTailCall(Compiler* comp,
1458-
bool fastTailCallsOnly,
1459-
bool tailCallsConvertibleToLoopOnly,
1460-
GenTree** tailCall) const
1457+
bool BasicBlock::endsWithTailCall(Compiler* comp,
1458+
bool fastTailCallsOnly,
1459+
bool tailCallsConvertibleToLoopOnly,
1460+
GenTreeCall** tailCall) const
14611461
{
14621462
assert(!fastTailCallsOnly || !tailCallsConvertibleToLoopOnly);
14631463
*tailCall = nullptr;
@@ -1524,7 +1524,7 @@ bool BasicBlock::endsWithTailCall(Compiler* comp,
15241524
// Return Value:
15251525
// true if the block ends with a tail call convertible to loop.
15261526
//
1527-
bool BasicBlock::endsWithTailCallConvertibleToLoop(Compiler* comp, GenTree** tailCall) const
1527+
bool BasicBlock::endsWithTailCallConvertibleToLoop(Compiler* comp, GenTreeCall** tailCall) const
15281528
{
15291529
bool fastTailCallsOnly = false;
15301530
bool tailCallsConvertibleToLoopOnly = true;

src/coreclr/jit/block.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1766,14 +1766,14 @@ struct BasicBlock : private LIR::Range
17661766

17671767
bool endsWithJmpMethod(Compiler* comp) const;
17681768

1769-
bool endsWithTailCall(Compiler* comp,
1770-
bool fastTailCallsOnly,
1771-
bool tailCallsConvertibleToLoopOnly,
1772-
GenTree** tailCall) const;
1769+
bool endsWithTailCall(Compiler* comp,
1770+
bool fastTailCallsOnly,
1771+
bool tailCallsConvertibleToLoopOnly,
1772+
GenTreeCall** tailCall) const;
17731773

17741774
bool endsWithTailCallOrJmp(Compiler* comp, bool fastTailCallsOnly = false) const;
17751775

1776-
bool endsWithTailCallConvertibleToLoop(Compiler* comp, GenTree** tailCall) const;
1776+
bool endsWithTailCallConvertibleToLoop(Compiler* comp, GenTreeCall** tailCall) const;
17771777

17781778
// Returns the first statement in the statement list of "this" that is
17791779
// not an SSA definition (a lcl = phi(...) store).

src/coreclr/jit/codegenxarch.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9607,9 +9607,6 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
96079607
0, // argSize. Again, we have to lie about it
96089608
EA_UNKNOWN); // retSize
96099609

9610-
// Check that we have place for the push.
9611-
assert(compiler->fgGetPtrArgCntMax() >= 1);
9612-
96139610
#if defined(UNIX_X86_ABI)
96149611
// Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall
96159612
GetEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10);
@@ -9688,9 +9685,6 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper)
96889685
#endif
96899686
genEmitHelperCall(helper, argSize, EA_UNKNOWN /* retSize */);
96909687

9691-
// Check that we have place for the push.
9692-
assert(compiler->fgGetPtrArgCntMax() >= 1);
9693-
96949688
#if defined(UNIX_X86_ABI)
96959689
// Restoring alignment manually. This is similar to CodeGen::genRemoveAlignmentAfterCall
96969690
GetEmitter()->emitIns_R_I(INS_add, EA_4BYTE, REG_SPBASE, 0x10);

src/coreclr/jit/compiler.h

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6703,33 +6703,6 @@ class Compiler
67036703

67046704
//------------------------- Morphing --------------------------------------
67056705

6706-
unsigned fgPtrArgCntMax = 0;
6707-
6708-
public:
6709-
//------------------------------------------------------------------------
6710-
// fgGetPtrArgCntMax: Return the maximum number of pointer-sized stack arguments that calls inside this method
6711-
// can push on the stack. This value is calculated during morph.
6712-
//
6713-
// Return Value:
6714-
// Returns fgPtrArgCntMax, that is a private field.
6715-
//
6716-
unsigned fgGetPtrArgCntMax() const
6717-
{
6718-
return fgPtrArgCntMax;
6719-
}
6720-
6721-
//------------------------------------------------------------------------
6722-
// fgSetPtrArgCntMax: Set the maximum number of pointer-sized stack arguments that calls inside this method
6723-
// can push on the stack. This function is used during StackLevelSetter to fix incorrect morph calculations.
6724-
//
6725-
void fgSetPtrArgCntMax(unsigned argCntMax)
6726-
{
6727-
fgPtrArgCntMax = argCntMax;
6728-
}
6729-
6730-
bool compCanEncodePtrArgCntMax();
6731-
6732-
private:
67336706
hashBv* fgAvailableOutgoingArgTemps;
67346707
ArrayStack<unsigned>* fgUsedSharedTemps = nullptr;
67356708

src/coreclr/jit/compiler.hpp

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3024,20 +3024,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
30243024
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
30253025
*/
30263026

3027-
inline bool Compiler::compCanEncodePtrArgCntMax()
3028-
{
3029-
#ifdef JIT32_GCENCODER
3030-
// DDB 204533:
3031-
// The GC encoding for fully interruptible methods does not
3032-
// support more than 1023 pushed arguments, so we have to
3033-
// use a partially interruptible GC info/encoding.
3034-
//
3035-
return (fgPtrArgCntMax < MAX_PTRARG_OFS);
3036-
#else // JIT32_GCENCODER
3037-
return true;
3038-
#endif
3039-
}
3040-
30413027
/*****************************************************************************
30423028
*
30433029
* Call the given function pointer for all nodes in the tree. The 'visitor'

src/coreclr/jit/flowgraph.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4022,7 +4022,7 @@ PhaseStatus Compiler::fgSetBlockOrder()
40224022
BasicBlock::s_nMaxTrees = 0;
40234023
#endif
40244024

4025-
if (compCanEncodePtrArgCntMax() && fgHasCycleWithoutGCSafePoint())
4025+
if (fgHasCycleWithoutGCSafePoint())
40264026
{
40274027
JITDUMP("Marking method as fully interruptible\n");
40284028
SetInterruptible(true);

src/coreclr/jit/lower.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10929,5 +10929,38 @@ void Lowering::FinalizeOutgoingArgSpace()
1092910929
comp->lvaOutgoingArgSpaceSize = m_outgoingArgSpaceSize;
1093010930
comp->lvaOutgoingArgSpaceSize.MarkAsReadOnly();
1093110931
comp->lvaGetDesc(comp->lvaOutgoingArgSpaceVar)->GrowBlockLayout(comp->typGetBlkLayout(m_outgoingArgSpaceSize));
10932+
10933+
SetFramePointerFromArgSpaceSize();
1093210934
#endif
1093310935
}
10936+
10937+
//----------------------------------------------------------------------------------------------
10938+
// Lowering::SetFramePointerFromArgSpaceSize:
10939+
// Set the frame pointer from the arg space size. This is a quirk because
10940+
// StackLevelSetter used to do this even outside x86.
10941+
//
10942+
void Lowering::SetFramePointerFromArgSpaceSize()
10943+
{
10944+
unsigned stackLevelSpace = m_outgoingArgSpaceSize;
10945+
10946+
if (comp->compTailCallUsed)
10947+
{
10948+
// StackLevelSetter also used to count tailcalls.
10949+
for (BasicBlock* block : comp->Blocks())
10950+
{
10951+
GenTreeCall* tailCall;
10952+
if (block->endsWithTailCall(comp, true, false, &tailCall))
10953+
{
10954+
stackLevelSpace = max(stackLevelSpace, tailCall->gtArgs.OutgoingArgsStackSize());
10955+
}
10956+
}
10957+
}
10958+
10959+
unsigned stackLevel =
10960+
(max(stackLevelSpace, (unsigned)MIN_ARG_AREA_FOR_CALL) - MIN_ARG_AREA_FOR_CALL) / TARGET_POINTER_SIZE;
10961+
10962+
if (stackLevel >= 4)
10963+
{
10964+
comp->codeGen->setFramePointerRequired(true);
10965+
}
10966+
}

src/coreclr/jit/lower.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ class Lowering final : public Phase
4545
}
4646

4747
void FinalizeOutgoingArgSpace();
48+
void SetFramePointerFromArgSpaceSize();
4849

4950
private:
5051
// LowerRange handles new code that is introduced by or after Lowering.

src/coreclr/jit/morph.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12796,10 +12796,10 @@ void Compiler::fgMorphStmts(BasicBlock* block)
1279612796
}
1279712797

1279812798
#if FEATURE_FASTTAILCALL
12799-
GenTree* recursiveTailCall = nullptr;
12799+
GenTreeCall* recursiveTailCall = nullptr;
1280012800
if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall))
1280112801
{
12802-
fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall());
12802+
fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall);
1280312803
}
1280412804
#endif
1280512805

src/coreclr/jit/stacklevelsetter.cpp

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,28 +24,15 @@ StackLevelSetter::StackLevelSetter(Compiler* compiler)
2424
}
2525

2626
//------------------------------------------------------------------------
27-
// DoPhase: Calculate stack slots numbers for outgoing args.
27+
// DoPhase: Calculate stack slots numbers for outgoing args and compute
28+
// requirements of throw helper blocks.
2829
//
2930
// Returns:
3031
// PhaseStatus indicating what, if anything, was changed.
3132
//
32-
// Notes:
33-
// For non-x86 platforms it calculates the max number of slots
34-
// that calls inside this method can push on the stack.
35-
// This value is used for sanity checks in the emitter.
36-
//
37-
// Stack slots are pointer-sized: 4 bytes for 32-bit platforms, 8 bytes for 64-bit platforms.
38-
//
39-
// For x86 it also sets throw-helper blocks incoming stack depth and set
40-
// framePointerRequired when it is necessary. These values are used to pop
41-
// pushed args when an exception occurs.
42-
//
4333
PhaseStatus StackLevelSetter::DoPhase()
4434
{
45-
for (BasicBlock* const block : comp->Blocks())
46-
{
47-
ProcessBlock(block);
48-
}
35+
ProcessBlocks();
4936

5037
#if !FEATURE_FIXED_OUT_ARGS
5138
if (framePointerRequired)
@@ -56,7 +43,6 @@ PhaseStatus StackLevelSetter::DoPhase()
5643

5744
CheckAdditionalArgs();
5845

59-
comp->fgSetPtrArgCntMax(maxStackLevel);
6046
CheckArgCnt();
6147

6248
// When optimizing, check if there are any unused throw helper blocks,
@@ -109,7 +95,28 @@ PhaseStatus StackLevelSetter::DoPhase()
10995
}
11096

11197
//------------------------------------------------------------------------
112-
// ProcessBlock: Do stack level calculations for one block.
98+
// ProcessBlocks: Process all the blocks if necessary.
99+
//
100+
void StackLevelSetter::ProcessBlocks()
101+
{
102+
#ifndef TARGET_X86
103+
// Outside x86 we do not need to compute pushed/popped stack slots.
104+
// However, we do optimize throw-helpers and need to process the blocks for
105+
// that, but only when optimizing.
106+
if (!throwHelperBlocksUsed || comp->opts.OptimizationDisabled())
107+
{
108+
return;
109+
}
110+
#endif
111+
112+
for (BasicBlock* const block : comp->Blocks())
113+
{
114+
ProcessBlock(block);
115+
}
116+
}
117+
118+
//------------------------------------------------------------------------
119+
// ProcessBlock: Do stack level and throw helper determinations for one block.
113120
//
114121
// Notes:
115122
// Block starts and ends with an empty outgoing stack.
@@ -125,10 +132,13 @@ PhaseStatus StackLevelSetter::DoPhase()
125132
void StackLevelSetter::ProcessBlock(BasicBlock* block)
126133
{
127134
assert(currentStackLevel == 0);
135+
128136
LIR::ReadOnlyRange& range = LIR::AsRange(block);
129137
for (auto i = range.rbegin(); i != range.rend(); ++i)
130138
{
131139
GenTree* node = *i;
140+
141+
#ifdef TARGET_X86
132142
if (node->OperIsPutArgStkOrSplit())
133143
{
134144
GenTreePutArgStk* putArg = node->AsPutArgStk();
@@ -145,6 +155,7 @@ void StackLevelSetter::ProcessBlock(BasicBlock* block)
145155
call->gtArgs.SetStkSizeBytes(usedStackSlotsCount * TARGET_POINTER_SIZE);
146156
#endif // UNIX_X86_ABI
147157
}
158+
#endif
148159

149160
if (!throwHelperBlocksUsed)
150161
{
@@ -418,7 +429,12 @@ void StackLevelSetter::SubStackLevel(unsigned value)
418429
//
419430
void StackLevelSetter::CheckArgCnt()
420431
{
421-
if (!comp->compCanEncodePtrArgCntMax())
432+
#ifdef JIT32_GCENCODER
433+
// The GC encoding for fully interruptible methods does not
434+
// support more than 1023 pushed arguments, so we have to
435+
// use a partially interruptible GC info/encoding.
436+
//
437+
if (maxStackLevel >= MAX_PTRARG_OFS)
422438
{
423439
#ifdef DEBUG
424440
if (comp->verbose)
@@ -429,6 +445,7 @@ void StackLevelSetter::CheckArgCnt()
429445
#endif
430446
comp->SetInterruptible(false);
431447
}
448+
432449
if (maxStackLevel >= sizeof(unsigned))
433450
{
434451
#ifdef DEBUG
@@ -439,6 +456,7 @@ void StackLevelSetter::CheckArgCnt()
439456
#endif
440457
comp->codeGen->setFramePointerRequired(true);
441458
}
459+
#endif
442460
}
443461

444462
//------------------------------------------------------------------------

src/coreclr/jit/stacklevelsetter.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ class StackLevelSetter final : public Phase
1414
virtual PhaseStatus DoPhase() override;
1515

1616
private:
17+
void ProcessBlocks();
1718
void ProcessBlock(BasicBlock* block);
1819

1920
void SetThrowHelperBlocks(GenTree* node, BasicBlock* block);

0 commit comments

Comments
 (0)