diff --git a/llvm/include/llvm/CodeGen/LiveInterval.h b/llvm/include/llvm/CodeGen/LiveInterval.h index e1c5717f5face..e9ca9e36c95b9 100644 --- a/llvm/include/llvm/CodeGen/LiveInterval.h +++ b/llvm/include/llvm/CodeGen/LiveInterval.h @@ -237,9 +237,9 @@ namespace llvm { } /// Constructs a new LiveRange object. - LiveRange(bool UseSegmentSet = false) - : segmentSet(UseSegmentSet ? std::make_unique() - : nullptr) {} + explicit LiveRange(bool UseSegmentSet = false) + : segmentSet(UseSegmentSet ? std::make_unique() : nullptr) { + } /// Constructs a new LiveRange object by copying segments and valnos from /// another LiveRange. diff --git a/llvm/include/llvm/CodeGen/LiveStacks.h b/llvm/include/llvm/CodeGen/LiveStacks.h index 02c640bfc4a93..3b4550901dc7e 100644 --- a/llvm/include/llvm/CodeGen/LiveStacks.h +++ b/llvm/include/llvm/CodeGen/LiveStacks.h @@ -40,49 +40,43 @@ class LiveStacks { /// VNInfo::Allocator VNInfoAllocator; - /// S2IMap - Stack slot indices to live interval mapping. - using SS2IntervalMap = std::unordered_map; - SS2IntervalMap S2IMap; - - /// S2RCMap - Stack slot indices to register class mapping. 
- std::map S2RCMap; + int StartIdx = -1; + SmallVector S2LI; + SmallVector S2RC; public: - using iterator = SS2IntervalMap::iterator; - using const_iterator = SS2IntervalMap::const_iterator; + using iterator = SmallVector::iterator; + using const_iterator = SmallVector::const_iterator; - const_iterator begin() const { return S2IMap.begin(); } - const_iterator end() const { return S2IMap.end(); } - iterator begin() { return S2IMap.begin(); } - iterator end() { return S2IMap.end(); } + const_iterator begin() const { return S2LI.begin(); } + const_iterator end() const { return S2LI.end(); } + iterator begin() { return S2LI.begin(); } + iterator end() { return S2LI.end(); } - unsigned getNumIntervals() const { return (unsigned)S2IMap.size(); } + unsigned getStartIdx() const { return StartIdx; } + unsigned getNumIntervals() const { return (unsigned)S2LI.size(); } LiveInterval &getOrCreateInterval(int Slot, const TargetRegisterClass *RC); LiveInterval &getInterval(int Slot) { assert(Slot >= 0 && "Spill slot indice must be >= 0"); - SS2IntervalMap::iterator I = S2IMap.find(Slot); - assert(I != S2IMap.end() && "Interval does not exist for stack slot"); - return I->second; + return *S2LI[Slot - StartIdx]; } const LiveInterval &getInterval(int Slot) const { assert(Slot >= 0 && "Spill slot indice must be >= 0"); - SS2IntervalMap::const_iterator I = S2IMap.find(Slot); - assert(I != S2IMap.end() && "Interval does not exist for stack slot"); - return I->second; + return *S2LI[Slot - StartIdx]; } - bool hasInterval(int Slot) const { return S2IMap.count(Slot); } + bool hasInterval(int Slot) const { + if (Slot < StartIdx || StartIdx == -1) + return false; + return !getInterval(Slot).empty(); + } const TargetRegisterClass *getIntervalRegClass(int Slot) const { assert(Slot >= 0 && "Spill slot indice must be >= 0"); - std::map::const_iterator I = - S2RCMap.find(Slot); - assert(I != S2RCMap.end() && - "Register class info does not exist for stack slot"); - return I->second; + return 
S2RC[Slot - StartIdx]; } VNInfo::Allocator &getVNInfoAllocator() { return VNInfoAllocator; } diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h index 403e5eda949f1..5c05b792cd1e0 100644 --- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h +++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h @@ -120,12 +120,18 @@ class MachineFrameInfo { ///< triggered protection. 3rd closest to the protector. }; + static constexpr int NoUnderlyingSlot = std::numeric_limits::min(); + static constexpr int IsUnderlyingSlot = std::numeric_limits::min() + 1; + private: // Represent a single object allocated on the stack. struct StackObject { // The offset of this object from the stack pointer on entry to // the function. This field has no meaning for a variable sized element. - int64_t SPOffset; + // After being placed, this is relative to SP. + // If UnderlyingSlot is not NoUnderlyingSlot, this is relative to the start + // of the UnderlyingSlot. + int64_t Offset; // The size of this object on the stack. 0 means a variable sized object, // ~0ULL means a dead object. @@ -134,6 +140,10 @@ class MachineFrameInfo { // The required alignment of this stack slot. Align Alignment; + // If not NoUnderlyingSlot, indicates that this slot should be placed + // at Offset into the slot UnderlyingSlot. + int UnderlyingSlot = NoUnderlyingSlot; + // If true, the value of the stack object is set before // entering the function and is not modified inside the function. By // default, fixed objects are immutable unless marked otherwise. 
@@ -183,10 +193,10 @@ class MachineFrameInfo { uint8_t SSPLayout = SSPLK_None; - StackObject(uint64_t Size, Align Alignment, int64_t SPOffset, + StackObject(uint64_t Size, Align Alignment, int64_t Offset, bool IsImmutable, bool IsSpillSlot, const AllocaInst *Alloca, bool IsAliased, uint8_t StackID = 0) - : SPOffset(SPOffset), Size(Size), Alignment(Alignment), + : Offset(Offset), Size(Size), Alignment(Alignment), isImmutable(IsImmutable), isSpillSlot(IsSpillSlot), StackID(StackID), Alloca(Alloca), isAliased(IsAliased) {} }; @@ -532,7 +542,7 @@ class MachineFrameInfo { "Invalid Object Idx!"); assert(!isDeadObjectIndex(ObjectIdx) && "Getting frame offset for a dead object?"); - return Objects[ObjectIdx+NumFixedObjects].SPOffset; + return Objects[ObjectIdx + NumFixedObjects].Offset; } bool isObjectZExt(int ObjectIdx) const { @@ -561,12 +571,12 @@ class MachineFrameInfo { /// Set the stack frame offset of the specified object. The /// offset is relative to the stack pointer on entry to the function. - void setObjectOffset(int ObjectIdx, int64_t SPOffset) { + void setObjectOffset(int ObjectIdx, int64_t Offset) { assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && "Invalid Object Idx!"); assert(!isDeadObjectIndex(ObjectIdx) && "Setting frame offset for a dead object?"); - Objects[ObjectIdx+NumFixedObjects].SPOffset = SPOffset; + Objects[ObjectIdx + NumFixedObjects].Offset = Offset; } SSPLayoutKind getObjectSSPLayout(int ObjectIdx) const { @@ -762,6 +772,18 @@ class MachineFrameInfo { // If ID == 0, MaxAlignment will need to be updated separately. 
} + int getUnderlyingSlot(int ObjectIdx) const { + assert(unsigned(ObjectIdx + NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + return Objects[ObjectIdx + NumFixedObjects].UnderlyingSlot; + } + + void setUnderlyingSlot(int ObjectIdx, int Underlying) { + assert(unsigned(ObjectIdx + NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + Objects[ObjectIdx + NumFixedObjects].UnderlyingSlot = Underlying; + } + /// Returns true if the specified index corresponds to a dead object. bool isDeadObjectIndex(int ObjectIdx) const { assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 94d04b82666be..faf860c656af4 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -1340,7 +1340,9 @@ class MachineInstr } // True if the instruction represents a position in the function. - bool isPosition() const { return isLabel() || isCFIInstruction(); } + bool isPosition() const { + return isLifetimeMarker() || isLabel() || isCFIInstruction(); + } bool isNonListDebugValue() const { return getOpcode() == TargetOpcode::DBG_VALUE; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h b/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h index 76a019ddf8f34..a76b5c0d44791 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/FormatUtil.h @@ -62,49 +62,6 @@ LLVM_ABI std::string formatChunkKind(codeview::DebugSubsectionKind Kind, LLVM_ABI std::string formatSymbolKind(codeview::SymbolKind K); LLVM_ABI std::string formatTypeLeafKind(codeview::TypeLeafKind K); -/// Returns the number of digits in the given integer. 
-inline int NumDigits(uint64_t N) { - if (N < 10ULL) - return 1; - if (N < 100ULL) - return 2; - if (N < 1000ULL) - return 3; - if (N < 10000ULL) - return 4; - if (N < 100000ULL) - return 5; - if (N < 1000000ULL) - return 6; - if (N < 10000000ULL) - return 7; - if (N < 100000000ULL) - return 8; - if (N < 1000000000ULL) - return 9; - if (N < 10000000000ULL) - return 10; - if (N < 100000000000ULL) - return 11; - if (N < 1000000000000ULL) - return 12; - if (N < 10000000000000ULL) - return 13; - if (N < 100000000000000ULL) - return 14; - if (N < 1000000000000000ULL) - return 15; - if (N < 10000000000000000ULL) - return 16; - if (N < 100000000000000000ULL) - return 17; - if (N < 1000000000000000000ULL) - return 18; - if (N < 10000000000000000000ULL) - return 19; - return 20; -} - namespace detail { template struct EndianAdapter final diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h index ae3150e5602ee..a374f93d8538e 100644 --- a/llvm/include/llvm/Support/MathExtras.h +++ b/llvm/include/llvm/Support/MathExtras.h @@ -795,6 +795,49 @@ using stack_float_t = volatile float; using stack_float_t = float; #endif +/// Returns the number of digits in the given integer. 
+inline int NumDigits(uint64_t N) { + if (N < 10ULL) + return 1; + if (N < 100ULL) + return 2; + if (N < 1000ULL) + return 3; + if (N < 10000ULL) + return 4; + if (N < 100000ULL) + return 5; + if (N < 1000000ULL) + return 6; + if (N < 10000000ULL) + return 7; + if (N < 100000000ULL) + return 8; + if (N < 1000000000ULL) + return 9; + if (N < 10000000000ULL) + return 10; + if (N < 100000000000ULL) + return 11; + if (N < 1000000000000ULL) + return 12; + if (N < 10000000000000ULL) + return 13; + if (N < 100000000000000ULL) + return 14; + if (N < 1000000000000000ULL) + return 15; + if (N < 10000000000000000ULL) + return 16; + if (N < 100000000000000000ULL) + return 17; + if (N < 1000000000000000000ULL) + return 18; + if (N < 10000000000000000000ULL) + return 19; + return 20; +} + } // namespace llvm #endif diff --git a/llvm/lib/CodeGen/LiveStacks.cpp b/llvm/lib/CodeGen/LiveStacks.cpp index c07d985a09d1f..ea158b2d96a4e 100644 --- a/llvm/lib/CodeGen/LiveStacks.cpp +++ b/llvm/lib/CodeGen/LiveStacks.cpp @@ -37,10 +37,12 @@ void LiveStacksWrapperLegacy::getAnalysisUsage(AnalysisUsage &AU) const { } void LiveStacks::releaseMemory() { + for (int Idx = 0; Idx < (int)S2LI.size(); ++Idx) + S2LI[Idx]->~LiveInterval(); // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. 
VNInfoAllocator.Reset(); - S2IMap.clear(); - S2RCMap.clear(); + S2LI.clear(); + S2RC.clear(); } void LiveStacks::init(MachineFunction &MF) { @@ -52,20 +54,22 @@ void LiveStacks::init(MachineFunction &MF) { LiveInterval & LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) { assert(Slot >= 0 && "Spill slot indice must be >= 0"); - SS2IntervalMap::iterator I = S2IMap.find(Slot); - if (I == S2IMap.end()) { - I = S2IMap - .emplace( - std::piecewise_construct, std::forward_as_tuple(Slot), - std::forward_as_tuple(Register::index2StackSlot(Slot), 0.0F)) - .first; - S2RCMap.insert(std::make_pair(Slot, RC)); + if (StartIdx == -1) + StartIdx = Slot; + + int Idx = Slot - StartIdx; + assert(Idx >= 0 && "Slot not in order ?"); + if (Idx < (int)S2LI.size()) { + S2RC[Idx] = TRI->getCommonSubClass(S2RC[Idx], RC); } else { - // Use the largest common subclass register class. - const TargetRegisterClass *&OldRC = S2RCMap[Slot]; - OldRC = TRI->getCommonSubClass(OldRC, RC); + S2RC.resize(Idx + 1); + S2LI.resize(Idx + 1); + S2LI[Idx] = this->VNInfoAllocator.Allocate(); + new (S2LI[Idx]) LiveInterval(Register::index2StackSlot(Slot), 0.0F); + S2RC[Idx] = RC; } - return I->second; + assert(S2RC.size() == S2LI.size()); + return *S2LI[Idx]; } AnalysisKey LiveStacksAnalysis::Key; @@ -96,13 +100,12 @@ void LiveStacksWrapperLegacy::print(raw_ostream &OS, const Module *) const { } /// print - Implement the dump method. 
-void LiveStacks::print(raw_ostream &OS, const Module*) const { +void LiveStacks::print(raw_ostream &OS, const Module *) const { OS << "********** INTERVALS **********\n"; - for (const_iterator I = begin(), E = end(); I != E; ++I) { - I->second.print(OS); - int Slot = I->first; - const TargetRegisterClass *RC = getIntervalRegClass(Slot); + for (int Idx = 0; Idx < (int)S2LI.size(); ++Idx) { + S2LI[Idx]->print(OS); + const TargetRegisterClass *RC = S2RC[Idx]; if (RC) OS << " [" << TRI->getRegClassName(RC) << "]\n"; else diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp index e4b993850f73d..e3d1761ef894a 100644 --- a/llvm/lib/CodeGen/MachineFrameInfo.cpp +++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include @@ -80,7 +81,7 @@ int MachineFrameInfo::CreateVariableSizedObject(Align Alignment, return (int)Objects.size()-NumFixedObjects-1; } -int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, +int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t Offset, bool IsImmutable, bool IsAliased) { assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); // The alignment of the frame index can be determined from its offset from @@ -90,23 +91,22 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, // stack needs realignment, we can't assume that the stack will in fact be // aligned. Align Alignment = - commonAlignment(ForcedRealign ? Align(1) : StackAlignment, SPOffset); + commonAlignment(ForcedRealign ? 
Align(1) : StackAlignment, Offset); Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); Objects.insert(Objects.begin(), - StackObject(Size, Alignment, SPOffset, IsImmutable, + StackObject(Size, Alignment, Offset, IsImmutable, /*IsSpillSlot=*/false, /*Alloca=*/nullptr, IsAliased)); return -++NumFixedObjects; } -int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, - int64_t SPOffset, +int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, int64_t Offset, bool IsImmutable) { Align Alignment = - commonAlignment(ForcedRealign ? Align(1) : StackAlignment, SPOffset); + commonAlignment(ForcedRealign ? Align(1) : StackAlignment, Offset); Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); Objects.insert(Objects.begin(), - StackObject(Size, Alignment, SPOffset, IsImmutable, + StackObject(Size, Alignment, Offset, IsImmutable, /*IsSpillSlot=*/true, /*Alloca=*/nullptr, /*IsAliased=*/false)); return -++NumFixedObjects; @@ -221,6 +221,12 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ if (SO.StackID != 0) OS << "id=" << static_cast(SO.StackID) << ' '; + if (SO.Alloca && !SO.Alloca->getName().empty()) + OS << "alloca=" << SO.Alloca->getName() << ' '; + + if (SO.isSpillSlot) + OS << "spill "; + if (SO.Size == ~0ULL) { OS << "dead\n"; continue; @@ -233,8 +239,13 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ if (i < NumFixedObjects) OS << ", fixed"; - if (i < NumFixedObjects || SO.SPOffset != -1) { - int64_t Off = SO.SPOffset - ValOffset; + if (SO.UnderlyingSlot == MachineFrameInfo::IsUnderlyingSlot) + OS << ", underlying"; + if (SO.UnderlyingSlot > MachineFrameInfo::IsUnderlyingSlot) { + OS << ", placed=" << "fi#" << (int)(SO.UnderlyingSlot - NumFixedObjects) + << "+" << SO.Offset; + } else if (i < NumFixedObjects || SO.Offset != -1) { + int64_t Off = SO.Offset - ValOffset; OS << ", at location [SP"; if (Off > 0) OS << "+" << Off; diff 
--git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index da3665b3b6a0b..3e5fd59534105 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -1417,10 +1417,6 @@ bool MachineInstr::isDead(const MachineRegisterInfo &MRI, if (isInlineAsm()) return false; - // FIXME: See issue #105950 for why LIFETIME markers are considered dead here. - if (isLifetimeMarker()) - return true; - // If there are no defs with uses, then we call the instruction dead so long // as we do not suspect it may have sideeffects. return wouldBeTriviallyDead(); diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index f66f54682c84c..7a44b3937a63b 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -694,6 +694,13 @@ void PEIImpl::spillCalleeSavedRegs(MachineFunction &MF) { } } +static inline void UpdateOffset(MachineFrameInfo &MFI, int FrameIdx, + int64_t Offset) { + LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset + << "]\n"); + MFI.setObjectOffset(FrameIdx, Offset); // Set the computed offset +} + /// AdjustStackOffset - Helper function used to adjust the stack frame offset. 
static inline void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, bool StackGrowsDown, int64_t &Offset, @@ -712,13 +719,9 @@ static inline void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, Offset = alignTo(Offset, Alignment); if (StackGrowsDown) { - LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset - << "]\n"); - MFI.setObjectOffset(FrameIdx, -Offset); // Set the computed offset + UpdateOffset(MFI, FrameIdx, -Offset); } else { - LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset - << "]\n"); - MFI.setObjectOffset(FrameIdx, Offset); + UpdateOffset(MFI, FrameIdx, Offset); Offset += MFI.getObjectSize(FrameIdx); } } @@ -1044,6 +1047,7 @@ void PEIImpl::calculateFrameObjectOffsets(MachineFunction &MF) { } SmallVector ObjectsToAllocate; + SmallVector UpdateOffsetAfterAllocate; // Then prepare to assign frame offsets to stack objects that are not used to // spill callee saved registers. @@ -1064,6 +1068,11 @@ void PEIImpl::calculateFrameObjectOffsets(MachineFunction &MF) { if (MFI.getStackID(i) != TargetStackID::Default) continue; + if (MFI.getUnderlyingSlot(i) > MachineFrameInfo::IsUnderlyingSlot) { + UpdateOffsetAfterAllocate.push_back(i); + continue; + } + // Add the objects that we need to allocate to our working set. 
ObjectsToAllocate.push_back(i); } @@ -1104,6 +1113,14 @@ void PEIImpl::calculateFrameObjectOffsets(MachineFunction &MF) { AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); } + for (int FrameIdx : UpdateOffsetAfterAllocate) { + int UnderlyingSlot = MFI.getUnderlyingSlot(FrameIdx); + int64_t ObjOffset = + MFI.getObjectOffset(UnderlyingSlot) + MFI.getObjectOffset(FrameIdx); + UpdateOffset(MFI, FrameIdx, ObjOffset); + MFI.setUnderlyingSlot(FrameIdx, MachineFrameInfo::NoUnderlyingSlot); + } + if (!TFI.targetHandlesStackFrameRounding()) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp index 0f93822d9792b..798eef9354256 100644 --- a/llvm/lib/CodeGen/StackColoring.cpp +++ b/llvm/lib/CodeGen/StackColoring.cpp @@ -10,13 +10,7 @@ // lifetime markers machine instructions (LIFETIME_START and LIFETIME_END), // which represent the possible lifetime of stack slots. It attempts to // merge disjoint stack slots and reduce the used stack space. -// NOTE: This pass is not StackSlotColoring, which optimizes spill slots. -// -// TODO: In the future we plan to improve stack coloring in the following ways: -// 1. Allow merging multiple small slots into a single larger slot at different -// offsets. -// 2. Merge this pass with StackSlotColoring and allow merging of allocas with -// spill slots. +// NOTE: This pass is not StackSlotColoring, which optimizes only spill slots. 
// //===----------------------------------------------------------------------===// @@ -29,6 +23,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -40,6 +35,7 @@ #include "llvm/CodeGen/PseudoSourceValueManager.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" @@ -54,6 +50,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/DebugCounter.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -65,10 +62,12 @@ using namespace llvm; #define DEBUG_TYPE "stack-coloring" -static cl::opt -DisableColoring("no-stack-coloring", - cl::init(false), cl::Hidden, - cl::desc("Disable stack coloring")); +DEBUG_COUNTER(ProcessSlot, DEBUG_TYPE "-slot", + "Controls which slot get processed"); + +static cl::opt DisableColoring("no-stack-coloring", cl::init(false), + cl::Hidden, + cl::desc("Disable stack coloring")); /// The user may write code that uses allocas outside of the declared lifetime /// zone. This can happen when the user returns a reference to a local @@ -76,22 +75,31 @@ DisableColoring("no-stack-coloring", /// code. If this flag is enabled, we try to save the user. This option /// is treated as overriding LifetimeStartOnFirstUse below. 
static cl::opt -ProtectFromEscapedAllocas("protect-from-escaped-allocas", - cl::init(false), cl::Hidden, - cl::desc("Do not optimize lifetime zones that " - "are broken")); + ProtectFromEscapedAllocas("protect-from-escaped-allocas", cl::init(false), + cl::Hidden, + cl::desc("Do not optimize lifetime zones that " + "are broken")); /// Enable enhanced dataflow scheme for lifetime analysis (treat first /// use of stack slot as start of slot lifetime, as opposed to looking /// for LIFETIME_START marker). See "Implementation notes" below for /// more info. static cl::opt -LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use", - cl::init(true), cl::Hidden, - cl::desc("Treat stack lifetimes as starting on first use, not on START marker.")); + LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use", + cl::init(true), cl::Hidden, + cl::desc("Treat stack lifetimes as starting on " + "first use, not on START marker.")); +static cl::opt UseNewStackColoring( + "new-stack-coloring", cl::init(false), cl::Hidden, + cl::desc("Use a better logic to try to reduce stack usage")); -STATISTIC(NumMarkerSeen, "Number of lifetime markers found."); +static cl::opt MaxCandidatesOpt( + "stackcoloring-max-candidates", cl::init(0), cl::Hidden, + cl::desc( + "Max number of candidates that will be evaluated, 0 means no limit")); + +STATISTIC(NumMarkerSeen, "Number of lifetime markers found."); STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots."); STATISTIC(StackSlotMerged, "Number of stack slot merged."); STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); @@ -375,12 +383,47 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); namespace { +constexpr unsigned InvalidIdx = -1; + /// StackColoring - A machine pass for merging disjoint stack allocations, /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions. 
class StackColoring { MachineFrameInfo *MFI = nullptr; MachineFunction *MF = nullptr; + LiveStacks *LS = nullptr; + + struct SlotInfo { + // All places in the current function where this Slot is live + BitVector Liveness; + + // Used to make overlap queries faster + SmallVector StartLiveness; + + int64_t SlotPriority = 0; + + unsigned UseCount = 0; + + unsigned Offset = InvalidIdx; + + unsigned Size = 0; + + Align Align; + + bool hasOverlap(SlotInfo &Other) { + // NOTE: This is not just a faster way to say + // return Liveness.anyCommon(Other.Liveness); + // This also allows merging slots that have overlapping lifetimes but + // cannot be live simultaneously + return any_of(StartLiveness, + [&](int Idx) { return Other.Liveness[Idx]; }) || + any_of(Other.StartLiveness, + [&](int Idx) { return Liveness[Idx]; }); + } + + LLVM_DUMP_METHOD void dump(const StackColoring *State = nullptr) const; + }; + /// A class representing liveness information for a single basic block. /// Each bit in the BitVector represents the liveness property /// for a different stack slot. @@ -396,21 +439,21 @@ class StackColoring { /// Which slots are marked as LIVE_OUT, coming out of each basic block. BitVector LiveOut; - }; - /// Maps active slots (per bit) for each basic block. - using LivenessMap = DenseMap; - LivenessMap BlockLiveness; + bool isEmpty() { return Begin.empty(); } + }; - /// Maps serial numbers to basic blocks. - DenseMap BasicBlocks; + SmallVector BlockLiveness; /// Maps basic blocks to a serial number. SmallVector BasicBlockNumbering; + unsigned LivenessSize; + SmallVector Slot2Info; + /// Maps slots to their use interval. Outside of this interval, slots /// values are either dead or `undef` and they will not be written to. - SmallVector, 16> Intervals; + SmallVector, 16> Intervals; /// Maps slots to the points where they can become in-use. SmallVector, 16> LiveStarts; @@ -423,7 +466,7 @@ class StackColoring { /// The list of lifetime markers found. 
These markers are to be removed /// once the coloring is done. - SmallVector Markers; + SmallVector Markers; /// Record the FI slots for which we have seen some sort of /// lifetime marker (either start or end). @@ -437,18 +480,15 @@ class StackColoring { unsigned NumIterations; public: - StackColoring(SlotIndexes *Indexes) : Indexes(Indexes) {} + StackColoring(SlotIndexes *Indexes, LiveStacks *LS) + : LS(LS), Indexes(Indexes) {} bool run(MachineFunction &Func); private: - /// Used in collectMarkers - using BlockBitVecMap = DenseMap; - /// Debug. void dump() const; void dumpIntervals() const; void dumpBB(MachineBasicBlock *MBB) const; - void dumpBV(const char *tag, const BitVector &BV) const; /// Removes all of the lifetime marker instructions from the function. /// \returns true if any markers were removed. @@ -465,8 +505,11 @@ class StackColoring { /// in and out blocks. void calculateLocalLiveness(); + unsigned doMerging(unsigned NumSlots); + /// Returns TRUE if we're using the first-use-begins-lifetime method for - /// this slot (if FALSE, then the start marker is treated as start of lifetime). + /// this slot (if FALSE, then the start marker is treated as start of + /// lifetime). bool applyFirstUse(int Slot) { if (!LifetimeStartOnFirstUse || ProtectFromEscapedAllocas) return false; @@ -480,8 +523,7 @@ class StackColoring { /// starting or ending are added to the vector "slots" and "isStart" is set /// accordingly. /// \returns True if inst contains a lifetime start or end - bool isLifetimeStartOrEnd(const MachineInstr &MI, - SmallVector &slots, + bool isLifetimeStartOrEnd(const MachineInstr &MI, SmallVector &slots, bool &isStart); /// Construct the LiveIntervals for the slots. 
@@ -528,22 +570,52 @@ INITIALIZE_PASS_END(StackColoringLegacy, DEBUG_TYPE, void StackColoringLegacy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); + AU.addUsedIfAvailable(); MachineFunctionPass::getAnalysisUsage(AU); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag, - const BitVector &BV) const { - dbgs() << tag << " : { "; - for (unsigned I = 0, E = BV.size(); I != E; ++I) - dbgs() << BV.test(I) << " "; - dbgs() << "}\n"; + +LLVM_DUMP_METHOD void dumpBV(StringRef tag, const BitVector &BV) { + if (BV.size() == 0) { + dbgs() << tag << " : EMPTY\n"; + return; + } + constexpr unsigned ColumnWidth = 150; + unsigned LineStartOffset = tag.size() + /*" : "*/ 3; + unsigned WidthAfterTag = ColumnWidth - LineStartOffset; + unsigned NumBitsPerColumn = WidthAfterTag / 2; + unsigned BitsCount = BV.size(); + for (unsigned Bits = 0; Bits < BitsCount; Bits += NumBitsPerColumn) { + unsigned Start = Bits; + unsigned End = std::min(Start + NumBitsPerColumn, BitsCount); + + dbgs() << tag << " : "; + + for (unsigned I = Start; I < End; ++I) + dbgs() << BV.test(I) << " "; + dbgs() << '\n'; + dbgs() << tag << " : "; + unsigned next = Start; + for (unsigned I = Start; I < End; ++I) { + if (I < next) + continue; + if (BV.test(I)) { + int numDigits = NumDigits(I); + // Make sure number have spacing while staying aligned to the line above + next = I + 1 + numDigits / 2; + dbgs() << I << ' '; + if (numDigits % 2 == 0) + dbgs() << ' '; + } else + dbgs() << " "; + } + dbgs() << '\n'; + } } LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const { - LivenessMap::const_iterator BI = BlockLiveness.find(MBB); - assert(BI != BlockLiveness.end() && "Block not found"); - const BlockLifetimeInfo &BlockInfo = BI->second; + const BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB->getNumber()]; dumpBV("BEGIN", BlockInfo.Begin); dumpBV("END", BlockInfo.End); @@ -553,22 +625,61 @@ LLVM_DUMP_METHOD void 
StackColoring::dumpBB(MachineBasicBlock *MBB) const { LLVM_DUMP_METHOD void StackColoring::dump() const { for (MachineBasicBlock *MBB : depth_first(MF)) { - dbgs() << "Inspecting block #" << MBB->getNumber() << " [" - << MBB->getName() << "]\n"; + dbgs() << "Inspecting block #" << MBB->getNumber() << " [" << MBB->getName() + << "]\n"; dumpBB(MBB); } } LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const { for (unsigned I = 0, E = Intervals.size(); I != E; ++I) { - dbgs() << "Interval[" << I << "]:\n"; - Intervals[I]->dump(); + dbgs() << "Interval[" << I << "]:"; + if (MFI->getObjectAllocation(I)) + dbgs() << *MFI->getObjectAllocation(I); + dbgs() << '\n' << *Intervals[I] << '\n'; + dbgs() << "LiveStarts:"; + for (SlotIndex SIdx : LiveStarts[I]) + dbgs() << ' ' << SIdx; + dbgs() << '\n'; + } +} + +LLVM_DUMP_METHOD void +StackColoring::SlotInfo::dump(const StackColoring *State) const { + unsigned Slot = InvalidIdx; + if (State) { + Slot = this - State->Slot2Info.data(); + dbgs() << "fi#" << Slot; + } else + dbgs() << "SlotInfo"; + dbgs() << ":"; + if (Offset != InvalidIdx) + dbgs() << " offset=" << Offset; + dbgs() << " uses=" << UseCount; + dbgs() << " prio=" << SlotPriority; + if (State) { + if (State->MFI->getObjectAllocation(Slot)) + dbgs() << " alloca=\"" << State->MFI->getObjectAllocation(Slot)->getName() + << "\""; + if (State->MFI->isSpillSlotObjectIndex(Slot)) + dbgs() << " spill"; } + dbgs() << " size=" << Size << " align=" << Align.value() << '\n'; + dumpBV("LIVENESS ", Liveness); + BitVector Start; + Start.resize(Liveness.size()); + for (unsigned idx : StartLiveness) { + if (idx >= Start.size()) + Start.resize(idx + 1); + Start[idx] = true; + } + dumpBV("LIVE START ", Start); + dbgs() << "\n"; } + #endif -static inline int getStartOrEndSlot(const MachineInstr &MI) -{ +static inline int getStartOrEndSlot(const MachineInstr &MI) { assert((MI.getOpcode() == TargetOpcode::LIFETIME_START || MI.getOpcode() == TargetOpcode::LIFETIME_END) && "Expected 
LIFETIME_START or LIFETIME_END op"); @@ -609,7 +720,7 @@ bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI, if (!MO.isFI()) continue; int Slot = MO.getIndex(); - if (Slot<0) + if (Slot < 0) continue; if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) { slots.push_back(Slot); @@ -627,16 +738,21 @@ bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI, unsigned StackColoring::collectMarkers(unsigned NumSlot) { unsigned MarkersFound = 0; - BlockBitVecMap SeenStartMap; + SmallVector SeenStartMap; InterestingSlots.clear(); InterestingSlots.resize(NumSlot); ConservativeSlots.clear(); ConservativeSlots.resize(NumSlot); + if (LS) + MarkersFound += LS->getNumIntervals() * 2; + // number of start and end lifetime ops for each slot SmallVector NumStartLifetimes(NumSlot, 0); SmallVector NumEndLifetimes(NumSlot, 0); + SeenStartMap.resize(MF->getNumBlockIDs()); + // Step 1: collect markers and populate the "InterestingSlots" // and "ConservativeSlots" sets. for (MachineBasicBlock *MBB : depth_first(MF)) { @@ -645,10 +761,11 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { // to this bb). 
BitVector BetweenStartEnd; BetweenStartEnd.resize(NumSlot); + SeenStartMap[MBB->getNumber()].resize(NumSlot); for (const MachineBasicBlock *Pred : MBB->predecessors()) { - BlockBitVecMap::const_iterator I = SeenStartMap.find(Pred); - if (I != SeenStartMap.end()) { - BetweenStartEnd |= I->second; + BitVector &PredSet = SeenStartMap[Pred->getNumber()]; + if (!PredSet.empty()) { + BetweenStartEnd |= PredSet; } } @@ -659,8 +776,10 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { if (MI.getOpcode() == TargetOpcode::LIFETIME_START || MI.getOpcode() == TargetOpcode::LIFETIME_END) { int Slot = getStartOrEndSlot(MI); - if (Slot < 0) + if (Slot < 0 || MFI->isObjectPreAllocated(Slot)) { + Markers.push_back(&MI); continue; + } InterestingSlots.set(Slot); if (MI.getOpcode() == TargetOpcode::LIFETIME_START) { BetweenStartEnd.set(Slot); @@ -688,13 +807,14 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { int Slot = MO.getIndex(); if (Slot < 0) continue; - if (! BetweenStartEnd.test(Slot)) { + Slot2Info[Slot].UseCount++; + if (!BetweenStartEnd.test(Slot)) { ConservativeSlots.set(Slot); } } } } - BitVector &SeenStart = SeenStartMap[MBB]; + BitVector &SeenStart = SeenStartMap[MBB->getNumber()]; SeenStart |= BetweenStartEnd; } if (!MarkersFound) { @@ -721,17 +841,17 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots)); + BlockLiveness.resize(MF->getNumBlockIDs()); // Step 2: compute begin/end sets for each block // NOTE: We use a depth-first iteration to ensure that we obtain a // deterministic numbering. for (MachineBasicBlock *MBB : depth_first(MF)) { // Assign a serial number to this basic block. - BasicBlocks[MBB] = BasicBlockNumbering.size(); BasicBlockNumbering.push_back(MBB); // Keep a reference to avoid repeated lookups. 
- BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB]; + BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB->getNumber()]; BlockInfo.Begin.resize(NumSlot); BlockInfo.End.resize(NumSlot); @@ -788,19 +908,19 @@ void StackColoring::calculateLocalLiveness() { for (const MachineBasicBlock *BB : BasicBlockNumbering) { // Use an iterator to avoid repeated lookups. - LivenessMap::iterator BI = BlockLiveness.find(BB); - assert(BI != BlockLiveness.end() && "Block not found"); - BlockLifetimeInfo &BlockInfo = BI->second; + BlockLifetimeInfo &BlockInfo = BlockLiveness[BB->getNumber()]; + if (BlockInfo.isEmpty()) + continue; // Compute LiveIn by unioning together the LiveOut sets of all preds. LocalLiveIn.clear(); for (MachineBasicBlock *Pred : BB->predecessors()) { - LivenessMap::const_iterator I = BlockLiveness.find(Pred); + BlockLifetimeInfo &PrefInfo = BlockLiveness[Pred->getNumber()]; // PR37130: transformations prior to stack coloring can // sometimes leave behind statically unreachable blocks; these // can be safely skipped here. - if (I != BlockLiveness.end()) - LocalLiveIn |= I->second.LiveOut; + if (!PrefInfo.isEmpty()) + LocalLiveIn |= PrefInfo.LiveOut; } // Compute LiveOut by subtracting out lifetimes that end in this @@ -833,42 +953,169 @@ void StackColoring::calculateLocalLiveness() { void StackColoring::calculateLiveIntervals(unsigned NumSlots) { SmallVector Starts; - SmallVector DefinitelyInUse; + BitVector DefinitelyInUse; + SmallVector StartIdx; + + int CurrIdx = 0; + + DefinitelyInUse.resize(NumSlots); + struct SplitSlotChanges { + const MachineInstr *AtMI; + unsigned BlockIdx : 31; + unsigned IsStart : 1; + unsigned Slot; + }; + SmallVector MidBlockSpillChanges; + unsigned SpillChangeCounter = 0; + + if (LS && LS->getNumIntervals()) { + // Here we prepare Spill slots lifetime informations + // Live ranges in the LiveStacks seem to be slightly outdated in many small + // ways. 
this is not an issue for stack-slot-coloring, because its only + // operating on LiveRange form LiveStack, but it is an issue here, + // So we only rely on LiveStack, to give us live edges, and conservatively + // re-construct in-block liveness changes + + for (const MachineBasicBlock &MBB : *MF) { + BlockLifetimeInfo &MBBLiveness = BlockLiveness[MBB.getNumber()]; + MBBLiveness.LiveIn.resize(NumSlots); + MBBLiveness.LiveOut.resize(NumSlots); + } + for (const MachineBasicBlock &MBB : *MF) { + unsigned Base = LS->getStartIdx(); + BlockLifetimeInfo &MBBLiveness = BlockLiveness[MBB.getNumber()]; + for (unsigned I = 0; I < LS->getNumIntervals(); I++) { + unsigned Slot = Base + I; + if (LS->getInterval(Slot).liveAt(Indexes->getMBBStartIdx(&MBB))) { + MBBLiveness.LiveIn[Slot] = true; + // Checking if the end of the block is in the live-range is not + // reliable + for (MachineBasicBlock *Pred : MBB.predecessors()) + BlockLiveness[Pred->getNumber()].LiveOut[Slot] = true; + } + } + } + for (const MachineBasicBlock &MBB : *MF) { + unsigned SizeOnStart = MidBlockSpillChanges.size(); + BlockLifetimeInfo &MBBLiveness = BlockLiveness[MBB.getNumber()]; + BitVector IsStoredTo; + IsStoredTo.resize(NumSlots, false); + struct MIBlockIdx { + const MachineInstr *MI; + unsigned BlockIdx; + }; + unsigned BlockIdx = 0; + SmallVector LastUse; + LastUse.resize(NumSlots, {nullptr, 0}); + for (const MachineInstr &MI : MBB) { + if (MI.isDebugInstr()) + continue; + for (MachineMemOperand *MMO : MI.memoperands()) { + auto *PSV = dyn_cast_if_present( + MMO->getPseudoValue()); + if (!PSV) + continue; + unsigned Slot = PSV->getFrameIndex(); + if (!LS->hasInterval(Slot)) + continue; + assert(MMO->isStore() != MMO->isLoad()); + if (MMO->isStore()) { + if (!IsStoredTo[Slot]) { + MidBlockSpillChanges.push_back( + {&MI, BlockIdx, /*IsStart=*/true, Slot}); + IsStoredTo[Slot] = true; + } + } else + LastUse[Slot] = {&MI, BlockIdx}; + } + BlockIdx++; + } + + BitVector Liveness = MBBLiveness.LiveIn; + 
Liveness |= IsStoredTo; + // Keep only the slots that are not live-out of this block. reset(RHS) is + // equivalent to `*this &= ~RHS` but, unlike flip(), it does not invert + // MBBLiveness.LiveOut in place. + Liveness.reset(MBBLiveness.LiveOut); + for (unsigned Slot : Liveness.set_bits()) { + if (!LS->hasInterval(Slot)) + continue; + if (LastUse[Slot].MI) + MidBlockSpillChanges.push_back({LastUse[Slot].MI, + LastUse[Slot].BlockIdx, + /*IsStart=*/false, Slot}); + } + + // Ensure that the changes are in the same order they will be found and + // need to be processed in + std::stable_sort(MidBlockSpillChanges.begin() + SizeOnStart, + MidBlockSpillChanges.end(), + [&](SplitSlotChanges Lhs, SplitSlotChanges Rhs) -> bool { + assert((Lhs.BlockIdx != Rhs.BlockIdx || + Lhs.Slot != Rhs.Slot) && + "Two changes for one slot on the same instruction"); + if (Lhs.BlockIdx != Rhs.BlockIdx) + return Lhs.BlockIdx < Rhs.BlockIdx; + // Avoid overlap of lifetime when the same instruction + // starts some spill lifetime and ends others: ends sort + // before starts. Both operands are tested so that the + // comparator stays a strict weak ordering, as + // std::stable_sort requires. + return !Lhs.IsStart && Rhs.IsStart; + }); + } + } + + // To avoid needing bounds checks + MidBlockSpillChanges.push_back({nullptr, 0, false, InvalidIdx}); // For each block, find which slots are active within this block // and update the live intervals. for (const MachineBasicBlock &MBB : *MF) { - Starts.clear(); - Starts.resize(NumSlots); - DefinitelyInUse.clear(); - DefinitelyInUse.resize(NumSlots); + Starts.assign(NumSlots, SlotIndex()); + StartIdx.assign(NumSlots, -1); + DefinitelyInUse.reset(); // Start the interval of the slots that we previously found to be 'in-use'.
- BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB]; - for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1; - pos = MBBLiveness.LiveIn.find_next(pos)) { + BlockLifetimeInfo &MBBLiveness = BlockLiveness[MBB.getNumber()]; + for (int pos : MBBLiveness.LiveIn.set_bits()) { Starts[pos] = Indexes->getMBBStartIdx(&MBB); + StartIdx[pos] = CurrIdx; } + bool StartedSinceInc = false; + auto EndRangeFor = [&](int Slot) { + // The less index the better, so we only increase if the ranges would not + // be accurate without + if (StartIdx[Slot] == CurrIdx || StartedSinceInc) { + CurrIdx++; + StartedSinceInc = false; + } + Slot2Info[Slot].Liveness.resize(CurrIdx + 1); + Slot2Info[Slot].Liveness.set(StartIdx[Slot], CurrIdx); + StartIdx[Slot] = -1; + DefinitelyInUse[Slot] = false; + }; + // Create the interval for the basic blocks containing lifetime begin/end. for (const MachineInstr &MI : MBB) { SmallVector slots; bool IsStart = false; - if (!isLifetimeStartOrEnd(MI, slots, IsStart)) + bool AnyChange = isLifetimeStartOrEnd(MI, slots, IsStart); + AnyChange |= MidBlockSpillChanges[SpillChangeCounter].AtMI == &MI; + if (!AnyChange) continue; SlotIndex ThisIndex = Indexes->getInstructionIndex(MI); - for (auto Slot : slots) { + auto OnChange = [&](unsigned Slot, bool IsStart) { if (IsStart) { + StartedSinceInc = true; // If a slot is already definitely in use, we don't have to emit // a new start marker because there is already a pre-existing // one. 
if (!DefinitelyInUse[Slot]) { LiveStarts[Slot].push_back(ThisIndex); + Slot2Info[Slot].StartLiveness.push_back(CurrIdx); DefinitelyInUse[Slot] = true; } if (!Starts[Slot].isValid()) Starts[Slot] = ThisIndex; + if (StartIdx[Slot] == -1) + StartIdx[Slot] = CurrIdx; } else { + assert(Starts[Slot].isValid() == (StartIdx[Slot] != -1)); if (Starts[Slot].isValid()) { VNInfo *VNI = Intervals[Slot]->getValNumInfo(0); Intervals[Slot]->addSegment( @@ -876,10 +1123,26 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { Starts[Slot] = SlotIndex(); // Invalidate the start index DefinitelyInUse[Slot] = false; } + if (StartIdx[Slot] != -1) + EndRangeFor(Slot); } + }; + for (auto Slot : slots) + OnChange(Slot, IsStart); + for (; SpillChangeCounter < MidBlockSpillChanges.size() && + MidBlockSpillChanges[SpillChangeCounter].AtMI == &MI; + SpillChangeCounter++) { + SplitSlotChanges Change = MidBlockSpillChanges[SpillChangeCounter]; + OnChange(Change.Slot, Change.IsStart); } } + for (unsigned i = 0; i < NumSlots; ++i) { + if (StartIdx[i] == -1) + continue; + EndRangeFor(i); + } + // Finish up started segments for (unsigned i = 0; i < NumSlots; ++i) { if (!Starts[i].isValid()) @@ -890,6 +1153,22 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI)); } } + // Make sure we reached the end + assert(!MidBlockSpillChanges[SpillChangeCounter].AtMI); + + LivenessSize = CurrIdx; + for (SlotInfo &Info : Slot2Info) { + Info.Liveness.resize(CurrIdx); + + // This is only to make us index into Liveness in order when doing a + // SlotInfo::hasOverlap, which should have better cache locality + std::sort(Info.StartLiveness.begin(), Info.StartLiveness.end()); +#ifndef NDEBUG + assert(Info.Liveness.any() == !Info.StartLiveness.empty()); + for (int Start : Info.StartLiveness) + assert(Info.Liveness[Start]); +#endif + } } bool StackColoring::removeAllMarkers() { @@ -900,6 +1179,17 @@ bool 
StackColoring::removeAllMarkers() { } Markers.clear(); + for (MachineBasicBlock &MBB : *MF) { + if (BlockLiveness.empty() || BlockLiveness[MBB.getNumber()].isEmpty()) + for (MachineInstr &MI : make_early_inc_range(MBB)) { + if (MI.getOpcode() == TargetOpcode::LIFETIME_START || + MI.getOpcode() == TargetOpcode::LIFETIME_END) { + Count++; + MI.eraseFromParent(); + } + } + } + LLVM_DEBUG(dbgs() << "Removed " << Count << " markers.\n"); return Count; } @@ -923,10 +1213,10 @@ void StackColoring::remapInstructions(DenseMap &SlotRemap) { } // Keep a list of *allocas* which need to be remapped. - DenseMap Allocas; + DenseMap Allocas; // Keep a list of allocas which has been affected by the remap. - SmallPtrSet MergedAllocas; + SmallPtrSet MergedAllocas; for (const std::pair &SI : SlotRemap) { const AllocaInst *From = MFI->getObjectAllocation(SI.first); @@ -960,8 +1250,8 @@ void StackColoring::remapInstructions(DenseMap &SlotRemap) { // Transfer the stack protector layout tag, but make sure that SSPLK_AddrOf // does not overwrite SSPLK_SmallArray or SSPLK_LargeArray, and make sure // that SSPLK_SmallArray does not overwrite SSPLK_LargeArray. - MachineFrameInfo::SSPLayoutKind FromKind - = MFI->getObjectSSPLayout(SI.first); + MachineFrameInfo::SSPLayoutKind FromKind = + MFI->getObjectSSPLayout(SI.first); MachineFrameInfo::SSPLayoutKind ToKind = MFI->getObjectSSPLayout(SI.second); if (FromKind != MachineFrameInfo::SSPLK_None && (ToKind == MachineFrameInfo::SSPLK_None || @@ -1019,27 +1309,27 @@ void StackColoring::remapInstructions(DenseMap &SlotRemap) { int FromSlot = MO.getIndex(); // Don't touch arguments. - if (FromSlot<0) + if (FromSlot < 0) continue; // Only look at mapped slots. if (!SlotRemap.count(FromSlot)) continue; - // In a debug build, check that the instruction that we are modifying is - // inside the expected live range. 
If the instruction is not inside - // the calculated range then it means that the alloca usage moved - // outside of the lifetime markers, or that the user has a bug. - // NOTE: Alloca address calculations which happen outside the lifetime - // zone are okay, despite the fact that we don't have a good way - // for validating all of the usages of the calculation. + // In a debug build, check that the instruction that we are modifying is + // inside the expected live range. If the instruction is not inside + // the calculated range then it means that the alloca usage moved + // outside of the lifetime markers, or that the user has a bug. + // NOTE: Alloca address calculations which happen outside the lifetime + // zone are okay, despite the fact that we don't have a good way + // for validating all of the usages of the calculation. #ifndef NDEBUG bool TouchesMemory = I.mayLoadOrStore(); // If we *don't* protect the user from escaped allocas, don't bother // validating the instructions. if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) { SlotIndex Index = Indexes->getInstructionIndex(I); - const LiveInterval *Interval = &*Intervals[FromSlot]; + const LiveRange *Interval = &*Intervals[FromSlot]; assert(Interval->find(Index) != Interval->end() && "Found instruction usage outside of live range."); } @@ -1123,9 +1413,9 @@ void StackColoring::remapInstructions(DenseMap &SlotRemap) { LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n"); LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n"); LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n"); - (void) FixedMemOp; - (void) FixedDbg; - (void) FixedInstr; + (void)FixedMemOp; + (void)FixedDbg; + (void)FixedInstr; } void StackColoring::removeInvalidSlotRanges() { @@ -1151,7 +1441,7 @@ void StackColoring::removeInvalidSlotRanges() { int Slot = MO.getIndex(); - if (Slot<0) + if (Slot < 0) continue; if (Intervals[Slot]->empty()) @@ -1159,7 +1449,7 @@ 
void StackColoring::removeInvalidSlotRanges() { // Check that the used slot is inside the calculated lifetime range. // If it is not, warn about it and invalidate the range. - LiveInterval *Interval = &*Intervals[Slot]; + LiveRange *Interval = &*Intervals[Slot]; SlotIndex Index = Indexes->getInstructionIndex(I); if (Interval->find(Index) == Interval->end()) { Interval->clear(); @@ -1173,7 +1463,7 @@ void StackColoring::removeInvalidSlotRanges() { void StackColoring::expungeSlotMap(DenseMap &SlotRemap, unsigned NumSlots) { // Expunge slot remap map. - for (unsigned i=0; i < NumSlots; ++i) { + for (unsigned i = 0; i < NumSlots; ++i) { // If we are remapping i if (auto It = SlotRemap.find(i); It != SlotRemap.end()) { int Target = It->second; @@ -1193,69 +1483,321 @@ bool StackColoringLegacy::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; - StackColoring SC(&getAnalysis().getSI()); + LiveStacks *LS = nullptr; + LiveStacksWrapperLegacy *LSWL = + getAnalysisIfAvailable(); + if (LSWL) + LS = &LSWL->getLS(); + + StackColoring SC(&getAnalysis().getSI(), LS); return SC.run(MF); } PreservedAnalyses StackColoringPass::run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) { - StackColoring SC(&MFAM.getResult(MF)); + StackColoring SC(&MFAM.getResult(MF), + MFAM.getCachedResult(MF)); if (SC.run(MF)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } +unsigned StackColoring::doMerging(unsigned NumSlots) { + SmallVector SlotStack; + Align FinalAlign; + + int64_t OrigOptSize = 0; + int64_t OrigPesSize = 0; + for (unsigned Slot = 0; Slot < NumSlots; Slot++) { + SlotInfo &Info = Slot2Info[Slot]; + if (Info.StartLiveness.empty()) + assert(!LS || !LS->hasInterval(Slot)); + if (!Info.StartLiveness.empty() && + DebugCounter::shouldExecute(ProcessSlot)) { + FinalAlign = std::max(FinalAlign, Info.Align); + + // Note: This is maybe not a completely fair comparaison compared to the + // previous algo, as PEI should be 
smarter than that about alignment But + // a fair comparison is hard since the old algo doesn't deal in alignment + // at all + OrigPesSize = alignTo(OrigPesSize, Info.Align); + OrigPesSize += Info.Size; + OrigOptSize += Info.Size; + SlotStack.push_back(Slot); + } + } + + if (SlotStack.size() <= 1) + return InvalidIdx; + + // This logic is optimized for x86_64, it probably needs to be adapted to + // other targets to get good code-size/stack-size balance. + // It is inspired by X86FrameLowering::orderFrameObjects, but also weighs + // alignment, which helps with stack size. + // IsLower orders Lhs before Rhs when UseCount / (Size + Log2(Align)) is + // lower; the ratios are compared by cross-multiplication. + auto IsLower = [&](unsigned Lhs, unsigned Rhs) { + SlotInfo &L = Slot2Info[Lhs]; + SlotInfo &R = Slot2Info[Rhs]; + uint64_t DensityLScaled = static_cast(L.UseCount) * + static_cast(R.Size + Log2(R.Align)); + uint64_t DensityRScaled = static_cast(R.UseCount) * + static_cast(L.Size + Log2(L.Align)); + return DensityLScaled < DensityRScaled; + }; + std::stable_sort(SlotStack.begin(), SlotStack.end(), IsLower); + + int Prio = 0; + for (int Slot : SlotStack) + Slot2Info[Slot].SlotPriority = Prio++; + + SlotInfo *LastQueryLhs = nullptr; + SlotInfo *LastQueryRhs = nullptr; + bool LastQueryRes = false; + // Single-entry cache: only the most recent (Lhs, Rhs) hasOverlap query is + // remembered. Maybe there should be real caching here + auto HasOverlapCached = [&](SlotInfo &Lhs, SlotInfo &Rhs) { + if (&Lhs == LastQueryLhs && LastQueryRhs == &Rhs) + return LastQueryRes; + LastQueryLhs = &Lhs; + LastQueryRhs = &Rhs; + LastQueryRes = Lhs.hasOverlap(Rhs); + return LastQueryRes; + }; + + struct Status { + // This is the offset at which a slot on top should be placed. So the offset + // of the slot + the size of the slot + unsigned Offset = 0; + + // The Slot just below the offset.
+ unsigned Slot = InvalidIdx; + + // The index of the previous status in OlderStatus + unsigned Prev = InvalidIdx; + }; + + SmallVector LatestStatus; + LatestStatus.resize(LivenessSize, Status{}); + SmallVector OlderStatus; + + auto FindStatus = [&](SlotInfo &Info, unsigned Pt) -> Status & { + Status *Last = &LatestStatus[Pt]; + + // The slots in the linked-list are always kept in ascending order, so the + // earliest slot has the lowest offset + // This loop handles cases where this slot and the latest slot doesn't + // cannot be both live because of the CFG, so even if there lifetime + // overlap, they can overlap + // See comment about implementation higher in the file + while (LLVM_UNLIKELY(Last->Slot != InvalidIdx && + !HasOverlapCached(Info, Slot2Info[Last->Slot]))) + Last = &OlderStatus[Last->Prev]; + return *Last; + }; + auto UpdateStatus = [&](SlotInfo &Info, unsigned Pt, unsigned Offset) { + Status *Last = &LatestStatus[Pt]; + unsigned Idx = OlderStatus.size(); + OlderStatus.push_back(*Last); + + // this is branch is not taken only when we are inserting a slot that wasn't + // overlapping with the previous slot and is smaller. so the slot inserted + // slot is not the new start of the linked-list + if (LLVM_LIKELY(Last->Offset <= Offset)) { + Last->Prev = Idx; + Last->Offset = Offset; + Last->Slot = &Info - Slot2Info.data(); + return; + } + + // Ensure ordering of slots + Status *Inserted = &OlderStatus.back(); + Inserted->Offset = Offset; + Inserted->Slot = &Info - Slot2Info.data(); + Status *Curr = Last; + while (Curr->Prev != InvalidIdx && OlderStatus[Curr->Prev].Offset > Offset) + Curr = &OlderStatus[Curr->Prev]; + + // Insert the new node in the linked-list + Inserted->Prev = Curr->Prev; + Curr->Prev = Idx; + }; + + // This is a vector but element ordering is not relevant + SmallVector Candidates; + + unsigned MaxCandidates = MaxCandidatesOpt == 0 ? 
~0u : MaxCandidatesOpt; + for (unsigned I = 0; I < MaxCandidates; I++) { + if (SlotStack.empty()) + break; + Candidates.push_back(SlotStack.pop_back_val()); + } + + LLVM_DEBUG(dbgs() << "\nStarting Placement:\n"); + unsigned WorseCaseOffset = 0; + while (!Candidates.empty()) { + unsigned BestIdx = InvalidIdx; + unsigned BestOffset = InvalidIdx; + + LLVM_DEBUG(dbgs() << "Worse is at " << WorseCaseOffset << ", choosing: "); + for (unsigned K = 0; K < Candidates.size(); K++) { + SlotInfo &Info = Slot2Info[Candidates[K]]; + unsigned Offset = 0; + unsigned PrevSlot = InvalidIdx; + (void)PrevSlot; // Only use in LLVM_DEBUG + + for (unsigned Pt : Info.Liveness.set_bits()) { + Status S = FindStatus(Info, Pt); + if (S.Offset > Offset) { + PrevSlot = S.Slot; + Offset = S.Offset; + } + + // If Offset == WorseCaseOffset, this is always a valid, options. so no + // more checking needed + // If Offset > BestOffset, we already found a better solution, so this + // one doesn't matter + if (Offset == WorseCaseOffset || Offset > BestOffset) + break; + } + + Offset = alignTo(Offset, Info.Align); + + LLVM_DEBUG({ + dbgs() << "fi#" << Candidates[K] << "@" << Offset; + if (PrevSlot != InvalidIdx) + dbgs() << "->" << "fi#" << PrevSlot; + dbgs() << ", "; + }); + + bool IsBetter = [&] { + if (BestIdx == InvalidIdx) + return true; + SlotInfo &Best = Slot2Info[Candidates[BestIdx]]; + if (BestOffset != Offset) + return BestOffset > Offset; + if (Best.SlotPriority != Info.SlotPriority) + return Best.SlotPriority < Info.SlotPriority; + if (Best.Align != Info.Align) + return Best.Align < Info.Align; + + // Both are always stored in Slot2Info, so this is equivalent to + // FrameIndex comparaison + return &Best < &Info; + }(); + + if (IsBetter) { + BestIdx = K; + BestOffset = Offset; + } + } + SlotInfo &Info = Slot2Info[Candidates[BestIdx]]; + Info.Offset = BestOffset; + WorseCaseOffset = std::max(WorseCaseOffset, BestOffset + Info.Size); + + LLVM_DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << 
"Placing: "); + LLVM_DEBUG(Info.dump(this)); + + for (unsigned Pt : Info.Liveness.set_bits()) + UpdateStatus(Info, Pt, BestOffset + Info.Size); +#ifdef EXPENSIVE_CHECKS + // Validate the order of offsets in the linked-list + for (Status &S : LatestStatus) { + Status *Curr = &S; + unsigned CurrOffset = Curr->Offset; + while (Curr->Prev != InvalidIdx) { + assert(Curr->Offset <= CurrOffset); + CurrOffset = Curr->Offset; + Curr = &OlderStatus[Curr->Prev]; + } + } +#endif + + std::swap(Candidates[BestIdx], Candidates.back()); + Candidates.pop_back(); + if (!SlotStack.empty()) + Candidates.push_back(SlotStack.pop_back_val()); + } + + unsigned FinalSize = 0; + for (Status &U : LatestStatus) + FinalSize = std::max(FinalSize, U.Offset); + LLVM_DEBUG(dbgs() << "MergedSize=" << FinalSize << " OrigPesSize=" + << OrigPesSize << " OrigOptSize" << OrigOptSize << "\n"); + if (FinalSize >= OrigPesSize) { + return InvalidIdx; + } + + int MergedSlot = + MFI->CreateStackObject(FinalSize, FinalAlign, /*isSpillSlot=*/false); + MFI->setUnderlyingSlot(MergedSlot, MachineFrameInfo::IsUnderlyingSlot); + + for (unsigned Slot = 0; Slot < NumSlots; Slot++) + if (Slot2Info[Slot].Offset != InvalidIdx) { + MFI->setUnderlyingSlot(Slot, MergedSlot); + MFI->setObjectOffset(Slot, Slot2Info[Slot].Offset); + } + + // Note: this is counts differently from the previous algo because this logic + // cares about alignment, while the older algo doesn't. 
+ StackSpaceSaved += OrigPesSize - FinalSize; + + return MergedSlot; +} + bool StackColoring::run(MachineFunction &Func) { LLVM_DEBUG(dbgs() << "********** Stack Coloring **********\n" << "********** Function: " << Func.getName() << '\n'); MF = &Func; MFI = &MF->getFrameInfo(); BlockLiveness.clear(); - BasicBlocks.clear(); BasicBlockNumbering.clear(); Markers.clear(); Intervals.clear(); LiveStarts.clear(); VNInfoAllocator.Reset(); + Slot2Info.clear(); + + if (!UseNewStackColoring) + LS = nullptr; unsigned NumSlots = MFI->getObjectIndexEnd(); // If there are no stack slots then there are no markers to remove. - if (!NumSlots) - return false; + if (NumSlots < 2 || DisableColoring) + return removeAllMarkers(); SmallVector SortedSlots; SortedSlots.reserve(NumSlots); Intervals.reserve(NumSlots); LiveStarts.resize(NumSlots); + Slot2Info.resize(NumSlots); unsigned NumMarkers = collectMarkers(NumSlots); unsigned TotalSize = 0; LLVM_DEBUG(dbgs() << "Found " << NumMarkers << " markers and " << NumSlots << " slots\n"); - LLVM_DEBUG(dbgs() << "Slot structure:\n"); - for (int i=0; i < MFI->getObjectIndexEnd(); ++i) { - LLVM_DEBUG(dbgs() << "Slot #" << i << " - " << MFI->getObjectSize(i) - << " bytes.\n"); + for (int i = 0; i < MFI->getObjectIndexEnd(); ++i) TotalSize += MFI->getObjectSize(i); - } LLVM_DEBUG(dbgs() << "Total Stack size: " << TotalSize << " bytes\n\n"); // Don't continue because there are not enough lifetime markers, or the // stack is too small, or we are told not to optimize the slots. 
- if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) { + if (NumMarkers < 2 || TotalSize < 16) { LLVM_DEBUG(dbgs() << "Will not try to merge slots.\n"); return removeAllMarkers(); } - for (unsigned i=0; i < NumSlots; ++i) { - std::unique_ptr LI(new LiveInterval(i, 0)); + for (unsigned i = 0; i < NumSlots; ++i) { + std::unique_ptr LI(new LiveRange()); LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator); Intervals.push_back(std::move(LI)); SortedSlots.push_back(i); + + Slot2Info[i].Align = MFI->getObjectAlign(i); + Slot2Info[i].Size = MFI->getObjectSize(i); + Slot2Info[i].Offset = InvalidIdx; } // Calculate the liveness of each block. @@ -1265,112 +1807,118 @@ bool StackColoring::run(MachineFunction &Func) { // Propagate the liveness information. calculateLiveIntervals(NumSlots); - LLVM_DEBUG(dumpIntervals()); // Search for allocas which are used outside of the declared lifetime // markers. if (ProtectFromEscapedAllocas) removeInvalidSlotRanges(); - // Maps old slots to new slots. - DenseMap SlotRemap; - unsigned RemovedSlots = 0; - unsigned ReducedSize = 0; - - // Do not bother looking at empty intervals. - for (unsigned I = 0; I < NumSlots; ++I) { - if (Intervals[SortedSlots[I]]->empty()) - SortedSlots[I] = -1; - } - - // This is a simple greedy algorithm for merging allocas. First, sort the - // slots, placing the largest slots first. Next, perform an n^2 scan and look - // for disjoint slots. When you find disjoint slots, merge the smaller one - // into the bigger one and update the live interval. Remove the small alloca - // and continue. - - // Sort the slots according to their size. Place unused slots at the end. - // Use stable sort to guarantee deterministic code generation. - llvm::stable_sort(SortedSlots, [this](int LHS, int RHS) { - // We use -1 to denote a uninteresting slot. Place these slots at the end. - if (LHS == -1) - return false; - if (RHS == -1) - return true; - // Sort according to size. 
- return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS); - }); - - for (auto &s : LiveStarts) - llvm::sort(s); + if (!UseNewStackColoring) { + LLVM_DEBUG(dumpIntervals()); + // Maps old slots to new slots. + DenseMap SlotRemap; + unsigned RemovedSlots = 0; + unsigned ReducedSize = 0; - bool Changed = true; - while (Changed) { - Changed = false; + // Do not bother looking at empty intervals. for (unsigned I = 0; I < NumSlots; ++I) { - if (SortedSlots[I] == -1) - continue; + if (Intervals[SortedSlots[I]]->empty()) + SortedSlots[I] = -1; + } - for (unsigned J=I+1; J < NumSlots; ++J) { - if (SortedSlots[J] == -1) + // This is a simple greedy algorithm for merging allocas. First, sort the + // slots, placing the largest slots first. Next, perform an n^2 scan and + // look for disjoint slots. When you find disjoint slots, merge the smaller + // one into the bigger one and update the live interval. Remove the small + // alloca and continue. + + // Sort the slots according to their size. Place unused slots at the end. + // Use stable sort to guarantee deterministic code generation. + llvm::stable_sort(SortedSlots, [this](int LHS, int RHS) { + // We use -1 to denote a uninteresting slot. Place these slots at the end. + if (LHS == -1) + return false; + if (RHS == -1) + return true; + // Sort according to size. + return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS); + }); + + for (auto &s : LiveStarts) + llvm::sort(s); + + bool Changed = true; + while (Changed) { + Changed = false; + for (unsigned I = 0; I < NumSlots; ++I) { + if (SortedSlots[I] == -1) continue; - int FirstSlot = SortedSlots[I]; - int SecondSlot = SortedSlots[J]; + for (unsigned J = I + 1; J < NumSlots; ++J) { + if (SortedSlots[J] == -1) + continue; + + int FirstSlot = SortedSlots[I]; + int SecondSlot = SortedSlots[J]; - // Objects with different stack IDs cannot be merged. 
- if (MFI->getStackID(FirstSlot) != MFI->getStackID(SecondSlot)) - continue; + // Objects with different stack IDs cannot be merged. + if (MFI->getStackID(FirstSlot) != MFI->getStackID(SecondSlot)) + continue; - LiveInterval *First = &*Intervals[FirstSlot]; - LiveInterval *Second = &*Intervals[SecondSlot]; - auto &FirstS = LiveStarts[FirstSlot]; - auto &SecondS = LiveStarts[SecondSlot]; - assert(!First->empty() && !Second->empty() && "Found an empty range"); - - // Merge disjoint slots. This is a little bit tricky - see the - // Implementation Notes section for an explanation. - if (!First->isLiveAtIndexes(SecondS) && - !Second->isLiveAtIndexes(FirstS)) { - Changed = true; - First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0)); - - int OldSize = FirstS.size(); - FirstS.append(SecondS.begin(), SecondS.end()); - auto Mid = FirstS.begin() + OldSize; - std::inplace_merge(FirstS.begin(), Mid, FirstS.end()); - - SlotRemap[SecondSlot] = FirstSlot; - SortedSlots[J] = -1; - LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #" - << SecondSlot << " together.\n"); - Align MaxAlignment = std::max(MFI->getObjectAlign(FirstSlot), - MFI->getObjectAlign(SecondSlot)); - - assert(MFI->getObjectSize(FirstSlot) >= - MFI->getObjectSize(SecondSlot) && - "Merging a small object into a larger one"); - - RemovedSlots+=1; - ReducedSize += MFI->getObjectSize(SecondSlot); - MFI->setObjectAlignment(FirstSlot, MaxAlignment); - MFI->RemoveStackObject(SecondSlot); + LiveRange *First = &*Intervals[FirstSlot]; + LiveRange *Second = &*Intervals[SecondSlot]; + auto &FirstS = LiveStarts[FirstSlot]; + auto &SecondS = LiveStarts[SecondSlot]; + assert(!First->empty() && !Second->empty() && "Found an empty range"); + + // Merge disjoint slots. This is a little bit tricky - see the + // Implementation Notes section for an explanation. 
+ if (!First->isLiveAtIndexes(SecondS) && + !Second->isLiveAtIndexes(FirstS)) { + Changed = true; + First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0)); + + int OldSize = FirstS.size(); + FirstS.append(SecondS.begin(), SecondS.end()); + auto Mid = FirstS.begin() + OldSize; + std::inplace_merge(FirstS.begin(), Mid, FirstS.end()); + + SlotRemap[SecondSlot] = FirstSlot; + SortedSlots[J] = -1; + LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #" + << SecondSlot << " together.\n"); + Align Alignment = std::max(MFI->getObjectAlign(FirstSlot), + MFI->getObjectAlign(SecondSlot)); + + assert(MFI->getObjectSize(FirstSlot) >= + MFI->getObjectSize(SecondSlot) && + "Merging a small object into a larger one"); + + RemovedSlots += 1; + ReducedSize += MFI->getObjectSize(SecondSlot); + MFI->setObjectAlignment(FirstSlot, Alignment); + MFI->RemoveStackObject(SecondSlot); + } } } + } // While changed. + + // Record statistics. + StackSpaceSaved += ReducedSize; + StackSlotMerged += RemovedSlots; + LLVM_DEBUG(dbgs() << "Merge " << RemovedSlots << " slots. Saved " + << ReducedSize << " bytes\n"); + + // Scan the entire function and update all machine operands that use frame + // indices to use the remapped frame index. + if (!SlotRemap.empty()) { + expungeSlotMap(SlotRemap, NumSlots); + remapInstructions(SlotRemap); } - }// While changed. - - // Record statistics. - StackSpaceSaved += ReducedSize; - StackSlotMerged += RemovedSlots; - LLVM_DEBUG(dbgs() << "Merge " << RemovedSlots << " slots. Saved " - << ReducedSize << " bytes\n"); - - // Scan the entire function and update all machine operands that use frame - // indices to use the remapped frame index. - if (!SlotRemap.empty()) { - expungeSlotMap(SlotRemap, NumSlots); - remapInstructions(SlotRemap); + } else { + // Maybe this entire logic should be moved to a generic StackLayouter that + // is used for PrologEpilogInserter and LocalStackSlotAllocation. 
+ doMerging(NumSlots); } return removeAllMarkers(); diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp index 2f81bea4e86ba..95597be5f1ebe 100644 --- a/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -262,24 +262,14 @@ void StackSlotColoring::InitializeSlots() { UsedColors[0].resize(LastFI); Assignments.resize(LastFI); - using Pair = std::iterator_traits::value_type; - - SmallVector Intervals; - - Intervals.reserve(LS->getNumIntervals()); - for (auto &I : *LS) - Intervals.push_back(&I); - llvm::sort(Intervals, - [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; }); - // Gather all spill slots into a list. LLVM_DEBUG(dbgs() << "Spill slot intervals:\n"); - for (auto *I : Intervals) { - LiveInterval &li = I->second; - LLVM_DEBUG(li.dump()); - int FI = li.reg().stackSlotIndex(); - if (MFI->isDeadObjectIndex(FI)) + for (auto [Idx, I] : llvm::enumerate(*LS)) { + int FI = Idx + LS->getStartIdx(); + if (!I || MFI->isDeadObjectIndex(FI)) continue; + LiveInterval &li = *I; + LLVM_DEBUG(li.dump()); SSIntervals.push_back(&li); OrigAlignments[FI] = MFI->getObjectAlign(FI); @@ -369,7 +359,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { SmallVector SlotMapping(NumObjs, -1); SmallVector SlotWeights(NumObjs, 0.0); SmallVector, 16> RevMap(NumObjs); - BitVector UsedColors(NumObjs); LLVM_DEBUG(dbgs() << "Color spill slot intervals:\n"); bool Changed = false; @@ -380,7 +369,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { SlotMapping[SS] = NewSS; RevMap[NewSS].push_back(SS); SlotWeights[NewSS] += li->weight(); - UsedColors.set(NewSS); Changed |= (SS != NewSS); } diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 4ae52b056d844..343e25ae17fd7 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -266,6 +266,9 @@ static cl::opt cl::desc("Split static data sections into hot and cold " 
"sections using profile information")); +static cl::opt MergedStackColoring("merged-stack-coloring", + cl::init(false), cl::Hidden); + /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. /// i.e. -disable-mypass=false has no effect. @@ -1305,9 +1308,11 @@ void TargetPassConfig::addMachineSSAOptimization() { // instructions dead. addPass(&OptimizePHIsLegacyID); - // This pass merges large allocas. StackSlotColoring is a different pass - // which merges spill slots. - addPass(&StackColoringLegacyID); + if (!MergedStackColoring) { + // This pass merges large allocas. StackSlotColoring is a different pass + // which merges spill slots. + addPass(&StackColoringLegacyID); + } // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. @@ -1493,8 +1498,14 @@ void TargetPassConfig::addOptimizedRegAlloc() { addPass(&MachineSchedulerID); if (addRegAssignAndRewriteOptimized()) { - // Perform stack slot coloring and post-ra machine LICM. - addPass(&StackSlotColoringID); + if (MergedStackColoring) { + // This pass merges large allocas. StackSlotColoring is a different pass + // which merges spill slots. + addPass(&StackColoringLegacyID); + } else { + // Perform stack slot coloring and post-ra machine LICM. + addPass(&StackSlotColoringID); + } // Allow targets to expand pseudo instructions depending on the choice of // registers before MachineCopyPropagation. 
diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp index 989fde9749b18..1c6876ce4e87c 100644 --- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp @@ -84,7 +84,7 @@ class SourceCode { void format(raw_ostream &OS) { if (!PrunedSource) return; - size_t MaxLineNumberWidth = std::ceil(std::log10(LastLine)); + size_t MaxLineNumberWidth = NumDigits(LastLine); int64_t L = FirstLine; for (size_t Pos = 0; Pos < PrunedSource->size(); ++L) { size_t PosEnd = PrunedSource->find('\n', Pos); diff --git a/llvm/lib/Support/Signals.cpp b/llvm/lib/Support/Signals.cpp index 9f9030e79d104..b8449683363b0 100644 --- a/llvm/lib/Support/Signals.cpp +++ b/llvm/lib/Support/Signals.cpp @@ -221,7 +221,7 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace, for (int i = 0; i < Depth; i++) { auto PrintLineHeader = [&]() { OS << right_justify(formatv("#{0}", frame_no++).str(), - std::log10(Depth) + 2) + NumDigits(Depth) + 1) << ' ' << format_ptr(StackTrace[i]) << ' '; }; if (!Modules[i]) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp index 9b6bb56c85d24..2dcf695e9c583 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp @@ -102,15 +102,15 @@ bool AMDGPUMarkLastScratchLoad::run(MachineFunction &MF) { bool Changed = false; - for (auto &[SS, LI] : *LS) { - for (const LiveRange::Segment &Segment : LI.segments) { + for (auto *LI : *LS) { + for (const LiveRange::Segment &Segment : LI->segments) { // Ignore segments that run to the end of basic block because in this case // slot is still live at the end of it. 
if (Segment.end.isBlock()) continue; - const int FrameIndex = LI.reg().stackSlotIndex(); + const int FrameIndex = LI->reg().stackSlotIndex(); MachineInstr *LastLoad = nullptr; MachineInstr *MISegmentEnd = SI->getInstructionFromIndex(Segment.end); diff --git a/llvm/test/CodeGen/X86/StackColoring.ll b/llvm/test/CodeGen/X86/StackColoring.ll index db3e7dcdfe2d5..4cc54c5bd1361 100644 --- a/llvm/test/CodeGen/X86/StackColoring.ll +++ b/llvm/test/CodeGen/X86/StackColoring.ll @@ -581,6 +581,41 @@ onerr: %Data = type { [32 x i64] } +declare void @throw() + +declare i32 @__CxxFrameHandler3(...) + +declare void @llvm.trap() + +;CHECK-LABEL: removed_all_lifetime: +;YESCOLOR-NOT: LIFETIME_END +;NOFIRSTUSE-NOT: LIFETIME_END +;NOCOLOR-NOT: LIFETIME_END +define void @removed_all_lifetime() personality ptr @__CxxFrameHandler3 { +entry: + %alloca2 = alloca ptr, align 4 + %alloca1 = alloca ptr, align 4 + store volatile ptr null, ptr %alloca1 + invoke void @throw() + to label %unreachable unwind label %catch.dispatch + +catch.dispatch: ; preds = %entry + %cs = catchswitch within none [label %catch.pad] unwind to caller + +catch.pad: ; preds = %catch.dispatch + %cp = catchpad within %cs [ptr null, i32 0, ptr %alloca1] + %v = load volatile ptr, ptr %alloca1 + store volatile ptr null, ptr %alloca1 + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %alloca1) + call void @llvm.lifetime.start.p0(i64 4, ptr %alloca2) + store volatile ptr null, ptr %alloca1 + call void @llvm.trap() + unreachable + +unreachable: ; preds = %entry + unreachable +} + declare void @destructor() declare void @inita(ptr) diff --git a/llvm/tools/llvm-remarkutil/RemarkInstructionMix.cpp b/llvm/tools/llvm-remarkutil/RemarkInstructionMix.cpp index 7c8ac474c0fdb..9b0a518f7e49a 100644 --- a/llvm/tools/llvm-remarkutil/RemarkInstructionMix.cpp +++ b/llvm/tools/llvm-remarkutil/RemarkInstructionMix.cpp @@ -111,7 +111,7 @@ static Error tryInstructionMix() { Mix.begin(), Mix.end(), 1, [](unsigned MaxValue, const MixEntry &Elt) 
{ return std::max(MaxValue, Elt.second); }); - unsigned ValueWidth = std::log10(MaxValue) + 1; + unsigned ValueWidth = NumDigits(MaxValue); FOS << "Instruction"; FOS.PadToColumn(MaxMnemonic + 1) << "Count\n"; FOS << "-----------"; diff --git a/llvm/utils/FileCheck/FileCheck.cpp b/llvm/utils/FileCheck/FileCheck.cpp index 9cf3a3164dfec..96e6e418b96f4 100644 --- a/llvm/utils/FileCheck/FileCheck.cpp +++ b/llvm/utils/FileCheck/FileCheck.cpp @@ -595,7 +595,7 @@ static void DumpAnnotatedInput(raw_ostream &OS, const FileCheckRequest &Req, unsigned LineCount = InputFileText.count('\n'); if (InputFileEnd[-1] != '\n') ++LineCount; - unsigned LineNoWidth = std::log10(LineCount) + 1; + unsigned LineNoWidth = NumDigits(LineCount); // +3 below adds spaces (1) to the left of the (right-aligned) line numbers // on input lines and (2) to the right of the (left-aligned) labels on // annotation lines so that input lines and annotation lines are more