Skip to content

[LLD] Implement --enable-non-contiguous-regions #90007

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lld/ELF/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ struct Config {
bool emitLLVM;
bool emitRelocs;
bool enableNewDtags;
bool enableNonContiguousRegions;
bool executeOnly;
bool exportDynamic;
bool fixCortexA53Errata843419;
Expand Down
4 changes: 3 additions & 1 deletion lld/ELF/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,8 @@ static void readConfigs(opt::InputArgList &args) {
config->emitRelocs = args.hasArg(OPT_emit_relocs);
config->enableNewDtags =
args.hasFlag(OPT_enable_new_dtags, OPT_disable_new_dtags, true);
config->enableNonContiguousRegions =
args.hasArg(OPT_enable_non_contiguous_regions);
config->entry = args.getLastArgValue(OPT_entry);

errorHandler().errorHandlingScript =
Expand Down Expand Up @@ -3077,7 +3079,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
// sectionBases.
for (SectionCommand *cmd : script->sectionCommands)
if (auto *osd = dyn_cast<OutputDesc>(cmd))
osd->osec.finalizeInputSections();
osd->osec.finalizeInputSections(script.get());
}

// Two input sections with different output sections should not be folded.
Expand Down
7 changes: 7 additions & 0 deletions lld/ELF/InputSection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ uint64_t SectionBase::getOffset(uint64_t offset) const {
}
case Regular:
case Synthetic:
case Spill:
return cast<InputSection>(this)->outSecOff + offset;
case EHFrame: {
// Two code paths may reach here. First, clang_rt.crtbegin.o and GCC
Expand Down Expand Up @@ -309,6 +310,12 @@ std::string InputSectionBase::getObjMsg(uint64_t off) const {
.str();
}

// Builds a spill placeholder that copies the file, flags, type, alignment and
// name of `source`, carries no section data, and is tagged with the Spill
// kind. The address of `isd` is stored in the `isd` member.
PotentialSpillSection::PotentialSpillSection(const InputSectionBase &source,
                                             InputSectionDescription &isd)
    : InputSection(source.file, source.flags, source.type, source.addralign,
                   /*data=*/ArrayRef<uint8_t>(), source.name,
                   SectionBase::Spill),
      isd(&isd) {}

InputSection InputSection::discarded(nullptr, 0, 0, 0, ArrayRef<uint8_t>(), "");

InputSection::InputSection(InputFile *f, uint64_t flags, uint32_t type,
Expand Down
25 changes: 23 additions & 2 deletions lld/ELF/InputSection.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ template <class ELFT> struct RelsOrRelas {
// sections.
class SectionBase {
public:
enum Kind { Regular, Synthetic, EHFrame, Merge, Output };
enum Kind { Regular, Synthetic, Spill, EHFrame, Merge, Output };

Kind kind() const { return (Kind)sectionKind; }

Expand Down Expand Up @@ -382,7 +382,8 @@ class InputSection : public InputSectionBase {

static bool classof(const SectionBase *s) {
return s->kind() == SectionBase::Regular ||
s->kind() == SectionBase::Synthetic;
s->kind() == SectionBase::Synthetic ||
s->kind() == SectionBase::Spill;
}

// Write this section to a mmap'ed file, assuming Buf is pointing to
Expand Down Expand Up @@ -425,6 +426,26 @@ class InputSection : public InputSectionBase {
template <class ELFT> void copyShtGroup(uint8_t *buf);
};

// A marker for a potential spill location for another input section. This
// broadly acts as if it were the original section until address assignment.
// Then it is either replaced with the real input section or removed.
class PotentialSpillSection : public InputSection {
public:
// The containing input section description; used to quickly replace this stub
// with the actual section.
InputSectionDescription *isd;

// Next potential spill location for the same source input section.
PotentialSpillSection *next = nullptr;

PotentialSpillSection(const InputSectionBase &source,
InputSectionDescription &isd);

static bool classof(const SectionBase *sec) {
return sec->kind() == InputSectionBase::Spill;
}
};

static_assert(sizeof(InputSection) <= 160, "InputSection is too big");

class SyntheticSection : public InputSection {
Expand Down
181 changes: 174 additions & 7 deletions lld/ELF/LinkerScript.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,9 @@ getChangedSymbolAssignment(const SymbolAssignmentMap &oldValues) {
void LinkerScript::processInsertCommands() {
SmallVector<OutputDesc *, 0> moves;
for (const InsertCommand &cmd : insertCommands) {
if (config->enableNonContiguousRegions)
error("INSERT cannot be used with --enable-non-contiguous-regions");

for (StringRef name : cmd.names) {
// If base is empty, it may have been discarded by
// adjustOutputSections(). We do not handle such output sections.
Expand Down Expand Up @@ -486,10 +489,12 @@ static void sortInputSections(MutableArrayRef<InputSectionBase *> vec,
// Compute and remember which sections the InputSectionDescription matches.
SmallVector<InputSectionBase *, 0>
LinkerScript::computeInputSections(const InputSectionDescription *cmd,
ArrayRef<InputSectionBase *> sections) {
ArrayRef<InputSectionBase *> sections,
const OutputSection &outCmd) {
SmallVector<InputSectionBase *, 0> ret;
SmallVector<size_t, 0> indexes;
DenseSet<size_t> seen;
DenseSet<InputSectionBase *> spills;
auto sortByPositionThenCommandLine = [&](size_t begin, size_t end) {
llvm::sort(MutableArrayRef<size_t>(indexes).slice(begin, end - begin));
for (size_t i = begin; i != end; ++i)
Expand All @@ -505,10 +510,10 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd,
size_t sizeBeforeCurrPat = ret.size();

for (size_t i = 0, e = sections.size(); i != e; ++i) {
// Skip if the section is dead or has been matched by a previous input
// section description or a previous pattern.
// Skip if the section is dead or has been matched by a previous pattern
// in this input section description.
InputSectionBase *sec = sections[i];
if (!sec->isLive() || sec->parent || seen.contains(i))
if (!sec->isLive() || seen.contains(i))
continue;

// For --emit-relocs we have to ignore entries like
Expand All @@ -529,6 +534,29 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd,
(sec->flags & cmd->withoutFlags) != 0)
continue;

if (sec->parent) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is moved before ret.push_back(sec);, we can save one if (sec->parent) check and group enableNonContiguousRegions code together.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

// Skip if not allowing multiple matches.
if (!config->enableNonContiguousRegions)
continue;

// Disallow spilling into /DISCARD/; special handling would be needed
// for this in address assignment, and the semantics are nebulous.
if (outCmd.name == "/DISCARD/")
continue;

// Skip if the section's first match was /DISCARD/; such sections are
// always discarded.
if (sec->parent->name == "/DISCARD/")
continue;

// Skip if the section was already matched by a different input section
// description within this output section.
if (sec->parent == &outCmd)
continue;

spills.insert(sec);
}

ret.push_back(sec);
indexes.push_back(i);
seen.insert(i);
Expand All @@ -555,6 +583,30 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd,
// Matched sections after the last SORT* are sorted by (--sort-alignment,
// input order).
sortByPositionThenCommandLine(sizeAfterPrevSort, ret.size());

// The flag --enable-non-contiguous-regions may cause sections to match an
// InputSectionDescription in more than one OutputSection. Matches after the
// first were collected in the spills set, so replace these with potential
// spill sections.
if (!spills.empty()) {
for (InputSectionBase *&sec : ret) {
if (!spills.contains(sec))
continue;

// Append the spill input section to the list for the input section,
// creating it if necessary.
PotentialSpillSection *pss = make<PotentialSpillSection>(
*sec, const_cast<InputSectionDescription &>(*cmd));
auto [it, inserted] =
potentialSpillLists.try_emplace(sec, PotentialSpillList{pss, pss});
if (!inserted) {
PotentialSpillSection *&tail = it->second.tail;
tail = tail->next = pss;
}
sec = pss;
}
}

return ret;
}

Expand All @@ -577,7 +629,7 @@ void LinkerScript::discardSynthetic(OutputSection &outCmd) {
part.armExidx->exidxSections.end());
for (SectionCommand *cmd : outCmd.commands)
if (auto *isd = dyn_cast<InputSectionDescription>(cmd))
for (InputSectionBase *s : computeInputSections(isd, secs))
for (InputSectionBase *s : computeInputSections(isd, secs, outCmd))
discard(*s);
}
}
Expand All @@ -588,7 +640,7 @@ LinkerScript::createInputSectionList(OutputSection &outCmd) {

for (SectionCommand *cmd : outCmd.commands) {
if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) {
isd->sectionBases = computeInputSections(isd, ctx.inputSections);
isd->sectionBases = computeInputSections(isd, ctx.inputSections, outCmd);
for (InputSectionBase *s : isd->sectionBases)
s->parent = &outCmd;
ret.insert(ret.end(), isd->sectionBases.begin(), isd->sectionBases.end());
Expand Down Expand Up @@ -644,6 +696,9 @@ void LinkerScript::processSectionCommands() {

// Process OVERWRITE_SECTIONS first so that it can overwrite the main script
// or orphans.
if (config->enableNonContiguousRegions && !overwriteSections.empty())
error("OVERWRITE_SECTIONS cannot be used with "
"--enable-non-contiguous-regions");
DenseMap<CachedHashStringRef, OutputDesc *> map;
size_t i = 0;
for (OutputDesc *osd : overwriteSections) {
Expand Down Expand Up @@ -1066,8 +1121,12 @@ void LinkerScript::assignOffsets(OutputSection *sec) {
// Handle a single input section description command.
// It calculates and assigns the offsets for each section and also
// updates the output section size.
for (InputSection *isec : cast<InputSectionDescription>(cmd)->sections) {

auto &sections = cast<InputSectionDescription>(cmd)->sections;
for (InputSection *isec : sections) {
assert(isec->getParent() == sec);
if (isa<PotentialSpillSection>(isec))
continue;
const uint64_t pos = dot;
dot = alignToPowerOf2(dot, isec->addralign);
isec->outSecOff = dot - sec->addr;
Expand Down Expand Up @@ -1364,6 +1423,114 @@ const Defined *LinkerScript::assignAddresses() {
return getChangedSymbolAssignment(oldValues);
}

// Returns true when the distance from the origin of `mr` to its current
// position exceeds the region's length. A null region is never overflowed.
static bool hasRegionOverflowed(MemoryRegion *mr) {
  return mr != nullptr && mr->curPos - mr->getOrigin() > mr->getLength();
}

// Spill input sections in reverse order of address assignment to (potentially)
// bring memory regions out of overflow. The size savings of a spill can only be
// estimated, since general linker script arithmetic may occur afterwards.
// Under-estimates may cause unnecessary spills, but over-estimates can always
// be corrected on the next pass.
bool LinkerScript::spillSections() {
if (!config->enableNonContiguousRegions)
return false;

bool spilled = false;
for (SectionCommand *cmd : reverse(sectionCommands)) {
auto *od = dyn_cast<OutputDesc>(cmd);
if (!od)
continue;
OutputSection *osec = &od->osec;
if (!osec->memRegion)
continue;

// Input sections that have replaced a potential spill and should be removed
// from their input section description.
DenseSet<InputSection *> spilledInputSections;

for (SectionCommand *cmd : reverse(osec->commands)) {
if (!hasRegionOverflowed(osec->memRegion) &&
!hasRegionOverflowed(osec->lmaRegion))
break;

auto *isd = dyn_cast<InputSectionDescription>(cmd);
if (!isd)
continue;
for (InputSection *isec : reverse(isd->sections)) {
// Potential spill locations cannot be spilled.
if (isa<PotentialSpillSection>(isec))
continue;

// Find the next potential spill location and remove it from the list.
auto it = potentialSpillLists.find(isec);
if (it == potentialSpillLists.end())
continue;
PotentialSpillList &list = it->second;
PotentialSpillSection *spill = list.head;
if (spill->next)
list.head = spill->next;
else
potentialSpillLists.erase(isec);

// Replace the next spill location with the spilled section and adjust
// its properties to match the new location. Note that the alignment of
// the spill section may have diverged from the original due to e.g. a
// SUBALIGN. Correct assignment requires the spill's alignment to be
// used, not the original.
spilledInputSections.insert(isec);
*llvm::find(spill->isd->sections, spill) = isec;
isec->parent = spill->parent;
isec->addralign = spill->addralign;

// Record the (potential) reduction in the region's end position.
osec->memRegion->curPos -= isec->getSize();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In theory this could be a potentially large underestimate if isec has a high alignment and there is a lot of alignment padding.

Copy link
Contributor Author

@mysterymath mysterymath Apr 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's true; this is definitely a best-effort satisficing type of algorithm.

The situation could be improved by running the spilling both forwards during assignment and backwards afterwards (as opposed to just backwards).

This could be improved further by binary searching for the earliest "spill point" in the address assignment that allows the first memory region to eventually fit, then actualizing all potential spills for that region after that point, then binary searching for the spill point for the next overflowing region, etc. Overall that would take O(m log n) time, where m = |regions| and n = |sections|.

Alternatively, we may be able to handle alignment more specifically and accurately determine its effects.

At the extreme end, we could get a kind of optimality by backtracking whenever a forward assignment overflows and performing the last spill opportunity, for O(n^2) time. That might not actually be too bad in practice.

Anyway, I do think that there's potential room to grow here, but my goal was to find the simplest algorithm that might satisfy most embedded projects. I had originally only had forward spilling (inline with assignment), but late unspillable sections cropped up too many times, and backwards spilling is resistant to them without being more complicated.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed it is best to keep it simple at first. I'm hoping that the majority of cases where a really highly aligned section is used will be specific to a single region.

if (osec->lmaRegion)
osec->lmaRegion->curPos -= isec->getSize();

// Spilling continues until the end position no longer overflows the
// region. Then, another round of address assignment will either confirm
// the spill's success or lead to yet more spilling.
if (!hasRegionOverflowed(osec->memRegion) &&
!hasRegionOverflowed(osec->lmaRegion))
break;
}

// Remove any spilled input sections to complete their move.
if (!spilledInputSections.empty()) {
spilled = true;
llvm::erase_if(isd->sections, [&](InputSection *isec) {
return spilledInputSections.contains(isec);
});
}
}
}

return spilled;
}

// Erase any potential spill sections that were not used.
void LinkerScript::erasePotentialSpillSections() {
if (potentialSpillLists.empty())
return;

// Collect the set of input section descriptions that contain potential
// spills.
DenseSet<InputSectionDescription *> isds;
for (const auto &[_, list] : potentialSpillLists)
for (PotentialSpillSection *s = list.head; s; s = s->next)
isds.insert(s->isd);

for (InputSectionDescription *isd : isds)
llvm::erase_if(isd->sections, [](InputSection *s) {
return isa<PotentialSpillSection>(s);
});

potentialSpillLists.clear();
}

// Creates program headers as instructed by PHDRS linker script command.
SmallVector<PhdrEntry *, 0> LinkerScript::createPhdrs() {
SmallVector<PhdrEntry *, 0> ret;
Expand Down
15 changes: 14 additions & 1 deletion lld/ELF/LinkerScript.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define LLD_ELF_LINKER_SCRIPT_H

#include "Config.h"
#include "InputSection.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
Expand Down Expand Up @@ -287,7 +288,8 @@ class LinkerScript final {

SmallVector<InputSectionBase *, 0>
computeInputSections(const InputSectionDescription *,
ArrayRef<InputSectionBase *>);
ArrayRef<InputSectionBase *>,
const OutputSection &outCmd);

SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd);

Expand Down Expand Up @@ -333,6 +335,8 @@ class LinkerScript final {

bool shouldKeep(InputSectionBase *s);
const Defined *assignAddresses();
bool spillSections();
void erasePotentialSpillSections();
void allocateHeaders(SmallVector<PhdrEntry *, 0> &phdrs);
void processSectionCommands();
void processSymbolAssignments();
Expand Down Expand Up @@ -400,6 +404,15 @@ class LinkerScript final {
//
// then provideMap should contain the mapping: 'v' -> ['a', 'b', 'c']
llvm::MapVector<StringRef, SmallVector<StringRef, 0>> provideMap;

// List of potential spill locations (PotentialSpillSection) for an input
// section. The entries are chained through PotentialSpillSection::next.
struct PotentialSpillList {
// First potential spill location in the list. Never nullptr.
PotentialSpillSection *head;
// Last potential spill location in the list; newly created locations are
// linked in after it. Never nullptr.
PotentialSpillSection *tail;
};
llvm::DenseMap<InputSectionBase *, PotentialSpillList> potentialSpillLists;
};

LLVM_LIBRARY_VISIBILITY extern std::unique_ptr<LinkerScript> script;
Expand Down
Loading