From 8233eec397a7e84f8b668ff0a9373cf317f20a06 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Thu, 14 Sep 2023 16:58:52 +0200 Subject: [PATCH 1/2] [BOLT] Improve handling of relocations targeting specific instructions On RISC-V, there are certain relocations that target a specific instruction instead of a more abstract location like a function or basic block. Take the following example that loads a value from symbol `foo`: ``` nop 1: auipc t0, %pcrel_hi(foo) ld t0, %pcrel_lo(1b)(t0) ``` This results in two relocation: - auipc: `R_RISCV_PCREL_HI20` referencing `foo`; - ld: `R_RISCV_PCREL_LO12_I` referencing to local label `1` which points to the auipc instruction. It is of utmost importance that the `R_RISCV_PCREL_LO12_I` keeps referring to the auipc instruction; if not, the program will fail to assemble. However, BOLT currently does not guarantee this. BOLT currently assumes that all local symbols are jump targets and always starts a new basic block at symbol locations. The example above results in a CFG the looks like this: ``` .BB0: nop .BB1: auipc t0, %pcrel_hi(foo) ld t0, %pcrel_lo(.BB1)(t0) ``` While this currently works (i.e., the `R_RISCV_PCREL_LO12_I` relocation points to the correct instruction), it has two downsides: - Too many basic blocks are created (the example above is logically only one yet two are created); - If instructions are inserted in `.BB1` (e.g., by instrumentation), things will break since the label will not point to the auipc anymore. This patch proposes to fix this issue by teaching BOLT to track labels that should always point to a specific instruction. This is implemented as follows: - Add a new annotation type (`kLabel`) that allows us to annotate instructions with an `MCSymbol *`; - Whenever we encounter a relocation type that is used to refer to a specific instruction (`Relocation::isInstructionReference`), we register a new type of label (`InstructionLabels`) with the corresponding `BinaryFunction`; - During disassembly, add these instruction labels to the correct instructions; - During emission, emit these labels right before the instruction. I believe the use of annotations works quite well for this use case as it allows us to reliably track instruction labels. If we were to store them as offsets in basic blocks, it would be error prone to keep them updated whenever instructions are inserted or removed. I have chosen to add labels as first-class annotations (as opposed to a generic one) because the documentation of `MCAnnotation` suggests that generic annotations are to be used for optional metadata that can be discarded without affecting correctness. As this is not the case for labels, a first-class annotation seemed more appropriate. --- bolt/include/bolt/Core/BinaryFunction.h | 13 ++++++++ bolt/include/bolt/Core/MCPlus.h | 1 + bolt/include/bolt/Core/MCPlusBuilder.h | 7 +++++ bolt/include/bolt/Core/Relocation.h | 4 +++ bolt/lib/Core/BinaryContext.cpp | 2 ++ bolt/lib/Core/BinaryEmitter.cpp | 3 ++ bolt/lib/Core/BinaryFunction.cpp | 18 +++++++++++ bolt/lib/Core/MCPlusBuilder.cpp | 11 +++++++ bolt/lib/Core/Relocation.cpp | 13 ++++++++ bolt/lib/Passes/BinaryPasses.cpp | 5 +++ bolt/lib/Rewrite/RewriteInstance.cpp | 4 ++- bolt/test/RISCV/reloc-abs.s | 3 +- bolt/test/RISCV/reloc-bb-split.s | 42 +++++++++++++++++++++++++ bolt/test/RISCV/reloc-got.s | 3 +- bolt/test/RISCV/reloc-pcrel.s | 6 ++-- 15 files changed, 126 insertions(+), 9 deletions(-) create mode 100644 bolt/test/RISCV/reloc-bb-split.s diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index dea1d55b15026..a3af996cb726b 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -468,6 +468,13 @@ class BinaryFunction { using LabelsMapType = std::map; LabelsMapType Labels; + /// Map offset in the function to a label that should always point to the + /// corresponding instruction. This is used for labels that shouldn't point to + /// the start of a basic block but always to a specific instruction. This is + /// used, for example, on RISC-V where %pcrel_lo relocations point to the + /// corresponding %pcrel_hi. + LabelsMapType InstructionLabels; + /// Temporary holder of instructions before CFG is constructed. /// Map offset in the function to MCInst. using InstrMapType = std::map; @@ -591,6 +598,11 @@ class BinaryFunction { /// a global symbol that corresponds to an entry at this address. MCSymbol *getOrCreateLocalLabel(uint64_t Address, bool CreatePastEnd = false); + /// Return a label for the instruction at a given \p Address in the function. + /// This label will not be used to delineate basic blocks in the CFG but will + /// be attached to the corresponding instruction during disassembly. + MCSymbol *getOrCreateInstructionLabel(uint64_t Address); + /// Register an data entry at a given \p Offset into the function. void markDataAtOffset(uint64_t Offset) { if (!Islands) @@ -722,6 +734,7 @@ class BinaryFunction { clearList(LSDATypeAddressTable); clearList(LabelToBB); + clearList(InstructionLabels); if (!isMultiEntry()) clearList(Labels); diff --git a/bolt/include/bolt/Core/MCPlus.h b/bolt/include/bolt/Core/MCPlus.h index b4a72ac274fad..31cc9071de76a 100644 --- a/bolt/include/bolt/Core/MCPlus.h +++ b/bolt/include/bolt/Core/MCPlus.h @@ -66,6 +66,7 @@ class MCAnnotation { kTailCall, /// Tail call. kConditionalTailCall, /// CTC. kOffset, /// Offset in the function. + kLabel, /// MCSymbol pointing to this instruction. kGeneric /// First generic annotation. }; diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index e7b6c8e3a7473..85c83bc3e0751 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -1169,6 +1169,13 @@ class MCPlusBuilder { /// Remove offset annotation. bool clearOffset(MCInst &Inst); + /// Return the label of \p Inst, if available. + std::optional getLabel(const MCInst &Inst) const; + + /// Set the label of \p Inst. This label will be emitted right before \p Inst + /// is emitted to MCStreamer. + bool setLabel(MCInst &Inst, MCSymbol *Label); + /// Return MCSymbol that represents a target of this instruction at a given /// operand number \p OpNum. If there's no symbol associated with /// the operand - return nullptr. diff --git a/bolt/include/bolt/Core/Relocation.h b/bolt/include/bolt/Core/Relocation.h index 5ae288a91986e..1ddba9d78b3b8 100644 --- a/bolt/include/bolt/Core/Relocation.h +++ b/bolt/include/bolt/Core/Relocation.h @@ -97,6 +97,10 @@ struct Relocation { /// Return true if relocation type is for thread local storage. static bool isTLS(uint64_t Type); + /// Return true of relocation type is for referencing a specific instruction + /// (as opposed to a function, basic block, etc). + static bool isInstructionReference(uint64_t Type); + /// Return code for a NONE relocation static uint64_t getNone(); diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index ffecc52098043..4c67f41300b60 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -1863,6 +1863,8 @@ void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, } if (std::optional Offset = MIB->getOffset(Instruction)) OS << " # Offset: " << *Offset; + if (auto Label = MIB->getLabel(Instruction)) + OS << " # Label: " << **Label; MIB->printAnnotations(Instruction, OS); diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp index 95ab63521c06a..b1ee6cc221d71 100644 --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -498,6 +498,9 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF, BB->getLocSyms().emplace_back(Offset, LocSym); } + if (auto Label = BC.MIB->getLabel(Instr)) + Streamer.emitLabel(*Label); + Streamer.emitInstruction(Instr, *BC.STI); LastIsPrefix = BC.MIB->isPrefix(Instr); } diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 38d97d90acc13..afb6d5368be1b 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -965,6 +965,20 @@ MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address, return Label; } +MCSymbol *BinaryFunction::getOrCreateInstructionLabel(uint64_t Address) { + const uint64_t Offset = Address - getAddress(); + assert(Offset < getSize() && "Instruction label past function end"); + + auto LI = InstructionLabels.find(Offset); + if (LI != InstructionLabels.end()) + return LI->second; + + MCSymbol *Label = BC.Ctx->createNamedTempSymbol(); + InstructionLabels[Offset] = Label; + + return Label; +} + ErrorOr> BinaryFunction::getData() const { BinarySection &Section = *getOriginSection(); assert(Section.containsRange(getAddress(), getMaxSize()) && @@ -1363,6 +1377,10 @@ bool BinaryFunction::disassemble() { MIB->addAnnotation(Instruction, "Size", static_cast(Size)); } + auto InstructionLabel = InstructionLabels.find(Offset); + if (InstructionLabel != InstructionLabels.end()) + BC.MIB->setLabel(Instruction, InstructionLabel->second); + addInstruction(Offset, std::move(Instruction)); } diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp index 027cef1063ee3..0a5eb44e4876f 100644 --- a/bolt/lib/Core/MCPlusBuilder.cpp +++ b/bolt/lib/Core/MCPlusBuilder.cpp @@ -268,6 +268,17 @@ bool MCPlusBuilder::clearOffset(MCInst &Inst) { return true; } +std::optional MCPlusBuilder::getLabel(const MCInst &Inst) const { + if (auto Label = tryGetAnnotationAs(Inst, MCAnnotation::kLabel)) + return *Label; + return std::nullopt; +} + +bool MCPlusBuilder::setLabel(MCInst &Inst, MCSymbol *Label) { + getOrCreateAnnotationAs(Inst, MCAnnotation::kLabel) = Label; + return true; +} + bool MCPlusBuilder::hasAnnotation(const MCInst &Inst, unsigned Index) const { const MCInst *AnnotationInst = getAnnotationInst(Inst); if (!AnnotationInst) diff --git a/bolt/lib/Core/Relocation.cpp b/bolt/lib/Core/Relocation.cpp index 886fa314b2b73..7572c352a312d 100644 --- a/bolt/lib/Core/Relocation.cpp +++ b/bolt/lib/Core/Relocation.cpp @@ -797,6 +797,19 @@ bool Relocation::isTLS(uint64_t Type) { return isTLSX86(Type); } +bool Relocation::isInstructionReference(uint64_t Type) { + if (Arch != Triple::riscv64) + return false; + + switch (Type) { + default: + return false; + case ELF::R_RISCV_PCREL_LO12_I: + case ELF::R_RISCV_PCREL_LO12_S: + return true; + } +} + uint64_t Relocation::getNone() { if (Arch == Triple::aarch64) return ELF::R_AARCH64_NONE; diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp index bb760ea93ad16..3ba53d7b2b798 100644 --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -575,6 +575,7 @@ bool CheckLargeFunctions::shouldOptimize(const BinaryFunction &BF) const { void LowerAnnotations::runOnFunctions(BinaryContext &BC) { std::vector> PreservedOffsetAnnotations; + std::vector> PreservedLabelAnnotations; for (auto &It : BC.getBinaryFunctions()) { BinaryFunction &BF = It.second; @@ -609,6 +610,8 @@ void LowerAnnotations::runOnFunctions(BinaryContext &BC) { if (BF.requiresAddressTranslation() && BC.MIB->getOffset(*II)) PreservedOffsetAnnotations.emplace_back(&(*II), *BC.MIB->getOffset(*II)); + if (auto Label = BC.MIB->getLabel(*II)) + PreservedLabelAnnotations.emplace_back(&*II, *Label); BC.MIB->stripAnnotations(*II); } } @@ -625,6 +628,8 @@ void LowerAnnotations::runOnFunctions(BinaryContext &BC) { // Reinsert preserved annotations we need during code emission. for (const std::pair &Item : PreservedOffsetAnnotations) BC.MIB->setOffset(*Item.first, Item.second); + for (auto [Instr, Label] : PreservedLabelAnnotations) + BC.MIB->setLabel(*Instr, Label); } // Check for dirty state in MCSymbol objects that might be a consequence diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index b9bc35c88e987..84111deadabf4 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -2545,7 +2545,9 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection, // Adjust the point of reference to a code location inside a function. if (ReferencedBF->containsAddress(Address, /*UseMaxSize = */ true)) { RefFunctionOffset = Address - ReferencedBF->getAddress(); - if (RefFunctionOffset) { + if (Relocation::isInstructionReference(RType)) { + ReferencedSymbol = ReferencedBF->getOrCreateInstructionLabel(Address); + } else if (RefFunctionOffset) { if (ContainingBF && ContainingBF != ReferencedBF) { ReferencedSymbol = ReferencedBF->addEntryPointAtOffset(RefFunctionOffset); diff --git a/bolt/test/RISCV/reloc-abs.s b/bolt/test/RISCV/reloc-abs.s index 3e4b8b1395e1f..5b728f092b3c9 100644 --- a/bolt/test/RISCV/reloc-abs.s +++ b/bolt/test/RISCV/reloc-abs.s @@ -17,8 +17,7 @@ _start: .option push .option norelax 1: -// CHECK: .Ltmp0 -// CHECK: auipc gp, %pcrel_hi(__global_pointer$) +// CHECK: auipc gp, %pcrel_hi(__global_pointer$) # Label: .Ltmp0 // CHECK-NEXT: addi gp, gp, %pcrel_lo(.Ltmp0) auipc gp, %pcrel_hi(__global_pointer$) addi gp, gp, %pcrel_lo(1b) diff --git a/bolt/test/RISCV/reloc-bb-split.s b/bolt/test/RISCV/reloc-bb-split.s new file mode 100644 index 0000000000000..dc616392d9032 --- /dev/null +++ b/bolt/test/RISCV/reloc-bb-split.s @@ -0,0 +1,42 @@ +// RUN: %clang %cflags -o %t %s +// RUN: llvm-bolt --print-cfg --print-only=_start -o /dev/null %t \ +// RUN: | FileCheck %s + + .data + .globl d + .p2align 3 +d: + .dword 0 + + .text + .globl _start + .p2align 1 +// CHECK-LABEL: Binary Function "_start" after building cfg { +_start: +/// The local label is used for %pcrel_lo as well as a jump target so a new +/// basic block should start there. +// CHECK-LABEL: {{^}}.LBB00 +// CHECK: nop +// CHECK-LABEL: {{^}}.Ltmp1 +// CHECK: auipc t0, %pcrel_hi(d) # Label: .Ltmp0 +// CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp0)(t0) +// CHECK-NEXT: j .Ltmp1 + nop +1: + auipc t0, %pcrel_hi(d) + ld t0, %pcrel_lo(1b)(t0) + j 1b + +/// The local label is used only for %pcrel_lo so no new basic block should +/// start there. +// CHECK-LABEL: {{^}}.LFT0 +// CHECK: nop +// CHECK-NEXT: auipc t0, %pcrel_hi(d) # Label: .Ltmp2 +// CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp2)(t0) +// CHECK-NEXT: ret + nop +1: + auipc t0, %pcrel_hi(d) + ld t0, %pcrel_lo(1b)(t0) + ret + .size _start, .-_start diff --git a/bolt/test/RISCV/reloc-got.s b/bolt/test/RISCV/reloc-got.s index b6cd61be723bf..dcf9d0ea3ffbf 100644 --- a/bolt/test/RISCV/reloc-got.s +++ b/bolt/test/RISCV/reloc-got.s @@ -14,8 +14,7 @@ d: // CHECK: Binary Function "_start" after building cfg { _start: nop // Here to not make the _start and .Ltmp0 symbols coincide -// CHECK: .Ltmp0 -// CHECK: auipc t0, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) +// CHECK: auipc t0, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp0 // CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp0)(t0) 1: auipc t0, %got_pcrel_hi(d) diff --git a/bolt/test/RISCV/reloc-pcrel.s b/bolt/test/RISCV/reloc-pcrel.s index 36b132727291e..3ad3015a0a57f 100644 --- a/bolt/test/RISCV/reloc-pcrel.s +++ b/bolt/test/RISCV/reloc-pcrel.s @@ -14,12 +14,10 @@ d: // CHECK: Binary Function "_start" after building cfg { _start: nop // Here to not make the _start and .Ltmp0 symbols coincide -// CHECK: .Ltmp0 -// CHECK: auipc t0, %pcrel_hi(d) +// CHECK: auipc t0, %pcrel_hi(d) # Label: .Ltmp0 // CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp0)(t0) ld t0, d -// CHECK: .Ltmp1 -// CHECK: auipc t1, %pcrel_hi(d) +// CHECK-NEXT: auipc t1, %pcrel_hi(d) # Label: .Ltmp1 // CHECK-NEXT: sd t0, %pcrel_lo(.Ltmp1)(t1) sd t0, d, t1 ret From 6e96ad045cd0540134d6482e70d8a1d5fad9987a Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Thu, 5 Oct 2023 13:50:24 +0200 Subject: [PATCH 2/2] Remove BinaryFunction::InstructionLabels --- bolt/include/bolt/Core/BinaryFunction.h | 13 ------- bolt/lib/Core/BinaryFunction.cpp | 50 +++++++++++++++---------- bolt/lib/Rewrite/RewriteInstance.cpp | 10 ++++- bolt/test/RISCV/reloc-bb-split.s | 8 ++-- 4 files changed, 43 insertions(+), 38 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index a3af996cb726b..dea1d55b15026 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -468,13 +468,6 @@ class BinaryFunction { using LabelsMapType = std::map; LabelsMapType Labels; - /// Map offset in the function to a label that should always point to the - /// corresponding instruction. This is used for labels that shouldn't point to - /// the start of a basic block but always to a specific instruction. This is - /// used, for example, on RISC-V where %pcrel_lo relocations point to the - /// corresponding %pcrel_hi. - LabelsMapType InstructionLabels; - /// Temporary holder of instructions before CFG is constructed. /// Map offset in the function to MCInst. using InstrMapType = std::map; @@ -598,11 +591,6 @@ class BinaryFunction { /// a global symbol that corresponds to an entry at this address. MCSymbol *getOrCreateLocalLabel(uint64_t Address, bool CreatePastEnd = false); - /// Return a label for the instruction at a given \p Address in the function. - /// This label will not be used to delineate basic blocks in the CFG but will - /// be attached to the corresponding instruction during disassembly. - MCSymbol *getOrCreateInstructionLabel(uint64_t Address); - /// Register an data entry at a given \p Offset into the function. void markDataAtOffset(uint64_t Offset) { if (!Islands) @@ -734,7 +722,6 @@ class BinaryFunction { clearList(LSDATypeAddressTable); clearList(LabelToBB); - clearList(InstructionLabels); if (!isMultiEntry()) clearList(Labels); diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index afb6d5368be1b..24ea1fe2ed5a0 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -965,20 +965,6 @@ MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address, return Label; } -MCSymbol *BinaryFunction::getOrCreateInstructionLabel(uint64_t Address) { - const uint64_t Offset = Address - getAddress(); - assert(Offset < getSize() && "Instruction label past function end"); - - auto LI = InstructionLabels.find(Offset); - if (LI != InstructionLabels.end()) - return LI->second; - - MCSymbol *Label = BC.Ctx->createNamedTempSymbol(); - InstructionLabels[Offset] = Label; - - return Label; -} - ErrorOr> BinaryFunction::getData() const { BinarySection &Section = *getOriginSection(); assert(Section.containsRange(getAddress(), getMaxSize()) && @@ -1187,6 +1173,13 @@ bool BinaryFunction::disassemble() { // basic block. Labels[0] = Ctx->createNamedTempSymbol("BB0"); + // Map offsets in the function to a label that should always point to the + // corresponding instruction. This is used for labels that shouldn't point to + // the start of a basic block but always to a specific instruction. This is + // used, for example, on RISC-V where %pcrel_lo relocations point to the + // corresponding %pcrel_hi. + LabelsMapType InstructionLabels; + uint64_t Size = 0; // instruction size for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) { MCInst Instruction; @@ -1343,9 +1336,23 @@ bool BinaryFunction::disassemble() { ItrE = Relocations.lower_bound(Offset + Size); Itr != ItrE; ++Itr) { const Relocation &Relocation = Itr->second; + MCSymbol *Symbol = Relocation.Symbol; + + if (Relocation::isInstructionReference(Relocation.Type)) { + uint64_t RefOffset = Relocation.Value - getAddress(); + LabelsMapType::iterator LI = InstructionLabels.find(RefOffset); + + if (LI == InstructionLabels.end()) { + Symbol = BC.Ctx->createNamedTempSymbol(); + InstructionLabels.emplace(RefOffset, Symbol); + } else { + Symbol = LI->second; + } + } + int64_t Value = Relocation.Value; const bool Result = BC.MIB->replaceImmWithSymbolRef( - Instruction, Relocation.Symbol, Relocation.Addend, Ctx.get(), Value, + Instruction, Symbol, Relocation.Addend, Ctx.get(), Value, Relocation.Type); (void)Result; assert(Result && "cannot replace immediate with relocation"); @@ -1377,13 +1384,16 @@ bool BinaryFunction::disassemble() { MIB->addAnnotation(Instruction, "Size", static_cast(Size)); } - auto InstructionLabel = InstructionLabels.find(Offset); - if (InstructionLabel != InstructionLabels.end()) - BC.MIB->setLabel(Instruction, InstructionLabel->second); - addInstruction(Offset, std::move(Instruction)); } + for (auto [Offset, Label] : InstructionLabels) { + InstrMapType::iterator II = Instructions.find(Offset); + assert(II != Instructions.end() && "reference to non-existing instruction"); + + BC.MIB->setLabel(II->second, Label); + } + // Reset symbolizer for the disassembler. BC.SymbolicDisAsm->setSymbolizer(nullptr); @@ -4502,7 +4512,7 @@ void BinaryFunction::addRelocation(uint64_t Address, MCSymbol *Symbol, uint64_t Offset = Address - getAddress(); LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addRelocation in " << formatv("{0}@{1:x} against {2}\n", *this, Offset, - Symbol->getName())); + (Symbol ? Symbol->getName() : ""))); bool IsCI = BC.isAArch64() && isInConstantIsland(Address); std::map &Rels = IsCI ? Islands->Relocations : Relocations; diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 84111deadabf4..34f838cedd292 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -2546,7 +2546,15 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection, if (ReferencedBF->containsAddress(Address, /*UseMaxSize = */ true)) { RefFunctionOffset = Address - ReferencedBF->getAddress(); if (Relocation::isInstructionReference(RType)) { - ReferencedSymbol = ReferencedBF->getOrCreateInstructionLabel(Address); + // Instruction labels are created while disassembling so we just leave + // the symbol empty for now. Since the extracted value is typically + // unrelated to the referenced symbol (e.g., %pcrel_lo in RISC-V + // references an instruction but the patched value references the low + // bits of a data address), we set the extracted value to the symbol + // address in order to be able to correctly reconstruct the reference + // later. + ReferencedSymbol = nullptr; + ExtractedValue = Address; } else if (RefFunctionOffset) { if (ContainingBF && ContainingBF != ReferencedBF) { ReferencedSymbol = diff --git a/bolt/test/RISCV/reloc-bb-split.s b/bolt/test/RISCV/reloc-bb-split.s index dc616392d9032..5995562cf130b 100644 --- a/bolt/test/RISCV/reloc-bb-split.s +++ b/bolt/test/RISCV/reloc-bb-split.s @@ -17,10 +17,10 @@ _start: /// basic block should start there. // CHECK-LABEL: {{^}}.LBB00 // CHECK: nop -// CHECK-LABEL: {{^}}.Ltmp1 -// CHECK: auipc t0, %pcrel_hi(d) # Label: .Ltmp0 -// CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp0)(t0) -// CHECK-NEXT: j .Ltmp1 +// CHECK-LABEL: {{^}}.Ltmp0 +// CHECK: auipc t0, %pcrel_hi(d) # Label: .Ltmp1 +// CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp1)(t0) +// CHECK-NEXT: j .Ltmp0 nop 1: auipc t0, %pcrel_hi(d)