From 72286701c48d722a7fbb43b4e1ca36b4f99ef2e1 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Wed, 6 Sep 2023 11:29:28 +0200 Subject: [PATCH 1/2] [MC][NFC] Allow MCInstrAnalysis to store state Currently, all the analysis functions provided by `MCInstrAnalysis` work on a single instruction. On some targets, this limits the kind of instructions that can be successfully analyzed as common constructs may need multiple instructions. For example, a typical call sequence on RISC-V uses an auipc+jalr pair. In order to analyse the jalr inside `evaluateBranch`, information about the corresponding auipc is needed. Similarly, AArch64 uses adrp+ldr pairs to access globals. This patch proposes to add state to `MCInstrAnalysis` to support these use cases. Two new virtual methods are added: - `updateState`: takes an instruction and its address. This method should be called by clients on every instruction and allows targets to store whatever information they need to analyse future instructions. - `resetState`: clears the state whenever it becomes irrelevant. Clients could call this, for example, when starting to disassemble a new function. Note that the default implementations do nothing so this patch is NFC. No actual state is stored inside `MCInstrAnalysis`; deciding the structure of the state is left to the targets. This patch also modifies llvm-objdump to use the new interface. This patch is an alternative to D116677 and the idea of storing state in `MCInstrAnalysis` was first discussed there. 
--- llvm/include/llvm/MC/MCInstrAnalysis.h | 15 +++++++++++++++ llvm/tools/llvm-objdump/llvm-objdump.cpp | 21 ++++++++++++++++----- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h index c3c675c39c559..dac12af599e6f 100644 --- a/llvm/include/llvm/MC/MCInstrAnalysis.h +++ b/llvm/include/llvm/MC/MCInstrAnalysis.h @@ -37,6 +37,21 @@ class MCInstrAnalysis { MCInstrAnalysis(const MCInstrInfo *Info) : Info(Info) {} virtual ~MCInstrAnalysis() = default; + /// Clear the internal state. See updateState for more information. + virtual void resetState() {} + + /// Update internal state with \p Inst at \p Addr. + /// + /// For some types a analyses, inspecting a single instruction is not + /// sufficient. Some examples are auipc/jalr pairs on RISC-V or adrp/ldr pairs + /// on AArch64. To support inspecting multiple instructions, targets may keep + /// track of an internal state while analysing instructions. Clients should + /// call updateState for every instruction which allows later calls to one of + /// the analysis functions to take previous instructions into account. + /// Whenever state becomes irrelevant (e.g., when starting to disassemble a + /// new function), clients should call resetState to clear it. 
+ virtual void updateState(const MCInst &Inst, uint64_t Addr) {} + virtual bool isBranch(const MCInst &Inst) const { return Info->get(Inst.getOpcode()).isBranch(); } diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 96d74d6e2d5e8..8f6479d3c6e31 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -842,7 +842,7 @@ class DisassemblerTarget { std::unique_ptr SubtargetInfo; std::shared_ptr Context; std::unique_ptr DisAsm; - std::shared_ptr InstrAnalysis; + std::shared_ptr InstrAnalysis; std::shared_ptr InstPrinter; PrettyPrinter *Printer; @@ -1265,14 +1265,19 @@ collectBBAddrMapLabels(const std::unordered_map &AddrToBBAd } } -static void collectLocalBranchTargets( - ArrayRef Bytes, const MCInstrAnalysis *MIA, MCDisassembler *DisAsm, - MCInstPrinter *IP, const MCSubtargetInfo *STI, uint64_t SectionAddr, - uint64_t Start, uint64_t End, std::unordered_map &Labels) { +static void +collectLocalBranchTargets(ArrayRef Bytes, MCInstrAnalysis *MIA, + MCDisassembler *DisAsm, MCInstPrinter *IP, + const MCSubtargetInfo *STI, uint64_t SectionAddr, + uint64_t Start, uint64_t End, + std::unordered_map &Labels) { // So far only supports PowerPC and X86. if (!STI->getTargetTriple().isPPC() && !STI->getTargetTriple().isX86()) return; + if (MIA) + MIA->resetState(); + Labels.clear(); unsigned LabelCount = 0; Start += SectionAddr; @@ -1298,6 +1303,7 @@ static void collectLocalBranchTargets( !Labels.count(Target) && !(STI->getTargetTriple().isPPC() && Target == Index)) Labels[Target] = ("L" + Twine(LabelCount++)).str(); + MIA->updateState(Inst, Index); } Index += Size; } @@ -1939,6 +1945,9 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj, BBAddrMapLabels); } + if (DT->InstrAnalysis) + DT->InstrAnalysis->resetState(); + while (Index < End) { // ARM and AArch64 ELF binaries can interleave data and text in the // same section. 
We rely on the markers introduced to understand what @@ -2155,6 +2164,8 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj, if (TargetOS == &CommentStream) *TargetOS << "\n"; } + + DT->InstrAnalysis->updateState(Inst, SectionAddr + Index); } } From 2eac9f59aca1c59d47a615084b2bfb839e9f1b0c Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Mon, 2 Oct 2023 10:18:30 +0200 Subject: [PATCH 2/2] fixup! [MC][NFC] Allow MCInstrAnalysis to store state Fix typo --- llvm/include/llvm/MC/MCInstrAnalysis.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h index dac12af599e6f..e3ddf0b8b8939 100644 --- a/llvm/include/llvm/MC/MCInstrAnalysis.h +++ b/llvm/include/llvm/MC/MCInstrAnalysis.h @@ -42,7 +42,7 @@ class MCInstrAnalysis { /// Update internal state with \p Inst at \p Addr. /// - /// For some types a analyses, inspecting a single instruction is not + /// For some types of analyses, inspecting a single instruction is not /// sufficient. Some examples are auipc/jalr pairs on RISC-V or adrp/ldr pairs /// on AArch64. To support inspecting multiple instructions, targets may keep /// track of an internal state while analysing instructions. Clients should