diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index f2a707a8ba8d7..29654eb5774ac 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -221,6 +221,7 @@ class CodeGenOptions : public CodeGenOptionsBase { /// The file to use for dumping bug report by `Debugify` for original /// debug info. std::string DIBugsReportFilePath; + std::string DIBugsReportArgString; /// The floating-point denormal mode to use. llvm::DenormalMode FPDenormalMode = llvm::DenormalMode::getIEEE(); diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index e765bbf637a66..88f86bfd62c06 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -904,13 +904,50 @@ void EmitAssemblyHelper::RunOptimizationPipeline( DebugifyEachInstrumentation Debugify; DebugInfoPerPass DebugInfoBeforePass; if (CodeGenOpts.EnableDIPreservationVerify) { + Debugify.setDebugifyMode(DebugifyMode::OriginalDebugInfo); Debugify.setDebugInfoBeforePass(DebugInfoBeforePass); - if (!CodeGenOpts.DIBugsReportFilePath.empty()) + if (!CodeGenOpts.DIBugsReportFilePath.empty()) { Debugify.setOrigDIVerifyBugsReportFilePath( CodeGenOpts.DIBugsReportFilePath); + std::error_code EC; + raw_fd_ostream OS_FILE{CodeGenOpts.DIBugsReportFilePath, EC, + sys::fs::OF_Append | sys::fs::OF_TextWithCRLF}; + if (EC) { + errs() << "Could not open file: " << EC.message() << ", " + << CodeGenOpts.DIBugsReportFilePath << '\n'; + } else { + if (auto L = OS_FILE.lock()) { + OS_FILE << CodeGenOpts.DIBugsReportArgString; + // Flush while the lock is still held; otherwise the buffered record + // is only written by close(), after the lock has been released. + OS_FILE.flush(); + } else { + // Testing a failed Expected does not clear it; consume the error so + // that assertion-enabled builds do not abort, and skip the write. + consumeError(L.takeError()); + } + OS_FILE.close(); + } + } Debugify.registerCallbacks(PIC, MAM); + +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING + // If we're using debug location coverage tracking, mark all the + // instructions coming out of the frontend without a DebugLoc as being + // intentional line-zero locations, to prevent both those instructions and + // new instructions that inherit their location 
from being treated as + // incorrectly empty locations. + for (Function &F : *TheModule) { + if (!F.getSubprogram()) + continue; + for (BasicBlock &BB : F) + for (Instruction &I : BB) + if (!I.getDebugLoc()) + I.setDebugLoc(DebugLoc::getLineZero()); + } +#endif } // Attempt to load pass plugins and register their callbacks with PB. for (auto &PluginFN : CodeGenOpts.PassPlugins) { diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index f6b6c44a4cab6..16c9a71a0020f 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1876,6 +1876,19 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, << Opts.DIBugsReportFilePath; Opts.DIBugsReportFilePath = ""; } + if (Opts.EnableDIPreservationVerify && Opts.DIBugsReportFilePath.size()) { + std::string ArgString; + llvm::raw_string_ostream OS(ArgString); + // Escape quotes and backslashes so that file names and arguments + // containing them (e.g. -DNAME="x") cannot break the quoted strings. + OS << "{\"file\":\""; + OS.write_escaped(OutputFile) << "\", \"args\":\""; + for (Arg *A : Args) { + OS.write_escaped(A->getAsString(Args)) << " "; + } + OS << "\"}\n"; + Opts.DIBugsReportArgString = ArgString; + } Opts.NewStructPathTBAA = !Args.hasArg(OPT_no_struct_path_tbaa) && Args.hasArg(OPT_new_struct_path_tbaa); diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 12618966c4adf..2a404ba598feb 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -524,6 +524,11 @@ endif() option(LLVM_ENABLE_CRASH_DUMPS "Turn on memory dumps on crashes. Currently only implemented on Windows." OFF) +option(LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING "Enhance debugify's line number tracking at the cost of performance; abi-breaking." 
OFF) +if(LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING) + set(ENABLE_DEBUGLOC_COVERAGE_TRACKING 1) +endif() + set(WINDOWS_PREFER_FORWARD_SLASH_DEFAULT OFF) if (MINGW) # Cygwin doesn't identify itself as Windows, and thus gets path::Style::posix diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index bc83f19dc581f..f7bbcb8d52696 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -100,8 +100,8 @@ class LegalizationArtifactCombiner { const LLT DstTy = MRI.getType(DstReg); if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) { auto &CstVal = SrcMI->getOperand(1); - auto *MergedLocation = DILocation::getMergedLocation( - MI.getDebugLoc().get(), SrcMI->getDebugLoc().get()); + auto MergedLocation = + DebugLoc::getMergedLocation(MI.getDebugLoc(), SrcMI->getDebugLoc()); // Set the debug location to the merged location of the SrcMI and the MI // if the aext fold is successful. Builder.setDebugLoc(MergedLocation); diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake index ff30741c8f360..388ce1e8f74e3 100644 --- a/llvm/include/llvm/Config/config.h.cmake +++ b/llvm/include/llvm/Config/config.h.cmake @@ -19,6 +19,10 @@ /* Define to 1 to enable crash memory dumps, and to 0 otherwise. */ #cmakedefine01 LLVM_ENABLE_CRASH_DUMPS +/* Define to 1 to enable expensive checks for debug location coverage checking, + and to 0 otherwise. */ +#cmakedefine01 ENABLE_DEBUGLOC_COVERAGE_TRACKING + /* Define to 1 to prefer forward slashes on Windows, and to 0 prefer backslashes. 
*/ #cmakedefine01 LLVM_WINDOWS_PREFER_FORWARD_SLASH diff --git a/llvm/include/llvm/IR/DebugLoc.h b/llvm/include/llvm/IR/DebugLoc.h index c22d3e9b10d27..b4ff0d9929183 100644 --- a/llvm/include/llvm/IR/DebugLoc.h +++ b/llvm/include/llvm/IR/DebugLoc.h @@ -14,6 +14,7 @@ #ifndef LLVM_IR_DEBUGLOC_H #define LLVM_IR_DEBUGLOC_H +#include "llvm/Config/config.h" #include "llvm/IR/TrackingMDRef.h" #include "llvm/Support/DataTypes.h" @@ -22,6 +23,74 @@ namespace llvm { class LLVMContext; class raw_ostream; class DILocation; + class Function; + +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING // always defined (0 or 1); test the value + struct DbgLocOriginBacktrace { + static constexpr unsigned long MaxDepth = 16; + SmallVector>, 0> Stacktraces; + DbgLocOriginBacktrace(bool ShouldCollectTrace); + void addTrace(); + }; + + // Used to represent different "kinds" of DebugLoc, expressing that a DebugLoc + // is either ordinary, containing a valid DILocation, or otherwise describing + // the reason why the DebugLoc does not contain a valid DILocation. + enum class DebugLocKind : uint8_t { + // DebugLoc should contain a valid DILocation. + Normal, + // DebugLoc intentionally does not have a valid DILocation; may be for a + // compiler-generated instruction, or an explicitly dropped location. + LineZero, + // DebugLoc does not have a known or currently knowable source location. + Unknown, + // Used for instructions that we don't expect to be emitted, and so can omit + // a valid DILocation. It is an error to try and emit a Temporary DebugLoc + // into the line table. + Temporary + }; + + // Extends TrackingMDNodeRef to also store a DebugLocKind and Backtrace, + // allowing Debugify to ignore intentionally-empty DebugLocs and display the + // code responsible for generating unintentionally-empty DebugLocs. 
+ class DILocAndCoverageTracking : public TrackingMDNodeRef { + public: + DebugLocKind Kind; + // Currently we only need to track the Origin of this DILoc when using a + // normal empty DebugLoc, so only collect the stack trace in those cases. + DbgLocOriginBacktrace Origin; + DILocAndCoverageTracking(bool NeedsStacktrace = true) + : TrackingMDNodeRef(nullptr), Kind(DebugLocKind::Normal), + Origin(NeedsStacktrace) {} + // Valid or nullptr MDNode*, normal DebugLocKind + DILocAndCoverageTracking(const MDNode *Loc) + : TrackingMDNodeRef(const_cast(Loc)), + Kind(DebugLocKind::Normal), Origin(!Loc) {} + DILocAndCoverageTracking(const DILocation *Loc); + // Always nullptr MDNode*, any DebugLocKind + DILocAndCoverageTracking(DebugLocKind Kind) + : TrackingMDNodeRef(nullptr), Kind(Kind), + Origin(Kind == DebugLocKind::Normal) {} + }; + template <> struct simplify_type { + using SimpleType = MDNode *; + + static MDNode *getSimplifiedValue(DILocAndCoverageTracking &MD) { + return MD.get(); + } + }; + template <> struct simplify_type { + using SimpleType = MDNode *; + + static MDNode *getSimplifiedValue(const DILocAndCoverageTracking &MD) { + return MD.get(); + } + }; + + using DebugLocTrackingRef = DILocAndCoverageTracking; +#else + using DebugLocTrackingRef = TrackingMDNodeRef; +#endif // ENABLE_DEBUGLOC_COVERAGE_TRACKING /// A debug info location. /// @@ -31,7 +100,8 @@ namespace llvm { /// To avoid extra includes, \a DebugLoc doubles the \a DILocation API with a /// one based on relatively opaque \a MDNode pointers. class DebugLoc { - TrackingMDNodeRef Loc; + + DebugLocTrackingRef Loc; public: DebugLoc() = default; @@ -47,6 +117,26 @@ namespace llvm { /// IR. 
explicit DebugLoc(const MDNode *N); +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING + DebugLoc(DebugLocKind Kind) : Loc(Kind) {} + DebugLocKind getKind() const { return Loc.Kind; } + DbgLocOriginBacktrace getOrigin() const { return Loc.Origin; } + DebugLoc getCopied() const { + DebugLoc NewDL = *this; + NewDL.Loc.Origin.addTrace(); + return NewDL; + } +#else + DebugLoc getCopied() const { return *this; } +#endif + + static DebugLoc getTemporary(); + static DebugLoc getUnknown(); + static DebugLoc getLineZero(); + + static DebugLoc getMergedLocations(ArrayRef Locs); + static DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB); + /// Get the underlying \a DILocation. /// /// \pre !*this or \c isa(getAsMDNode()). diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 31a1fef321995..337fca49f8d9d 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -90,12 +90,21 @@ class IRBuilderCallbackInserter : public IRBuilderDefaultInserter { /// Common base class shared among various IRBuilders. class IRBuilderBase { /// Pairs of (metadata kind, MDNode *) that should be added to all newly - /// created instructions, like !dbg metadata. + /// created instructions, excluding !dbg metadata, which is stored in the + /// StoredDL field. SmallVector, 2> MetadataToCopy; + /// The DebugLoc that will be applied to instructions inserted by this builder. + DebugLoc StoredDL; + /// Tracks whether we have explicitly set a DebugLoc - valid or empty - in this + /// builder, to determine whether to copy StoredDL to inserted instructions. + bool HasExplicitDL = false; /// Add or update the an entry (Kind, MD) to MetadataToCopy, if \p MD is not /// null. If \p MD is null, remove the entry with \p Kind. 
void AddOrRemoveMetadataToCopy(unsigned Kind, MDNode *MD) { + assert(Kind != LLVMContext::MD_dbg && + "MD_dbg metadata must be stored in StoredDL"); + if (!MD) { erase_if(MetadataToCopy, [Kind](const std::pair &KV) { return KV.first == Kind; @@ -215,7 +224,10 @@ class IRBuilderBase { /// Set location information used by debugging information. void SetCurrentDebugLocation(DebugLoc L) { - AddOrRemoveMetadataToCopy(LLVMContext::MD_dbg, L.getAsMDNode()); + // For !dbg metadata attachments, we use DebugLoc instead of the raw MDNode + // to include optional introspection data for use in Debugify. + StoredDL = std::move(L); + HasExplicitDL = true; } /// Set nosanitize metadata. @@ -229,8 +241,12 @@ class IRBuilderBase { /// not on \p Src will be dropped from MetadataToCopy. void CollectMetadataToCopy(Instruction *Src, ArrayRef MetadataKinds) { - for (unsigned K : MetadataKinds) - AddOrRemoveMetadataToCopy(K, Src->getMetadata(K)); + for (unsigned K : MetadataKinds) { + if (K == LLVMContext::MD_dbg) + SetCurrentDebugLocation(Src->getDebugLoc()); + else + AddOrRemoveMetadataToCopy(K, Src->getMetadata(K)); + } } /// Get location information used by debugging information. @@ -244,6 +260,11 @@ class IRBuilderBase { void AddMetadataToInst(Instruction *I) const { for (const auto &KV : MetadataToCopy) I->setMetadata(KV.first, KV.second); + // If I does not have an existing DebugLoc and no DebugLoc has been set + // here, we copy our DebugLoc to I anyway, because more likely than not I + // is a new instruction whose DL should originate from this builder. 
+ if (HasExplicitDL || !I->getDebugLoc()) + I->setDebugLoc(StoredDL.getCopied()); } /// Get the return type of the current function that we're emitting diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index c27572300d506..16024dbcfdaad 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -647,6 +647,7 @@ class Instruction : public User, /// The DebugLoc attached to this instruction will be overwritten by the /// merged DebugLoc. void applyMergedLocation(DILocation *LocA, DILocation *LocB); + void applyMergedLocation(DebugLoc LocA, DebugLoc LocB); /// Updates the debug location given that the instruction has been hoisted /// from a block to a predecessor of that block. diff --git a/llvm/include/llvm/Support/Signals.h b/llvm/include/llvm/Support/Signals.h index 70749ce30184a..113f9cdf1a723 100644 --- a/llvm/include/llvm/Support/Signals.h +++ b/llvm/include/llvm/Support/Signals.h @@ -14,6 +14,8 @@ #ifndef LLVM_SUPPORT_SIGNALS_H #define LLVM_SUPPORT_SIGNALS_H +#include "llvm/Config/config.h" +#include #include #include @@ -21,6 +23,20 @@ namespace llvm { class StringRef; class raw_ostream; +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING +template struct DenseMapInfo; +template class DenseSet; +namespace detail { +template struct DenseMapPair; +} +template +class DenseMap; +using AddressSet = DenseSet>; +using SymbolizedAddressMap = + DenseMap, + detail::DenseMapPair>; +#endif + namespace sys { /// This function runs all the registered interrupt handlers, including the @@ -55,6 +71,28 @@ namespace sys { /// specified, the entire frame is printed. void PrintStackTrace(raw_ostream &OS, int Depth = 0); +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING +#ifdef NDEBUG +#error DebugLoc Coverage Tracking should not be enabled in Release builds. +#endif + /// Populates the given array with a stacktrace of the current program, up to + /// MaxDepth frames. 
Returns the number of frames returned, which will be + /// inserted into \p StackTrace from index 0. All entries after the returned + /// depth will be unmodified. NB: This is only intended to be used for + /// introspection of LLVM by Debugify, will not be enabled in release builds, + /// and should not be relied on for other purposes. + template + int getStackTrace(std::array &StackTrace); + + /// Takes a set of \p Addresses, symbolizes them and stores the result in the + /// provided \p SymbolizedAddresses map. + /// NB: This is only intended to be used for introspection of LLVM by + /// Debugify, will not be enabled in release builds, and should not be relied + /// on for other purposes. + void symbolizeAddresses(AddressSet &Addresses, + SymbolizedAddressMap &SymbolizedAddresses); +#endif + // Run all registered signal handlers. void RunSignalHandlers(); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index f88653146cc6f..377f2300d72b2 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/config.h" #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/IR/Constants.h" @@ -2080,6 +2081,10 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { } if (!DL) { +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING + assert(DL.getKind() != DebugLocKind::Temporary && + "Temporary DebugLocs should never be considered for emission!"); +#endif // We have an unspecified location, which might want to be line 0. // If we have already emitted a line-0 record, don't repeat it. 
if (LastAsmLine == 0) diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 92a03eb52e35d..d55be6c112ec7 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -841,7 +841,7 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) { "Reached BB end within common tail"); } assert(MI.isIdenticalTo(*Pos) && "Expected matching MIIs!"); - DL = DILocation::getMergedLocation(DL, Pos->getDebugLoc()); + DL = DebugLoc::getMergedLocation(DL, Pos->getDebugLoc()); NextCommonInsts[i] = ++Pos; } MI.setDebugLoc(DL); @@ -915,7 +915,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // Walk through equivalence sets looking for actual exact matches. while (MergePotentials.size() > 1) { unsigned CurHash = MergePotentials.back().getHash(); - const DebugLoc &BranchDL = MergePotentials.back().getBranchDebugLoc(); + const DebugLoc BranchDL = MergePotentials.back().getBranchDebugLoc(); // Build SameTails, identifying the set of blocks with this hash code // and with the maximum number of instructions in common. 
diff --git a/llvm/lib/CodeGen/BranchFolding.h b/llvm/lib/CodeGen/BranchFolding.h index ff2bbe06c0488..5d05be78f60e5 100644 --- a/llvm/lib/CodeGen/BranchFolding.h +++ b/llvm/lib/CodeGen/BranchFolding.h @@ -50,11 +50,11 @@ class TargetRegisterInfo; class MergePotentialsElt { unsigned Hash; MachineBasicBlock *Block; - DebugLoc BranchDebugLoc; + MDNode *BranchDebugLoc; public: - MergePotentialsElt(unsigned h, MachineBasicBlock *b, DebugLoc bdl) - : Hash(h), Block(b), BranchDebugLoc(std::move(bdl)) {} + MergePotentialsElt(unsigned h, MachineBasicBlock *b, MDNode *bdl) + : Hash(h), Block(b), BranchDebugLoc(bdl) {} unsigned getHash() const { return Hash; } MachineBasicBlock *getBlock() const { return Block; } @@ -63,7 +63,7 @@ class TargetRegisterInfo; Block = MBB; } - const DebugLoc &getBranchDebugLoc() { return BranchDebugLoc; } + const DebugLoc getBranchDebugLoc() { return DebugLoc(BranchDebugLoc); } bool operator<(const MergePotentialsElt &) const; }; diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index 547529bbe699a..b8f0bf76ed011 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -53,8 +53,7 @@ CSEMIRBuilder::getDominatingInstrForID(FoldingSetNodeID &ID, } else if (!dominates(MI, CurrPos)) { // Update the spliced machineinstr's debug location by merging it with the // debug location of the instruction at the insertion point. 
- auto *Loc = DILocation::getMergedLocation(getDebugLoc().get(), - MI->getDebugLoc().get()); + auto Loc = DebugLoc::getMergedLocation(getDebugLoc(), MI->getDebugLoc()); MI->setDebugLoc(Loc); CurMBB->splice(CurrPos, CurMBB, MI); } @@ -164,7 +163,7 @@ CSEMIRBuilder::generateCopiesIfRequired(ArrayRef DstOps, if (Observer) Observer->changingInstr(*MIB); MIB->setDebugLoc( - DILocation::getMergedLocation(MIB->getDebugLoc(), getDebugLoc())); + DebugLoc::getMergedLocation(MIB->getDebugLoc(), getDebugLoc())); if (Observer) Observer->changedInstr(*MIB); } diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp index 0d0c093648eba..c6712fafebcb3 100644 --- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp @@ -373,7 +373,7 @@ bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl &Stores) { // For each store, compute pairwise merged debug locs. DebugLoc MergedLoc = Stores.front()->getDebugLoc(); for (auto *Store : drop_begin(Stores)) - MergedLoc = DILocation::getMergedLocation(MergedLoc, Store->getDebugLoc()); + MergedLoc = DebugLoc::getMergedLocation(MergedLoc, Store->getDebugLoc()); Builder.setInstr(*Stores.back()); Builder.setDebugLoc(MergedLoc); diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index d681d00b5d8c4..4dee67a7adeab 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -1564,7 +1564,7 @@ MachineBasicBlock::findBranchDebugLoc() { DL = TI->getDebugLoc(); for (++TI ; TI != end() ; ++TI) if (TI->isBranch()) - DL = DILocation::getMergedLocation(DL, TI->getDebugLoc()); + DL = DebugLoc::getMergedLocation(DL, TI->getDebugLoc()); } return DL; } diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 4b3ff57fb478a..61d5aa5b99079 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -1441,8 +1441,8 @@ static void 
performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, // location to prevent debug-info driven tools from potentially reporting // wrong location information. if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end()) - MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(), - InsertPos->getDebugLoc())); + MI.setDebugLoc(DebugLoc::getMergedLocation(MI.getDebugLoc(), + InsertPos->getDebugLoc())); else MI.setDebugLoc(DebugLoc()); diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 7fa1f9696d43b..a11d23fd7bb65 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -932,6 +932,9 @@ unsigned llvm::getDebugMetadataVersionFromModule(const Module &M) { void Instruction::applyMergedLocation(DILocation *LocA, DILocation *LocB) { setDebugLoc(DILocation::getMergedLocation(LocA, LocB)); } +void Instruction::applyMergedLocation(DebugLoc LocA, DebugLoc LocB) { + setDebugLoc(DebugLoc::getMergedLocation(LocA, LocB)); +} void Instruction::mergeDIAssignID( ArrayRef SourceInstructions) { @@ -979,7 +982,7 @@ void Instruction::dropLocation() { } if (!MayLowerToCall) { - setDebugLoc(DebugLoc()); + setDebugLoc(DebugLoc::getLineZero()); return; } @@ -998,7 +1001,7 @@ void Instruction::dropLocation() { // // One alternative is to set a line 0 location with the existing scope and // inlinedAt info. The location might be sensitive to when inlining occurs. 
- setDebugLoc(DebugLoc()); + setDebugLoc(DebugLoc::getLineZero()); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/DebugLoc.cpp b/llvm/lib/IR/DebugLoc.cpp index bdea52180f74a..1f5c73015b92a 100644 --- a/llvm/lib/IR/DebugLoc.cpp +++ b/llvm/lib/IR/DebugLoc.cpp @@ -9,8 +9,43 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Function.h" + +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING +#include "llvm/Support/Signals.h" + +using namespace llvm; + +DILocAndCoverageTracking::DILocAndCoverageTracking(const DILocation *L) + : TrackingMDNodeRef(const_cast(L)), + Kind(DebugLocKind::Normal), Origin(!L) {} + +DbgLocOriginBacktrace::DbgLocOriginBacktrace(bool ShouldCollectTrace) { + if (ShouldCollectTrace) { + auto &[Depth, Stacktrace] = Stacktraces.emplace_back(); + Depth = sys::getStackTrace(Stacktrace); + } +} +void DbgLocOriginBacktrace::addTrace() { + if (Stacktraces.empty()) + return; + auto &[Depth, Stacktrace] = Stacktraces.emplace_back(); + Depth = sys::getStackTrace(Stacktrace); +} + +DebugLoc DebugLoc::getTemporary() { return DebugLoc(DebugLocKind::Temporary); } +DebugLoc DebugLoc::getUnknown() { return DebugLoc(DebugLocKind::Unknown); } +DebugLoc DebugLoc::getLineZero() { return DebugLoc(DebugLocKind::LineZero); } + +#else + using namespace llvm; +DebugLoc DebugLoc::getTemporary() { return DebugLoc(); } +DebugLoc DebugLoc::getUnknown() { return DebugLoc(); } +DebugLoc DebugLoc::getLineZero() { return DebugLoc(); } +#endif // ENABLE_DEBUGLOC_COVERAGE_TRACKING + //===----------------------------------------------------------------------===// // DebugLoc Implementation //===----------------------------------------------------------------------===// @@ -136,6 +171,27 @@ DebugLoc DebugLoc::appendInlinedAt(const DebugLoc &DL, DILocation *InlinedAt, return Last; } +DebugLoc DebugLoc::getMergedLocations(ArrayRef Locs) { + if (Locs.empty()) + return 
DebugLoc(); + if (Locs.size() == 1) + return Locs[0]; + DebugLoc Merged = Locs[0]; + for (const DebugLoc &DL : llvm::drop_begin(Locs)) { + Merged = getMergedLocation(Merged, DL); + if (!Merged) + break; + } + return Merged; +} +DebugLoc DebugLoc::getMergedLocation(DebugLoc LocA, DebugLoc LocB) { + if (!LocA) + return LocA.getCopied(); + if (!LocB) + return LocB.getCopied(); + return DILocation::getMergedLocation(LocA, LocB); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void DebugLoc::dump() const { print(dbgs()); } #endif diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index e5cde875ab1d8..88cd401019156 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -61,19 +61,13 @@ Type *IRBuilderBase::getCurrentFunctionReturnType() const { return BB->getParent()->getReturnType(); } -DebugLoc IRBuilderBase::getCurrentDebugLocation() const { - for (auto &KV : MetadataToCopy) - if (KV.first == LLVMContext::MD_dbg) - return {cast(KV.second)}; - - return {}; -} +DebugLoc IRBuilderBase::getCurrentDebugLocation() const { return StoredDL; } void IRBuilderBase::SetInstDebugLocation(Instruction *I) const { - for (const auto &KV : MetadataToCopy) - if (KV.first == LLVMContext::MD_dbg) { - I->setDebugLoc(DebugLoc(KV.second)); - return; - } + // If I does not have an existing DebugLoc and no DebugLoc has been set + // here, we copy our DebugLoc to I anyway, because more likely than not I + // is a new instruction whose DL should originate from this builder. 
+ if (HasExplicitDL || !I->getDebugLoc()) + I->setDebugLoc(StoredDL.getCopied()); } CallInst * diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 6f0f3f244c050..2c0713aa88641 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -1279,6 +1279,9 @@ void Instruction::swapProfMetadata() { void Instruction::copyMetadata(const Instruction &SrcInst, ArrayRef WL) { + if (WL.empty() || is_contained(WL, LLVMContext::MD_dbg)) + setDebugLoc(SrcInst.getDebugLoc()); + if (!SrcInst.hasMetadata()) return; @@ -1292,8 +1295,6 @@ void Instruction::copyMetadata(const Instruction &SrcInst, if (WL.empty() || WLS.count(MD.first)) setMetadata(MD.first, MD.second); } - if (WL.empty() || WLS.count(LLVMContext::MD_dbg)) - setDebugLoc(SrcInst.getDebugLoc()); } Instruction *Instruction::clone() const { @@ -1311,5 +1312,6 @@ Instruction *Instruction::clone() const { New->SubclassOptionalData = SubclassOptionalData; New->copyMetadata(*this); + New->setDebugLoc(getDebugLoc().getCopied()); return New; } diff --git a/llvm/lib/Support/Signals.cpp b/llvm/lib/Support/Signals.cpp index 9f9030e79d104..df23afda6d583 100644 --- a/llvm/lib/Support/Signals.cpp +++ b/llvm/lib/Support/Signals.cpp @@ -253,6 +253,117 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace, return true; } +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING +void sys::symbolizeAddresses(AddressSet &Addresses, + SymbolizedAddressMap &SymbolizedAddresses) { + assert(!DisableSymbolicationFlag && !getenv(DisableSymbolizationEnv) && + "Debugify origin stacktraces require symbolization to be enabled."); + + // Convert Set of Addresses to ordered list. + SmallVector AddressList(Addresses.begin(), Addresses.end()); + if (AddressList.empty()) + return; + int NumAddresses = AddressList.size(); + llvm::sort(AddressList); + + // Use llvm-symbolizer tool to symbolize the stack traces. First look for it + // alongside our binary, then in $PATH. 
+ ErrorOr LLVMSymbolizerPathOrErr = std::error_code(); + if (const char *Path = getenv(LLVMSymbolizerPathEnv)) { + LLVMSymbolizerPathOrErr = sys::findProgramByName(Path); + } + if (!LLVMSymbolizerPathOrErr) + LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer"); + assert(!!LLVMSymbolizerPathOrErr && + "Debugify origin stacktraces require llvm-symbolizer."); + const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr; + + // Try to guess the main executable name, since we don't have argv0 available + // here. + std::string MainExecutableName = sys::fs::getMainExecutable(nullptr, nullptr); + + BumpPtrAllocator Allocator; + StringSaver StrPool(Allocator); + std::vector Modules(NumAddresses, nullptr); + std::vector Offsets(NumAddresses, 0); + if (!findModulesAndOffsets(AddressList.data(), NumAddresses, Modules.data(), + Offsets.data(), MainExecutableName.c_str(), + StrPool)) + return; + int InputFD; + SmallString<32> InputFile, OutputFile; + sys::fs::createTemporaryFile("symbolizer-input", "", InputFD, InputFile); + sys::fs::createTemporaryFile("symbolizer-output", "", OutputFile); + FileRemover InputRemover(InputFile.c_str()); + FileRemover OutputRemover(OutputFile.c_str()); + + { + raw_fd_ostream Input(InputFD, true); + for (int i = 0; i < NumAddresses; i++) { + if (Modules[i]) + Input << Modules[i] << " " << (void *)Offsets[i] << "\n"; + } + } + + std::optional Redirects[] = {InputFile.str(), OutputFile.str(), + StringRef("")}; + StringRef Args[] = {"llvm-symbolizer", "--functions=linkage", "--inlining", +#ifdef _WIN32 + // Pass --relative-address on Windows so that we don't + // have to add ImageBase from PE file. + // FIXME: Make this the default for llvm-symbolizer. + "--relative-address", +#endif + "--demangle"}; + int RunResult = + sys::ExecuteAndWait(LLVMSymbolizerPath, Args, std::nullopt, Redirects); + if (RunResult != 0) + return; + + // This report format is based on the sanitizer stack trace printer. 
See + // sanitizer_stacktrace_printer.cc in compiler-rt. + auto OutputBuf = MemoryBuffer::getFile(OutputFile.c_str()); + if (!OutputBuf) + return; + StringRef Output = OutputBuf.get()->getBuffer(); + SmallVector Lines; + Output.split(Lines, "\n"); + auto CurLine = Lines.begin(); + for (int i = 0; i < NumAddresses; i++) { + assert(!SymbolizedAddresses.contains(AddressList[i])); + std::string &SymbolizedAddr = SymbolizedAddresses[AddressList[i]]; + raw_string_ostream OS(SymbolizedAddr); + auto PrintLineHeader = [&]() { OS << format_ptr(AddressList[i]) << ' '; }; + if (!Modules[i]) { + PrintLineHeader(); + OS << '\n'; + continue; + } + // Read pairs of lines (function name and file/line info) until we + // encounter empty line. + for (;;) { + if (CurLine == Lines.end()) + return; + StringRef FunctionName = *CurLine++; + if (FunctionName.empty()) + break; + PrintLineHeader(); + if (!FunctionName.starts_with("??")) + OS << FunctionName << ' '; + if (CurLine == Lines.end()) + return; + StringRef FileLineInfo = *CurLine++; + if (!FileLineInfo.starts_with("??")) + OS << FileLineInfo; + else + OS << "(" << Modules[i] << '+' << format_hex(Offsets[i], 0) << ")"; + OS << "\n"; + } + } + return; +} +#endif + static bool printMarkupContext(raw_ostream &OS, const char *MainExecutableName); LLVM_ATTRIBUTE_USED diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc index 298fde1a387cc..913bacfe44803 100644 --- a/llvm/lib/Support/Unix/Signals.inc +++ b/llvm/lib/Support/Unix/Signals.inc @@ -499,6 +499,19 @@ static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) { return 0; } +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING +namespace llvm { +namespace sys { +template +int getStackTrace(std::array &StackTrace) { + return backtrace(StackTrace.data(), MaxDepth); +} +template int getStackTrace<8ul>(std::array &); +template int getStackTrace<16ul>(std::array &); +} // namespace sys +} // namespace llvm +#endif + /// If this is an ELF platform, we can 
find all loaded modules and their virtual /// addresses with dl_iterate_phdr. static bool findModulesAndOffsets(void **StackTrace, int Depth, diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc index 29ebf7c696e04..b385c226af865 100644 --- a/llvm/lib/Support/Windows/Signals.inc +++ b/llvm/lib/Support/Windows/Signals.inc @@ -9,6 +9,7 @@ // This file provides the Win32 specific implementation of the Signals class. // //===----------------------------------------------------------------------===// +#include "llvm/Config/config.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ExitCodes.h" #include "llvm/Support/FileSystem.h" @@ -538,6 +539,67 @@ void sys::PrintStackTraceOnErrorSignal(StringRef Argv0, extern "C" VOID WINAPI RtlCaptureContext(PCONTEXT ContextRecord); #endif +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING +namespace llvm { +namespace sys { +template +int getStackTrace(std::array &StackTrace) { + STACKFRAME64 StackFrame{}; + CONTEXT Context{}; + ::RtlCaptureContext(&Context); +#if defined(_M_X64) + StackFrame.AddrPC.Offset = Context.Rip; + StackFrame.AddrStack.Offset = Context.Rsp; + StackFrame.AddrFrame.Offset = Context.Rbp; +#elif defined(_M_IX86) + StackFrame.AddrPC.Offset = Context.Eip; + StackFrame.AddrStack.Offset = Context.Esp; + StackFrame.AddrFrame.Offset = Context.Ebp; +#elif defined(_M_ARM64) + StackFrame.AddrPC.Offset = Context.Pc; + StackFrame.AddrStack.Offset = Context.Sp; + StackFrame.AddrFrame.Offset = Context.Fp; +#elif defined(_M_ARM) + StackFrame.AddrPC.Offset = Context.Pc; + StackFrame.AddrStack.Offset = Context.Sp; + StackFrame.AddrFrame.Offset = Context.R11; +#endif + StackFrame.AddrPC.Mode = AddrModeFlat; + StackFrame.AddrStack.Mode = AddrModeFlat; + StackFrame.AddrFrame.Mode = AddrModeFlat; + + HANDLE hProcess = GetCurrentProcess(); + HANDLE hThread = GetCurrentThread(); + + // It's possible that DbgHelp.dll hasn't been loaded yet (e.g. 
if this + // function is called before the main program called `llvm::InitLLVM`). + // In this case just return, not stacktrace will be printed. + assert( + isDebugHelpInitialized() && + "getStackTrace must not be called before DbgHelp.dll has been loaded."); + + // Initialize the symbol handler. + fSymSetOptions(SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES); + fSymInitialize(hProcess, NULL, TRUE); + Context.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER; + + size_t WalkDepth = 0; + while (WalkDepth < MaxDepth && + fStackWalk64(NativeMachineType, hProcess, hThread, &StackFrame, + &Context, 0, fSymFunctionTableAccess64, + fSymGetModuleBase64, 0)) { + if (StackFrame.AddrFrame.Offset == 0) + break; + StackTrace[WalkDepth++] = (void *)(uintptr_t)StackFrame.AddrPC.Offset; + } + return WalkDepth; +} +template int llvm::sys::getStackTrace<8ul>(std::array &); +template int llvm::sys::getStackTrace<16ul>(std::array &); +} // namespace sys +} // namespace llvm +#endif + static void LocalPrintStackTrace(raw_ostream &OS, PCONTEXT C) { STACKFRAME64 StackFrame{}; CONTEXT Context{}; diff --git a/llvm/lib/Target/BPF/BPFPreserveStaticOffset.cpp b/llvm/lib/Target/BPF/BPFPreserveStaticOffset.cpp index 5d8339b4a44ce..7bdeb3f744a63 100644 --- a/llvm/lib/Target/BPF/BPFPreserveStaticOffset.cpp +++ b/llvm/lib/Target/BPF/BPFPreserveStaticOffset.cpp @@ -228,7 +228,8 @@ static Instruction *makeGEPAndLoad(Module *M, GEPChainInfo &GEP, CallInst *Call = makeIntrinsicCall(M, Intrinsic::bpf_getelementptr_and_load, {Load->getType()}, Args); setParamElementType(Call, 0, GEP.SourceElementType); - Call->applyMergedLocation(mergeDILocations(GEP.Members), Load->getDebugLoc()); + Call->applyMergedLocation(DebugLoc(mergeDILocations(GEP.Members)), + Load->getDebugLoc()); Call->setName((*GEP.Members.rbegin())->getName()); if (Load->isUnordered()) { Call->setOnlyReadsMemory(); @@ -252,7 +253,7 @@ static Instruction *makeGEPAndStore(Module *M, GEPChainInfo &GEP, setParamElementType(Call, 1, 
GEP.SourceElementType); if (Store->getValueOperand()->getType()->isPointerTy()) setParamReadNone(Call, 0); - Call->applyMergedLocation(mergeDILocations(GEP.Members), + Call->applyMergedLocation(DebugLoc(mergeDILocations(GEP.Members)), Store->getDebugLoc()); if (Store->isUnordered()) { Call->setOnlyWritesMemory(); diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp index 96c803c0186ef..94e05871a184e 100644 --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -731,7 +731,7 @@ static void moveFunctionData(Function &Old, Function &New, // other outlined instructions. if (!isa(&Val)) { // Remove the debug information for outlined functions. - Val.setDebugLoc(DebugLoc()); + Val.setDebugLoc(DebugLoc::getLineZero()); // Loop info metadata may contain line locations. Update them to have no // value in the new subprogram since the outlined code could be from @@ -1868,7 +1868,7 @@ replaceArgumentUses(OutlinableRegion &Region, Value *ValueOperand = SI->getValueOperand(); StoreInst *NewI = cast(I->clone()); - NewI->setDebugLoc(DebugLoc()); + NewI->setDebugLoc(DebugLoc::getLineZero()); BasicBlock *OutputBB = VBBIt->second; NewI->insertInto(OutputBB, OutputBB->end()); LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to " diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 1661fa564c65c..1bafcda612b05 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -1574,8 +1574,8 @@ bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) { // Insert a PHI node now if we need it. Value *MergedVal = OtherStore->getValueOperand(); // The debug locations of the original instructions might differ. Merge them. 
- DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(), - OtherStore->getDebugLoc()); + DebugLoc MergedLoc = + DebugLoc::getMergedLocation(SI.getDebugLoc(), OtherStore->getDebugLoc()); if (MergedVal != SI.getValueOperand()) { PHINode *PN = PHINode::Create(SI.getValueOperand()->getType(), 2, "storemerge"); diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp index 4a6dedc93d306..3e0bc19c1b6f8 100644 --- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -909,7 +909,7 @@ bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) { emitBaseConstants(Base, &R); ReBasesNum++; // Use the same debug location as the last user of the constant. - Base->setDebugLoc(DILocation::getMergedLocation( + Base->setDebugLoc(DebugLoc::getMergedLocation( Base->getDebugLoc(), R.User.Inst->getDebugLoc())); } assert(!Base->use_empty() && "The use list is empty!?"); diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index fe264503dee9e..1b6a6b000ab40 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -2220,10 +2220,10 @@ bool llvm::promoteLoopAccessesToScalars( }); // Look at all the loop uses, and try to merge their locations. - std::vector LoopUsesLocs; - for (auto *U : LoopUses) - LoopUsesLocs.push_back(U->getDebugLoc().get()); - auto DL = DebugLoc(DILocation::getMergedLocations(LoopUsesLocs)); + std::vector LoopUsesLocs; + for (auto U : LoopUses) + LoopUsesLocs.push_back(U->getDebugLoc()); + auto DL = DebugLoc::getMergedLocations(LoopUsesLocs); // We use the SSAUpdater interface to insert phi nodes as required. 
SmallVector NewPHIs; @@ -2243,7 +2243,7 @@ bool llvm::promoteLoopAccessesToScalars( if (SawUnorderedAtomic) PreheaderLoad->setOrdering(AtomicOrdering::Unordered); PreheaderLoad->setAlignment(Alignment); - PreheaderLoad->setDebugLoc(DebugLoc()); + PreheaderLoad->dropLocation(); if (AATags) PreheaderLoad->setAAMetadata(AATags); diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 11de37f7a7c10..76e534b872fab 100644 --- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -124,7 +124,7 @@ performBlockTailMerging(Function &F, ArrayRef BBs, // Now, go through each block (with the current terminator type) // we've recorded, and rewrite it to branch to the new common block. - DILocation *CommonDebugLoc = nullptr; + DebugLoc CommonDebugLoc; for (BasicBlock *BB : BBs) { auto *Term = BB->getTerminator(); assert(Term->getOpcode() == CanonicalTerm->getOpcode() && @@ -141,7 +141,7 @@ performBlockTailMerging(Function &F, ArrayRef BBs, CommonDebugLoc = Term->getDebugLoc(); else CommonDebugLoc = - DILocation::getMergedLocation(CommonDebugLoc, Term->getDebugLoc()); + DebugLoc::getMergedLocation(CommonDebugLoc, Term->getDebugLoc()); // And turn BB into a block that just unconditionally branches // to the canonical block. 
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp index fcc82eadac36c..3cb4a6dc2af60 100644 --- a/llvm/lib/Transforms/Utils/Debugify.cpp +++ b/llvm/lib/Transforms/Utils/Debugify.cpp @@ -15,7 +15,10 @@ #include "llvm/Transforms/Utils/Debugify.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Config/config.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/InstIterator.h" @@ -28,6 +31,11 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/JSON.h" #include +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING +// We need the Signals header to operate on stacktraces if we're using enhanced +// coverage tracking. +#include "llvm/Support/Signals.h" +#endif #define DEBUG_TYPE "debugify" @@ -57,6 +65,50 @@ cl::opt DebugifyLevel( raw_ostream &dbg() { return Quiet ? nulls() : errs(); } +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING +// These maps refer to addresses in this instance of LLVM, so we can reuse them +// everywhere - therefore, we store them at file scope. +static DenseMap SymbolizedAddrs; +static DenseSet UnsymbolizedAddrs; + +std::string symbolizeStacktrace(const Instruction *I) { + // We flush the set of unsymbolized addresses at the latest possible moment, + // i.e. now. 
+ if (!UnsymbolizedAddrs.empty()) { + sys::symbolizeAddresses(UnsymbolizedAddrs, SymbolizedAddrs); + UnsymbolizedAddrs.clear(); + } + DbgLocOriginBacktrace ST = I->getDebugLoc().getOrigin(); + std::string Result; + raw_string_ostream OS(Result); + for (size_t TraceIdx = 0; TraceIdx < ST.Stacktraces.size(); ++TraceIdx) { + if (TraceIdx != 0) + OS << "========================================\n"; + auto &[Depth, Stacktrace] = ST.Stacktraces[TraceIdx]; + for (int Frame = 0; Frame < Depth; ++Frame) { + assert(SymbolizedAddrs.contains(Stacktrace[Frame]) && + "Expected each address to have been symbolized."); + OS << right_justify(formatv("#{0}", Frame).str(), std::log10(Depth) + 2) + << ' ' << SymbolizedAddrs[Stacktrace[Frame]]; + } + } + return Result; +} +void collectStackAddresses(Instruction &I) { + DbgLocOriginBacktrace ST = I.getDebugLoc().getOrigin(); + for (auto &[Depth, Stacktrace] : ST.Stacktraces) { + for (int Frame = 0; Frame < Depth; ++Frame) { + void *Addr = Stacktrace[Frame]; + if (!SymbolizedAddrs.contains(Addr)) + UnsymbolizedAddrs.insert(Addr); + } + } +} +#else +std::string symbolizeStacktrace(const Instruction *I) { return ""; } +void collectStackAddresses(Instruction &I) {} +#endif + uint64_t getAllocSizeInBits(Module &M, Type *Ty) { return Ty->isSized() ? 
M.getDataLayout().getTypeAllocSizeInBits(Ty) : 0; } @@ -292,6 +344,16 @@ bool llvm::stripDebugifyMetadata(Module &M) { return Changed; } +bool hasLoc(const Instruction &I) { + const DILocation *Loc = I.getDebugLoc().get(); +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING + DebugLocKind Kind = I.getDebugLoc().getKind(); + return Loc || Kind != DebugLocKind::Normal; +#else + return Loc; +#endif +} + bool llvm::collectDebugInfoMetadata(Module &M, iterator_range Functions, DebugInfoPerPass &DebugInfoBeforePass, @@ -364,9 +426,9 @@ bool llvm::collectDebugInfoMetadata(Module &M, LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n'); DebugInfoBeforePass.InstToDelete.insert({&I, &I}); - const DILocation *Loc = I.getDebugLoc().get(); - bool HasLoc = Loc != nullptr; - DebugInfoBeforePass.DILocations.insert({&I, HasLoc}); + // Track the addresses to symbolize, if the feature is enabled. + collectStackAddresses(I); + DebugInfoBeforePass.DILocations.insert({&I, hasLoc(I)}); } } } @@ -440,15 +502,19 @@ static bool checkInstructions(const DebugInstMap &DILocsBefore, auto BB = Instr->getParent(); auto BBName = BB->hasName() ? 
BB->getName() : "no-name"; auto InstName = Instruction::getOpcodeName(Instr->getOpcode()); + auto InstLabel = Instr->getNameOrAsOperand(); auto InstrIt = DILocsBefore.find(Instr); if (InstrIt == DILocsBefore.end()) { if (ShouldWriteIntoJSON) - Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"}, - {"fn-name", FnName.str()}, - {"bb-name", BBName.str()}, - {"instr", InstName}, - {"action", "not-generate"}})); + Bugs.push_back( + llvm::json::Object({{"metadata", "DILocation"}, + {"fn-name", FnName.str()}, + {"bb-name", BBName.str()}, + {"instr-name", InstLabel}, + {"instr", InstName}, + {"action", "not-generate"}, + {"origin", symbolizeStacktrace(Instr)}})); else dbg() << "WARNING: " << NameOfWrappedPass << " did not generate DILocation for " << *Instr @@ -461,11 +527,14 @@ static bool checkInstructions(const DebugInstMap &DILocsBefore, // If the instr had the !dbg attached before the pass, consider it as // a debug info issue. if (ShouldWriteIntoJSON) - Bugs.push_back(llvm::json::Object({{"metadata", "DILocation"}, - {"fn-name", FnName.str()}, - {"bb-name", BBName.str()}, - {"instr", InstName}, - {"action", "drop"}})); + Bugs.push_back( + llvm::json::Object({{"metadata", "DILocation"}, + {"fn-name", FnName.str()}, + {"bb-name", BBName.str()}, + {"instr-name", InstLabel}, + {"instr", InstName}, + {"action", "drop"}, + {"origin", symbolizeStacktrace(Instr)}})); else dbg() << "WARNING: " << NameOfWrappedPass << " dropped DILocation of " << *Instr << " (BB: " << BBName << ", Fn: " << FnName @@ -609,10 +678,9 @@ bool llvm::checkDebugInfoMetadata(Module &M, LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n'); - const DILocation *Loc = I.getDebugLoc().get(); - bool HasLoc = Loc != nullptr; - - DebugInfoAfterPass.DILocations.insert({&I, HasLoc}); + // Track the addresses to symbolize, if the feature is enabled. 
+ collectStackAddresses(I); + DebugInfoAfterPass.DILocations.insert({&I, hasLoc(I)}); } } } @@ -662,6 +730,13 @@ bool llvm::checkDebugInfoMetadata(Module &M, // again in the collectDebugInfoMetadata(), since as an input we can use // the debugging information from the previous pass. DebugInfoBeforePass = DebugInfoAfterPass; + // We should make this conditional on debugify-each, but this has to be done + // if we're reusing DebugInfoAfterPass. + for (const auto &L : DebugInfoBeforePass.DILocations) { + auto Instr = L.first; + DebugInfoBeforePass.InstToDelete.insert( + {const_cast(Instr), const_cast(Instr)}); + } LLVM_DEBUG(dbgs() << "\n\n"); return Result; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index f23e28888931d..1724cb292ff25 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1111,7 +1111,7 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( // branch, drop it. When we fold the bonus instructions we want to make // sure we reset their debug locations in order to avoid stepping on // dead code caused by folding dead branches. - NewBonusInst->setDebugLoc(DebugLoc()); + NewBonusInst->dropLocation(); } RemapInstruction(NewBonusInst, VMap, @@ -1840,11 +1840,11 @@ bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf( // Ensure terminator gets a debug location, even an unknown one, in case // it involves inlinable calls. - SmallVector Locs; + SmallVector Locs; Locs.push_back(I1->getDebugLoc()); for (auto *OtherSuccTI : OtherSuccTIs) Locs.push_back(OtherSuccTI->getDebugLoc()); - NT->setDebugLoc(DILocation::getMergedLocations(Locs)); + NT->setDebugLoc(DebugLoc::getMergedLocations(Locs)); // PHIs created below will adopt NT's merged DebugLoc. 
IRBuilder Builder(NT); @@ -2725,7 +2725,7 @@ static void MergeCompatibleInvokesImpl(ArrayRef Invokes, MergedDebugLoc = II->getDebugLoc(); else MergedDebugLoc = - DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc()); + DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc()); // And replace the old `invoke` with an unconditionally branch // to the block with the merged `invoke`. @@ -3183,7 +3183,7 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, if (!SpeculatedStoreValue || &I != SpeculatedStore) { // Don't update the DILocation of dbg.assign intrinsics. if (!isa(&I)) - I.setDebugLoc(DebugLoc()); + I.dropLocation(); } I.dropUBImplyingAttrsAndMetadata(); diff --git a/llvm/utils/llvm-original-di-preservation.py b/llvm/utils/llvm-original-di-preservation.py index dc1fa518ca8e6..36577cec07826 100755 --- a/llvm/utils/llvm-original-di-preservation.py +++ b/llvm/utils/llvm-original-di-preservation.py @@ -13,14 +13,16 @@ class DILocBug: - def __init__(self, action, bb_name, fn_name, instr): + def __init__(self, origin, action, instr_name, bb_name, fn_name, instr): + self.origin = origin self.action = action + self.instr_name = instr_name self.bb_name = bb_name self.fn_name = fn_name self.instr = instr def __str__(self): - return self.action + self.bb_name + self.fn_name + self.instr + return self.action + self.bb_name + self.fn_name + self.instr + self.origin class DISPBug: @@ -50,6 +52,7 @@ def generate_html_report( di_location_bugs_summary, di_sp_bugs_summary, di_var_bugs_summary, + di_file_args, html_file, ): fileout = open(html_file, "w") @@ -85,7 +88,9 @@ def generate_html_report( "LLVM IR Instruction", "Function Name", "Basic Block Name", + "Instruction Name", "Action", + "Origin", ] for column in header_di_loc: @@ -111,7 +116,9 @@ def generate_html_report( row.append(x.instr) row.append(x.fn_name) row.append(x.bb_name) + row.append(x.instr_name) row.append(x.action) + row.append(f"
View Origin Stacktrace
{x.origin}
") row.append(" \n") # Dump the bugs info into the table. for column in row: @@ -338,6 +345,29 @@ def generate_html_report( """ table_di_var_sum += "\n" + # Create the table for the compiler args for each file. + table_title_file_args = "Compiler arguments per file" + table_file_args = """ + + + """.format( + table_title_file_args + ) + header_file_args = ["File", "Args"] + + for column in header_file_args: + table_file_args += " \n".format(column.strip()) + table_file_args += " \n" + row = [] + for file, args in di_file_args.items(): + row.append(" \n") + row.append(" \n".format(file.strip())) + row.append(" \n".format(args.strip())) + row.append(" \n") + for column in row: + table_file_args += column + table_file_args += " \n" + # Finish the html page. html_footer = """""" @@ -358,6 +388,8 @@ def generate_html_report( fileout.writelines(table_di_var) fileout.writelines(new_line) fileout.writelines(table_di_var_sum) + fileout.writelines(new_line) + fileout.writelines(table_file_args) fileout.writelines(html_footer) fileout.close() @@ -427,10 +459,12 @@ def Main(): print("error: The output file must be '.html'.") sys.exit(1) + di_file_args = OrderedDict() + # Use the defaultdict in order to make multidim dicts. - di_location_bugs = defaultdict(lambda: defaultdict(dict)) - di_subprogram_bugs = defaultdict(lambda: defaultdict(dict)) - di_variable_bugs = defaultdict(lambda: defaultdict(dict)) + di_location_bugs = defaultdict(lambda: defaultdict(list)) + di_subprogram_bugs = defaultdict(lambda: defaultdict(list)) + di_variable_bugs = defaultdict(lambda: defaultdict(list)) # Use the ordered dict to make a summary. 
di_location_bugs_summary = OrderedDict() @@ -467,12 +501,17 @@ def Main(): bugs_pass = bugs_per_pass["pass"] bugs = bugs_per_pass["bugs"][0] except: - skipped_lines += 1 + try: + file = bugs_per_pass["file"] + args = bugs_per_pass["args"] + di_file_args[file] = args + except: + skipped_lines += 1 continue - di_loc_bugs = [] - di_sp_bugs = [] - di_var_bugs = [] + di_loc_bugs = di_location_bugs[bugs_file][bugs_pass] + di_sp_bugs = di_subprogram_bugs[bugs_file][bugs_pass] + di_var_bugs = di_variable_bugs[bugs_file][bugs_pass] # Omit duplicated bugs. di_loc_set = set() @@ -487,14 +526,16 @@ def Main(): if bugs_metadata == "DILocation": try: + origin = bug["origin"] action = bug["action"] + instr_name = bug["instr-name"] bb_name = bug["bb-name"] fn_name = bug["fn-name"] instr = bug["instr"] except: skipped_bugs += 1 continue - di_loc_bug = DILocBug(action, bb_name, fn_name, instr) + di_loc_bug = DILocBug(origin, action, instr_name, bb_name, fn_name, instr) if not str(di_loc_bug) in di_loc_set: di_loc_set.add(str(di_loc_bug)) if opts.compress: @@ -573,6 +614,7 @@ def Main(): di_location_bugs_summary, di_sp_bugs_summary, di_var_bugs_summary, + di_file_args, opts.html_file, )
{}
{0}
{0}{0}