Skip to content

Commit 6f28b4b

Browse files
authored
[GSYM] Add support for querying merged functions in llvm-gsymutil (#120991)
Adds the ability to look up and display all merged functions for an address in llvm-gsymutil. Now, when `--merged-functions` is used in combination with `--address/--addresses-from-stdin`, lookup results will contain information about merged functions, if available. To support printing merged function information when using the `--verbose` option, the `LookupResult` data structure also had to be extended with pointers to the raw function data and raw merged function data. This is because merged functions share the same address range, so it's not easy to look up the raw merged function data for a particular `LookupResult` that is based on a merged function.
1 parent dc0e258 commit 6f28b4b

File tree

9 files changed

+181
-37
lines changed

9 files changed

+181
-37
lines changed

llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -187,13 +187,17 @@ struct FunctionInfo {
187187
///
188188
/// \param Addr The address to lookup.
189189
///
190+
/// \param MergedFuncsData A pointer to an optional DataExtractor that, if
191+
/// non-null, will be set to the raw data of the MergedFunctionInfo, if
192+
/// present.
193+
///
190194
/// \returns A LookupResult or an error describing the issue that was
191195
/// encountered during decoding. An error should only be returned if the
192196
/// address is not contained in the FunctionInfo or if the data is corrupted.
193-
static llvm::Expected<LookupResult> lookup(DataExtractor &Data,
194-
const GsymReader &GR,
195-
uint64_t FuncAddr,
196-
uint64_t Addr);
197+
static llvm::Expected<LookupResult>
198+
lookup(DataExtractor &Data, const GsymReader &GR, uint64_t FuncAddr,
199+
uint64_t Addr,
200+
std::optional<DataExtractor> *MergedFuncsData = nullptr);
197201

198202
uint64_t startAddress() const { return Range.start(); }
199203
uint64_t endAddress() const { return Range.end(); }

llvm/include/llvm/DebugInfo/GSYM/GsymReader.h

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,10 +127,29 @@ class GsymReader {
127127
/// is much faster for lookups.
128128
///
129129
/// \param Addr A virtual address from the original object file to lookup.
130+
///
131+
/// \param MergedFuncsData A pointer to an optional DataExtractor that, if
132+
/// non-null, will be set to the raw data of the MergedFunctionInfo, if
133+
/// present.
134+
///
130135
/// \returns An expected LookupResult that contains only the information
131136
/// needed for the current address, or an error object that indicates reason
132137
/// for failing to lookup the address.
133-
llvm::Expected<LookupResult> lookup(uint64_t Addr) const;
138+
llvm::Expected<LookupResult>
139+
lookup(uint64_t Addr,
140+
std::optional<DataExtractor> *MergedFuncsData = nullptr) const;
141+
142+
/// Lookup all merged functions for a given address.
143+
///
144+
/// This function performs a lookup for the specified address and then
145+
/// retrieves additional LookupResults from any merged functions associated
146+
/// with the primary LookupResult.
147+
///
148+
/// \param Addr The address to lookup.
149+
///
150+
/// \returns A vector of LookupResult objects, where the first element is the
151+
/// primary result, followed by results for any merged functions.
152+
llvm::Expected<std::vector<LookupResult>> lookupAll(uint64_t Addr) const;
134153

135154
/// Get a string from the string table.
136155
///

llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,18 @@ struct MergedFunctionsInfo {
3131
/// \returns A boolean indicating if this MergedFunctionsInfo is valid.
3232
bool isValid() { return !MergedFunctions.empty(); }
3333

34+
/// Get a vector of DataExtractor objects for the functions in this
35+
/// MergedFunctionsInfo object.
36+
///
37+
/// \param Data The binary stream to read the data from. This object must have
38+
/// the data for the MergedFunctionsInfo object starting at offset zero. The
39+
/// data can contain more data than needed.
40+
///
41+
/// \returns An llvm::Expected containing a vector of DataExtractor objects on
42+
/// success, or an error object if parsing fails.
43+
static llvm::Expected<std::vector<DataExtractor>>
44+
getFuncsDataExtractors(DataExtractor &Data);
45+
3446
/// Decode a MergedFunctionsInfo object from a binary data stream.
3547
///
3648
/// \param Data The binary stream to read the data from. This object must have

llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -235,10 +235,10 @@ llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out,
235235
return FuncInfoOffset;
236236
}
237237

238-
llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data,
239-
const GsymReader &GR,
240-
uint64_t FuncAddr,
241-
uint64_t Addr) {
238+
llvm::Expected<LookupResult>
239+
FunctionInfo::lookup(DataExtractor &Data, const GsymReader &GR,
240+
uint64_t FuncAddr, uint64_t Addr,
241+
std::optional<DataExtractor> *MergedFuncsData) {
242242
LookupResult LR;
243243
LR.LookupAddr = Addr;
244244
uint64_t Offset = 0;
@@ -289,6 +289,12 @@ llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data,
289289
return ExpectedLE.takeError();
290290
break;
291291

292+
case InfoType::MergedFunctionsInfo:
293+
// Store the merged functions data for later parsing, if needed.
294+
if (MergedFuncsData)
295+
*MergedFuncsData = InfoData;
296+
break;
297+
292298
case InfoType::InlineInfo:
293299
// We will parse the inline info after our line table, but only if
294300
// we have a line entry.

llvm/lib/DebugInfo/GSYM/GsymReader.cpp

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -334,14 +334,52 @@ GsymReader::getFunctionInfoAtIndex(uint64_t Idx) const {
334334
return ExpectedData.takeError();
335335
}
336336

337-
llvm::Expected<LookupResult> GsymReader::lookup(uint64_t Addr) const {
337+
llvm::Expected<LookupResult>
338+
GsymReader::lookup(uint64_t Addr,
339+
std::optional<DataExtractor> *MergedFunctionsData) const {
338340
uint64_t FuncStartAddr = 0;
339341
if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
340-
return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr);
342+
return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr,
343+
MergedFunctionsData);
341344
else
342345
return ExpectedData.takeError();
343346
}
344347

348+
llvm::Expected<std::vector<LookupResult>>
349+
GsymReader::lookupAll(uint64_t Addr) const {
350+
std::vector<LookupResult> Results;
351+
std::optional<DataExtractor> MergedFunctionsData;
352+
353+
// First perform a lookup to get the primary function info result.
354+
auto MainResult = lookup(Addr, &MergedFunctionsData);
355+
if (!MainResult)
356+
return MainResult.takeError();
357+
358+
// Add the main result as the first entry.
359+
Results.push_back(std::move(*MainResult));
360+
361+
// Now process any merged functions data that was found during the lookup.
362+
if (MergedFunctionsData) {
363+
// Get data extractors for each merged function.
364+
auto ExpectedMergedFuncExtractors =
365+
MergedFunctionsInfo::getFuncsDataExtractors(*MergedFunctionsData);
366+
if (!ExpectedMergedFuncExtractors)
367+
return ExpectedMergedFuncExtractors.takeError();
368+
369+
// Process each merged function data.
370+
for (DataExtractor &MergedData : *ExpectedMergedFuncExtractors) {
371+
if (auto FI = FunctionInfo::lookup(MergedData, *this,
372+
MainResult->FuncRange.start(), Addr)) {
373+
Results.push_back(std::move(*FI));
374+
} else {
375+
return FI.takeError();
376+
}
377+
}
378+
}
379+
380+
return Results;
381+
}
382+
345383
void GsymReader::dump(raw_ostream &OS) {
346384
const auto &Header = getHeader();
347385
// Dump the GSYM header.

llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,22 +35,59 @@ llvm::Error MergedFunctionsInfo::encode(FileWriter &Out) const {
3535
llvm::Expected<MergedFunctionsInfo>
3636
MergedFunctionsInfo::decode(DataExtractor &Data, uint64_t BaseAddr) {
3737
MergedFunctionsInfo MFI;
38+
auto FuncExtractorsOrError = MFI.getFuncsDataExtractors(Data);
39+
40+
if (!FuncExtractorsOrError)
41+
return FuncExtractorsOrError.takeError();
42+
43+
for (DataExtractor &FuncData : *FuncExtractorsOrError) {
44+
llvm::Expected<FunctionInfo> FI = FunctionInfo::decode(FuncData, BaseAddr);
45+
if (!FI)
46+
return FI.takeError();
47+
MFI.MergedFunctions.push_back(std::move(*FI));
48+
}
49+
50+
return MFI;
51+
}
52+
53+
llvm::Expected<std::vector<DataExtractor>>
54+
MergedFunctionsInfo::getFuncsDataExtractors(DataExtractor &Data) {
55+
std::vector<DataExtractor> Results;
3856
uint64_t Offset = 0;
57+
58+
// Ensure there is enough data to read the function count.
59+
if (!Data.isValidOffsetForDataOfSize(Offset, 4))
60+
return createStringError(
61+
std::errc::io_error,
62+
"unable to read the function count at offset 0x%8.8" PRIx64, Offset);
63+
3964
uint32_t Count = Data.getU32(&Offset);
4065

4166
for (uint32_t i = 0; i < Count; ++i) {
67+
// Ensure there is enough data to read the function size.
68+
if (!Data.isValidOffsetForDataOfSize(Offset, 4))
69+
return createStringError(
70+
std::errc::io_error,
71+
"unable to read size of function %u at offset 0x%8.8" PRIx64, i,
72+
Offset);
73+
4274
uint32_t FnSize = Data.getU32(&Offset);
43-
DataExtractor FnData(Data.getData().substr(Offset, FnSize),
75+
76+
// Ensure there is enough data for the function content.
77+
if (!Data.isValidOffsetForDataOfSize(Offset, FnSize))
78+
return createStringError(
79+
std::errc::io_error,
80+
"function data is truncated for function %u at offset 0x%8.8" PRIx64
81+
", expected size %u",
82+
i, Offset, FnSize);
83+
84+
// Extract the function data.
85+
Results.emplace_back(Data.getData().substr(Offset, FnSize),
4486
Data.isLittleEndian(), Data.getAddressSize());
45-
llvm::Expected<FunctionInfo> FI =
46-
FunctionInfo::decode(FnData, BaseAddr + Offset);
47-
if (!FI)
48-
return FI.takeError();
49-
MFI.MergedFunctions.push_back(std::move(*FI));
87+
5088
Offset += FnSize;
5189
}
52-
53-
return MFI;
90+
return Results;
5491
}
5592

5693
bool operator==(const MergedFunctionsInfo &LHS,

llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,16 @@
6464
# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:10
6565
# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:6
6666

67+
## Test the lookup functionality for merged functions:
68+
# RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 --merged-functions | FileCheck --check-prefix=CHECK-MERGED-LOOKUP %s
69+
# RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 | FileCheck --check-prefix=CHECK-NORMAL-LOOKUP %s
70+
71+
# CHECK-MERGED-LOOKUP: Found 3 functions at address 0x0000000000000248:
72+
# CHECK-MERGED-LOOKUP-NEXT: 0x0000000000000248: my_func_02 @ /tmp/test_gsym_yaml/out/file_02.cpp:5
73+
# CHECK-MERGED-LOOKUP-NEXT-NEXT: 0x0000000000000248: my_func_01 @ /tmp/test_gsym_yaml/out/file_01.cpp:5
74+
# CHECK-MERGED-LOOKUP-NEXT-NEXT: 0x0000000000000248: my_func_03 @ /tmp/test_gsym_yaml/out/file_03.cpp:5
75+
76+
# CHECK-NORMAL-LOOKUP: 0x0000000000000248: my_func_01 @ /tmp/test_gsym_yaml/out/file_01.cpp:5
6777

6878

6979
--- !mach-o

llvm/tools/llvm-gsymutil/Opts.td

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@ defm convert :
1717
Eq<"convert",
1818
"Convert the specified file to the GSYM format.\nSupported files include ELF and mach-o files that will have their debug info (DWARF) and symbol table converted">;
1919
def merged_functions :
20-
FF<"merged-functions", "Encode merged function information for functions in debug info that have matching address ranges.\nWithout this option one function per unique address range will be emitted.">;
20+
FF<"merged-functions", "When used with --convert, encodes merged function information for functions in debug info that have matching address ranges.\n"
21+
"Without this option one function per unique address range will be emitted.\n"
22+
"When used with --address/--addresses-from-stdin, all merged functions for a particular address will be displayed.\n"
23+
"Without this option only one function will be displayed.">;
2124
def dwarf_callsites : FF<"dwarf-callsites", "Load call site info from DWARF, if available">;
2225
defm callsites_yaml_file :
2326
Eq<"callsites-yaml-file", "Load call site info from YAML file. Useful for testing.">, Flags<[HelpHidden]>;

llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ static uint64_t SegmentSize;
9898
static bool Quiet;
9999
static std::vector<uint64_t> LookupAddresses;
100100
static bool LookupAddressesFromStdin;
101-
static bool StoreMergedFunctionInfo = false;
101+
static bool UseMergedFunctions = false;
102102
static bool LoadDwarfCallSites = false;
103103
static std::string CallSiteYamlPath;
104104

@@ -181,7 +181,7 @@ static void parseArgs(int argc, char **argv) {
181181
}
182182

183183
LookupAddressesFromStdin = Args.hasArg(OPT_addresses_from_stdin);
184-
StoreMergedFunctionInfo = Args.hasArg(OPT_merged_functions);
184+
UseMergedFunctions = Args.hasArg(OPT_merged_functions);
185185

186186
if (Args.hasArg(OPT_callsites_yaml_file_EQ)) {
187187
CallSiteYamlPath = Args.getLastArgValue(OPT_callsites_yaml_file_EQ);
@@ -380,7 +380,7 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
380380
// functions in the first FunctionInfo with that address range. Do this right
381381
// after loading the DWARF data so we don't have to deal with functions from
382382
// the symbol table.
383-
if (StoreMergedFunctionInfo)
383+
if (UseMergedFunctions)
384384
Gsym.prepareMergedFunctions(Out);
385385

386386
// Get the UUID and convert symbol table to GSYM.
@@ -508,24 +508,39 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
508508
}
509509

510510
static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
511-
if (auto Result = Gsym.lookup(Addr)) {
512-
// If verbose is enabled dump the full function info for the address.
513-
if (Verbose) {
514-
if (auto FI = Gsym.getFunctionInfo(Addr)) {
515-
OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
516-
Gsym.dump(OS, *FI);
517-
OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
511+
auto logError = [Addr, &OS](Error E) {
512+
OS << HEX64(Addr) << ": ";
513+
logAllUnhandledErrors(std::move(E), OS, "error: ");
514+
};
515+
516+
if (UseMergedFunctions) {
517+
if (auto Results = Gsym.lookupAll(Addr)) {
518+
OS << "Found " << Results->size() << " functions at address "
519+
<< HEX64(Addr) << ":\n";
520+
for (size_t i = 0; i < Results->size(); ++i) {
521+
OS << " " << Results->at(i);
522+
523+
if (i != Results->size() - 1)
524+
OS << "\n";
518525
}
519526
}
520-
OS << Result.get();
521-
} else {
522-
if (Verbose)
523-
OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
524-
OS << HEX64(Addr) << ": ";
525-
logAllUnhandledErrors(Result.takeError(), OS, "error: ");
527+
} else { /* UseMergedFunctions == false */
528+
if (auto Result = Gsym.lookup(Addr)) {
529+
OS << Result.get();
530+
} else {
531+
logError(Result.takeError());
532+
return;
533+
}
526534
}
527-
if (Verbose)
535+
536+
if (Verbose) {
537+
if (auto FI = Gsym.getFunctionInfo(Addr)) {
538+
OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
539+
Gsym.dump(OS, *FI);
540+
OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
541+
}
528542
OS << "\n";
543+
}
529544
}
530545

531546
int llvm_gsymutil_main(int argc, char **argv, const llvm::ToolContext &) {

0 commit comments

Comments
 (0)