-
Notifications
You must be signed in to change notification settings - Fork 14.6k
[include-cleaner] Handle symbols from system headers. #66089
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,10 @@ | |
#include "llvm/ADT/StringRef.h" | ||
#include "llvm/Support/Casting.h" | ||
#include "llvm/Support/ErrorHandling.h" | ||
#include "llvm/Support/Path.h" | ||
#include <cassert> | ||
#include <cstddef> | ||
#include <map> | ||
#include <optional> | ||
#include <queue> | ||
#include <set> | ||
|
@@ -176,6 +180,104 @@ headersForSpecialSymbol(const Symbol &S, const SourceManager &SM, | |
return std::nullopt; | ||
} | ||
|
||
// A hand-mainained list of include mappings for system headers. | ||
// The first element is the suffix of the physical file path for the system | ||
// header, the second element is the final verbatim header spelling. | ||
const std::pair<llvm::StringRef, llvm::StringRef> IncludeMappings[] = { | ||
{"bits/types/struct_timeval.h", "<sys/time.h>"}, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what about extracting these into a |
||
{"bits/pthreadtypes.h", "<pthread.h>"}, | ||
{"bits/types/struct_iovec.h", "<sys/uio.h>"}, | ||
{"linux/limits.h", "<limits.h>"}, | ||
{"bits/getopt_core.h", "<getopt.h>"}, | ||
{"asm-generic/socket.h", "<sys/socket.h>"}, | ||
{"bits/posix1_lim.h", "<limits.h>"}, | ||
{"bits/time.h", "time.h"}, | ||
{"bits/getopt_ext.h", "<getopt.h>"}, | ||
{"bits/types/sigset_t.h", "<signal.h>"}, | ||
{"bits/types/siginfo_t.h", "<signal.h>"}, | ||
{"bits/types/FILE.h", "<stdio.h>"}, | ||
{"asm/unistd_64.h", "<asm/unistd.h>"}, | ||
{"bits/stat.h", "<sys/stat.h>"}, | ||
{"asm-generic/ioctls.h", "<sys/ioctls.h>"}, | ||
{"asm-generic/errno.h", "<errno.h>"}, | ||
{"asm-generic/int-ll64.h", "<asm/types.h>"}, | ||
{"bits/sigaction.h", "<signal.h>"}, | ||
{"bits/types/struct_rusage.h", "<sys/resource.h>"}, | ||
{"sys/syscall.h", "<syscall.h>"}, | ||
{"linux/prctl.h", "<sys/prctl.h>"}, | ||
{"sys/ucontext.h", "<ucontext.h>"}, | ||
{"bits/types/clockid_t.h", "<time.h>"}, | ||
{"bits/types/mbstate_t.h", "<uchar.h>"}, | ||
{"bits/types/mbstate_t.h", "<wchar.h>"}, | ||
{"bits/types/struct_itimerspec.h", "<time.h>"}, | ||
{"bits/types/time_t.h", "<time.h>"}, | ||
{"bits/sockaddr.h", "<time.h>"}, | ||
}; | ||
// The maximum number of path components in a key from SuffixHeaderMapping. | ||
// Used to minimize the number of lookups in suffix path mappings. | ||
constexpr int MaxSuffixComponents = 3; | ||
|
||
// A mapping for system headers based on a set of filename rules. | ||
class SystemIncludeMap { | ||
public: | ||
static const SystemIncludeMap &get() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i think it's important that we also make these mappings different per language. most of them will likely be language independent, but at least libc extensions are likely to have different spellings for c vs c++. moreover these fell more like private -> public mappings we perform via PragmaIncludes, so what about just moving them there? that way we can also initialize the mapping based on language options and also benefit all the other users of pragma includes with similar mappings? |
||
static SystemIncludeMap Instance; | ||
static const auto *SystemHeaderMap = [] { | ||
const size_t Size = sizeof(IncludeMappings) / sizeof(IncludeMappings[0]); | ||
auto *HeaderMap = new std::multimap<llvm::StringRef, llvm::StringRef>(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do we have a multipmap here instead of a just StringMap? do we expect to have suffixes that map to multiple umbrella headers? |
||
for (size_t I = 0; I < Size; I++) { | ||
HeaderMap->insert(IncludeMappings[I]); | ||
} | ||
return HeaderMap; | ||
}(); | ||
|
||
// // Check MaxSuffixComponents constant is correct. | ||
assert(llvm::all_of(*SystemHeaderMap, [](const auto &KeyAndValue) { | ||
return std::distance( | ||
llvm::sys::path::begin(KeyAndValue.first, | ||
llvm::sys::path::Style::posix), | ||
llvm::sys::path::end(KeyAndValue.first)) <= | ||
MaxSuffixComponents; | ||
})); | ||
// ... and precise. | ||
assert(llvm::any_of(*SystemHeaderMap, [](const auto &KeyAndValue) { | ||
return std::distance( | ||
llvm::sys::path::begin(KeyAndValue.first, | ||
llvm::sys::path::Style::posix), | ||
llvm::sys::path::end(KeyAndValue.first)) == | ||
MaxSuffixComponents; | ||
})); | ||
Comment on lines
+235
to
+249
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i think we can move these assertions into the lambda, no need to perform them every time we access the singleton |
||
|
||
Instance.SuffixHeaderMapping = SystemHeaderMap; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you're modifying a global with every call here, in a non-thread safe way. |
||
return Instance; | ||
} | ||
|
||
// Returns the overridden verbatim spellings for HeaderPath that can be | ||
// directly included. | ||
llvm::SmallVector<llvm::StringRef> | ||
mapHeader(llvm::StringRef HeaderPath) const { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what about making this one static instead and getting rid of the extra singleton interface? |
||
if (!SuffixHeaderMapping) | ||
return {}; | ||
|
||
int Components = 1; | ||
llvm::SmallVector<llvm::StringRef> Headers; | ||
for (auto It = llvm::sys::path::rbegin(HeaderPath), | ||
End = llvm::sys::path::rend(HeaderPath); | ||
It != End && Components <= MaxSuffixComponents; ++It, ++Components) { | ||
auto SubPath = HeaderPath.substr(It->data() - HeaderPath.begin()); | ||
auto FoundRange = SuffixHeaderMapping->equal_range(SubPath); | ||
for (auto It = FoundRange.first; It != FoundRange.second; ++It) | ||
Headers.push_back(It->second); | ||
} | ||
return Headers;; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. extra |
||
} | ||
|
||
private: | ||
// A map from a suffix path to a verbatim header spelling (with <>). | ||
const std::multimap<llvm::StringRef, llvm::StringRef> | ||
*SuffixHeaderMapping = nullptr; | ||
}; | ||
|
||
} // namespace | ||
|
||
llvm::SmallVector<Hinted<Header>> findHeaders(const SymbolLocation &Loc, | ||
|
@@ -188,6 +290,20 @@ llvm::SmallVector<Hinted<Header>> findHeaders(const SymbolLocation &Loc, | |
OptionalFileEntryRef FE = SM.getFileEntryRefForID(FID); | ||
if (!FE) | ||
return {}; | ||
|
||
if (SrcMgr::isSystem( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i am afraid this check might be overly strict, maybe drop this initially and see if we have too many false-positives? |
||
SM.getSLocEntry(FID).getFile().getFileCharacteristic())) { | ||
if (auto MappingHeader = SystemIncludeMap::get().mapHeader(FE->getName()); | ||
!MappingHeader.empty()) { | ||
for (auto &Header : MappingHeader) | ||
Results.emplace_back(Header, Hints::PublicHeader); | ||
assert(!Results.empty()); | ||
Results.front().Hint |= Hints::PreferredHeader; | ||
// FIXME: should we include the original header as well? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, i think we should treat these similar to IWYU private -> public mappings. |
||
return Results; | ||
} | ||
} | ||
|
||
if (!PI) | ||
return {{*FE, Hints::PublicHeader | Hints::OriginHeader}}; | ||
bool IsOrigin = true; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we discussed offline that this list is mostly for demonstration purposes, but wanted to remind it once again. we probably want to start with at least what we have in clangd, right?