From d8efcec0b00742e8c8ea9be0721e0fb67c255f66 Mon Sep 17 00:00:00 2001 From: Varun Gandhi Date: Thu, 8 May 2025 12:51:17 +0000 Subject: [PATCH] fix: Strip common arch-specific flags before processing --- indexer/CommandLineCleaner.cc | 102 +++++++++++++++++++++++++++++ indexer/CommandLineCleaner.h | 37 +++++++++++ indexer/CompilationDatabase.cc | 115 ++++++++++----------------------- test/test_main.cc | 28 ++++++++ 4 files changed, 200 insertions(+), 82 deletions(-) create mode 100644 indexer/CommandLineCleaner.cc create mode 100644 indexer/CommandLineCleaner.h diff --git a/indexer/CommandLineCleaner.cc b/indexer/CommandLineCleaner.cc new file mode 100644 index 00000000..df6a83a7 --- /dev/null +++ b/indexer/CommandLineCleaner.cc @@ -0,0 +1,102 @@ +#include "llvm/ADT/StringRef.h" + +#include "indexer/CommandLineCleaner.h" +#include "indexer/Enforce.h" + +namespace { + +enum class Action { + Keep, + ZapOne, + ZapTwo, +}; + +template +void zap(std::vector &vs, absl::FunctionRef check) { + bool dropFromBefore = false; + auto zappedBegin = + std::stable_partition(vs.begin(), vs.end(), [&](const T &v) -> bool { + if (dropFromBefore) { + dropFromBefore = false; + return false; + } + switch (check(v)) { + case Action::Keep: + return true; + case Action::ZapOne: + return false; + case Action::ZapTwo: + dropFromBefore = true; + return false; + } + }); + vs.resize(std::distance(vs.begin(), zappedBegin)); +} + +// Strip out architecture specific flags, because scip-clang may +// be used to index code which relies on architectures known only +// to GCC, or only to some proprietary compilers. +constexpr const char *clangGccSkipOptionsWithArgs[] = { + "-march", + "-mcpu", + "-mtune", +}; + +// Patterns of arg-less options to strip out. +// +// For example, Clang supports -mfix-cortex-a53-835769 (so does GCC) +// but GCC supports -mfix-cortex-a53-843419 which is not supported by Clang. +// +// In practice, options starting with '-m' seem to all correspond to +// ABI-related options (which ~never affect codenav). However, we cannot +// simply use '-m.*' as the pattern here, because some options with '-m' +// take an argument and some do not, and there isn't an easy programmatic +// way to determine which ones do/do not. +constexpr const char *clangGccSkipOptionsNoArgsPattern = "-m(no-)?fix-.*"; + +} // namespace + +namespace scip_clang::compdb { + +void CommandLineCleaner::clean(std::vector &commandLine) const { + zap(commandLine, [this](const std::string &arg) -> Action { + if (!arg.starts_with('-')) { + return Action::Keep; + } + std::string_view flag = arg; + auto eqIndex = arg.find('='); + if (eqIndex != std::string::npos) { + flag = std::string_view(arg.data(), eqIndex); + } else if (this->noArgumentMatcher + && this->noArgumentMatcher->match(llvm::StringRef(arg))) { + return Action::ZapOne; + } + auto it = this->toZap.find(flag); + if (it == this->toZap.end()) { + return Action::Keep; + } + switch (it->second) { + case CliOptionKind::NoArgument: + return Action::ZapOne; + case CliOptionKind::OneArgument: + if (flag.size() == arg.size()) { + return Action::ZapTwo; + } + return Action::ZapOne; + } + ENFORCE(false, "should've exited earlier"); + }); +} + +std::unique_ptr CommandLineCleaner::forClangOrGcc() { + CommandLineCleaner::MapType toZap; + for (auto s : clangGccSkipOptionsWithArgs) { + toZap.emplace(std::string_view(s), CliOptionKind::NoArgument); + } + CommandLineCleaner cleaner{ + .toZap = std::move(toZap), + .noArgumentMatcher = {llvm::Regex(clangGccSkipOptionsNoArgsPattern)}}; + return std::make_unique(std::move(cleaner)); +} + +} // namespace scip_clang::compdb \ No newline at end of file diff --git a/indexer/CommandLineCleaner.h b/indexer/CommandLineCleaner.h new file mode 100644 index 00000000..41fac24a --- /dev/null +++ b/indexer/CommandLineCleaner.h @@ -0,0 +1,37 @@ +#ifndef SCIP_CLANG_COMMAND_LINE_CLEANER_H +#define SCIP_CLANG_COMMAND_LINE_CLEANER_H + +#include +#include +#include + +#include "absl/container/flat_hash_map.h" +#include "llvm/Support/Regex.h" + +namespace scip_clang::compdb { + +enum class CliOptionKind { + NoArgument, + OneArgument, +}; + +struct CommandLineCleaner { + using MapType = absl::flat_hash_map; + // Fixed list of options for which the command-line arguments should be + // zapped. If CliOptionKind is NoArgument, then only one string will be + // zapped. If CliOptionKind is OneArgument, then two successive strings will + // be zapped. + MapType toZap; + // Optional matcher for zapping arguments more flexibly. + // This is to allow for handling unknown flags which match a particular + // pattern. For known flags, put them in toZap. + std::optional noArgumentMatcher; + + void clean(std::vector &commandLine) const; + + static std::unique_ptr forClangOrGcc(); +}; + +} // namespace scip_clang::compdb + +#endif \ No newline at end of file diff --git a/indexer/CompilationDatabase.cc b/indexer/CompilationDatabase.cc index c9d96125..1a62ccd1 100644 --- a/indexer/CompilationDatabase.cc +++ b/indexer/CompilationDatabase.cc @@ -21,6 +21,7 @@ #include "rapidjson/reader.h" #include "spdlog/fmt/fmt.h" +#include "indexer/CommandLineCleaner.h" #include "indexer/CompilationDatabase.h" #include "indexer/FileSystem.h" #include "indexer/LlvmCommandLineParsing.h" @@ -73,13 +74,16 @@ namespace { using AbsolutePath = scip_clang::AbsolutePath; using ToolchainInfo = scip_clang::compdb::ToolchainInfo; +using CommandLineCleaner = scip_clang::compdb::CommandLineCleaner; using CompilerKind = scip_clang::compdb::CompilerKind; +using CliOptionKind = scip_clang::compdb::CliOptionKind; struct ClangToolchainInfo : public ToolchainInfo { std::string resourceDir; std::vector findResourceDirInvocation; std::string compilerDriverPath; std::vector findDriverInvocation; + std::unique_ptr cleaner; // All strings and vectors above should be non-empty for // a valid toolchain. @@ -87,11 +91,13 @@ struct ClangToolchainInfo : public ToolchainInfo { ClangToolchainInfo(std::string resourceDir, std::vector findResourceDirInvocation, std::string compilerDriverPath, - std::vector findDriverInvocation) + std::vector findDriverInvocation, + std::unique_ptr cleaner) : ToolchainInfo(), resourceDir(resourceDir), findResourceDirInvocation(findResourceDirInvocation), compilerDriverPath(compilerDriverPath), - findDriverInvocation(findDriverInvocation){}; + findDriverInvocation(findDriverInvocation), + cleaner(std::move(cleaner)){}; virtual CompilerKind kind() const override { return CompilerKind::Clang; @@ -116,6 +122,7 @@ struct ClangToolchainInfo : public ToolchainInfo { virtual void adjustCommandLine(std::vector &commandLine) const override { commandLine[0] = this->compilerDriverPath; + this->cleaner->clean(commandLine); commandLine.push_back("-resource-dir"); commandLine.push_back(this->resourceDir); } @@ -165,18 +172,21 @@ struct ClangToolchainInfo : public ToolchainInfo { return std::make_unique( resourceDir, findResourceDirInvocation, compilerDriverPath, - findDriverInvocation); + findDriverInvocation, CommandLineCleaner::forClangOrGcc()); } }; struct GccToolchainInfo : public ToolchainInfo { std::string installDir; std::vector findInstallDirInvocation; + std::unique_ptr cleaner; GccToolchainInfo(std::string installDir, - std::vector findInstallDirInvocation) + std::vector findInstallDirInvocation, + std::unique_ptr cleaner) : ToolchainInfo(), installDir(installDir), - findInstallDirInvocation(findInstallDirInvocation) {} + findInstallDirInvocation(findInstallDirInvocation), + cleaner(std::move(cleaner)) {} virtual CompilerKind kind() const override { return CompilerKind::Gcc; @@ -194,6 +204,7 @@ struct GccToolchainInfo : public ToolchainInfo { virtual void adjustCommandLine(std::vector &commandLine) const override { + this->cleaner->clean(commandLine); commandLine.push_back("-resource-dir"); commandLine.push_back(this->installDir); // gcc-7 adds headers like limits.h and syslimits.h in include-fixed @@ -227,21 +238,17 @@ struct GccToolchainInfo : public ToolchainInfo { return nullptr; } spdlog::debug("found gcc install directory at {}", installDir); - return std::make_unique(installDir, - findSearchDirsInvocation); + return std::make_unique( + installDir, findSearchDirsInvocation, + CommandLineCleaner::forClangOrGcc()); } }; -enum class NvccOptionType { - NoArgument, - OneArgument, -}; - // Based on nvcc --help from nvcc version V12.2.140 // Build cuda_12.2.r12.2/compiler.33191640_0 // clang-format off -constexpr const char* skipOptionsNoArgs[] = { +constexpr const char* nvccSkipOptionsNoArgs[] = { "--cuda", "-cuda", "--cubin", "-cubin", "--fatbin", "-fatbin", @@ -303,7 +310,7 @@ constexpr const char* skipOptionsNoArgs[] = { "--host-relocatable-link", "-r" }; -constexpr const char* skipOptionsWithArgs[] = { +constexpr const char* nvccSkipOptionsWithArgs[] = { "--cudart", "-cudart", "--cudadevrt", "-cudadevrt", "--libdevice-directory", "-ldir", @@ -361,18 +368,19 @@ struct NvccToolchainInfo : public ToolchainInfo { /// doesn't even construct the appropriate CUDAKernelCallExpr values. std::unique_ptr clangInfo; - absl::flat_hash_map toBeSkipped; + CommandLineCleaner cleaner; NvccToolchainInfo(AbsolutePath cudaDir) : ToolchainInfo(), cudaDir(cudaDir), clangInfo(nullptr) { - for (auto s : skipOptionsNoArgs) { - this->toBeSkipped.emplace(std::string_view(s), - NvccOptionType::NoArgument); + CommandLineCleaner::MapType toZap; + for (auto s : nvccSkipOptionsNoArgs) { + toZap.emplace(std::string_view(s), CliOptionKind::NoArgument); } - for (auto s : skipOptionsWithArgs) { - this->toBeSkipped.emplace(std::string_view(s), - NvccOptionType::OneArgument); + for (auto s : nvccSkipOptionsWithArgs) { + toZap.emplace(std::string_view(s), CliOptionKind::OneArgument); } + this->cleaner = + CommandLineCleaner{.toZap = toZap, .noArgumentMatcher = std::nullopt}; // TODO: In principle, we could pick up Clang from -ccbin but that // requires more plumbing; it would require using the -ccbin arg @@ -412,72 +420,15 @@ struct NvccToolchainInfo : public ToolchainInfo { return true; } - enum class ArgumentProcessing { - Keep, - DropCurrent, - DropCurrentAndNextIffBothPresent, - }; - - ArgumentProcessing handleArgument(const std::string &arg) const { - if (!arg.starts_with('-')) { - return ArgumentProcessing::Keep; - } - std::string_view substr = arg; - auto eqIndex = arg.find('='); - if (eqIndex != std::string::npos) { - substr = std::string_view(arg.data(), eqIndex); - } - auto it = this->toBeSkipped.find(substr); - if (it == this->toBeSkipped.end()) { - return ArgumentProcessing::Keep; - } - switch (it->second) { - case NvccOptionType::NoArgument: - return ArgumentProcessing::DropCurrent; - case NvccOptionType::OneArgument: - if (substr.size() == arg.size()) { - return ArgumentProcessing::DropCurrentAndNextIffBothPresent; - } - return ArgumentProcessing::DropCurrent; - } - ENFORCE(false, "should've exited earlier"); - } - - void removeUnknownArguments(std::vector &commandLine) const { - absl::flat_hash_set drop{}; - for (size_t i = 0; i < commandLine.size(); ++i) { - switch (this->handleArgument(commandLine[i])) { - case ArgumentProcessing::Keep: - continue; - case ArgumentProcessing::DropCurrent: - drop.insert(i); - continue; - case ArgumentProcessing::DropCurrentAndNextIffBothPresent: - if (i + 1 < commandLine.size()) { - drop.insert(i); - drop.insert(i + 1); - } - } - } - std::vector tmp; - tmp.reserve(commandLine.size() - drop.size()); - for (size_t i = 0; i < commandLine.size(); ++i) { - if (!drop.contains(i)) { - tmp.push_back(std::move(commandLine[i])); - } - } - std::swap(tmp, commandLine); - } - virtual void adjustCommandLine(std::vector &commandLine) const override { - this->removeUnknownArguments(commandLine); - commandLine.push_back( - fmt::format("-isystem{}{}include", this->cudaDir.asStringRef(), - std::filesystem::path::preferred_separator)); + this->cleaner.clean(commandLine); if (this->clangInfo) { this->clangInfo->adjustCommandLine(commandLine); } + commandLine.push_back( + fmt::format("-isystem{}{}include", this->cudaDir.asStringRef(), + std::filesystem::path::preferred_separator)); } static std::unique_ptr diff --git a/test/test_main.cc b/test/test_main.cc index 2dd1f3f7..2df6b998 100644 --- a/test/test_main.cc +++ b/test/test_main.cc @@ -24,6 +24,7 @@ #include "scip/scip.pb.h" #include "indexer/CliOptions.h" +#include "indexer/CommandLineCleaner.h" #include "indexer/CompilationDatabase.h" #include "indexer/Enforce.h" #include "indexer/FileSystem.h" @@ -149,6 +150,33 @@ TEST_CASE("UNIT_TESTS") { fmt::join(gotPrefixes, ", "))); } } + + { + struct CommandLineCleaningTestCase { + std::vector before; + std::vector after; + }; + std::vector testCases{ + {{ + .before = {"gcc", "-mcpu=arm7tdmi", "-mthumb", "-mtune=arm7tdmi", + "-march=armv4t", "tmp1.c"}, + .after = {"gcc", "-mthumb", "tmp1.c"}, + }, + { + .before = {"gcc", "-mfix-cortex-a53-843419", "tmp2.c"}, + .after = {"gcc", "tmp2.c"}, + }}}; + auto cleaner = scip_clang::compdb::CommandLineCleaner::forClangOrGcc(); + for (auto &testCase : testCases) { + std::vector input = testCase.before; + cleaner->clean(input); + CHECK_MESSAGE(absl::c_equal(testCase.after, input), + fmt::format("cleaned command-line invocation:\n expected: " + "{}\n actual: {}", + fmt::join(testCase.after, " "), + fmt::join(input, " "))); + } + } }; TEST_CASE("COMPDB_PARSING") {