Skip to content

fix: Strip common arch-specific flags before processing #488

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions indexer/CommandLineCleaner.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#include "llvm/ADT/StringRef.h"

#include "indexer/CommandLineCleaner.h"
#include "indexer/Enforce.h"

namespace {

// Verdict produced by the per-element callback that is handed to zap().
enum class Action {
// Leave the element in the vector.
Keep,
// Remove only the element that was inspected.
ZapOne,
// Remove the inspected element and the element immediately following it.
ZapTwo,
};

// Removes elements from `vs` in place, preserving the relative order of the
// elements that are kept.
//
// `check` is invoked on elements strictly front-to-back. Its result decides
// the fate of the inspected element:
//   - Action::Keep:   the element stays.
//   - Action::ZapOne: the element is removed.
//   - Action::ZapTwo: the element is removed together with the element right
//                     after it (if any); `check` is NOT invoked on that
//                     following element.
template <typename T>
void zap(std::vector<T> &vs, absl::FunctionRef<Action(const T &)> check) {
  // NOTE: This is deliberately a hand-written compaction loop instead of
  // std::stable_partition with a stateful predicate. The standard library
  // does not promise that the predicate is applied to elements in order, so
  // a "drop the next element" flag carried between predicate calls could be
  // applied to the wrong element.
  bool dropFromBefore = false;
  auto keptEnd = vs.begin();
  for (auto it = vs.begin(); it != vs.end(); ++it) {
    if (dropFromBefore) {
      // This element is the argument of a preceding ZapTwo option.
      dropFromBefore = false;
      continue;
    }
    switch (check(*it)) {
    case Action::Keep:
      // Avoid self-move-assignment while nothing has been removed yet.
      if (keptEnd != it) {
        *keptEnd = std::move(*it);
      }
      ++keptEnd;
      break;
    case Action::ZapOne:
      break;
    case Action::ZapTwo:
      dropFromBefore = true;
      break;
    }
  }
  vs.erase(keptEnd, vs.end());
}

// Architecture-selection options that take a value (e.g. -march=armv8-a).
//
// These are removed because scip-clang may be asked to index code that
// targets an architecture which only GCC, or only some proprietary
// compiler, knows about.
constexpr const char *clangGccSkipOptionsWithArgs[] = {"-march", "-mcpu",
                                                       "-mtune"};

// Patterns of arg-less options to strip out.
//
// For example, Clang supports -mfix-cortex-a53-835769 (so does GCC)
// but GCC supports -mfix-cortex-a53-843419 which is not supported by Clang.
//
// In practice, options starting with '-m' seem to all correspond to
// ABI-related options (which ~never affect codenav). However, we cannot
// simply use '-m.*' as the pattern here, because some options with '-m'
// take an argument and some do not, and there isn't an easy programmatic
// way to determine which ones do/do not.
//
// The pattern is anchored with '^' because llvm::Regex::match searches for
// the pattern anywhere in the string; without the anchor, unrelated
// arguments that merely contain '-mfix-' (e.g. '-I/opt/sdk-mfix-headers')
// would be stripped as well.
constexpr const char *clangGccSkipOptionsNoArgsPattern = "^-m(no-)?fix-.*";

} // namespace

namespace scip_clang::compdb {

// Strips every unwanted option (and, where applicable, its value) from
// `commandLine` in place.
//
// Only arguments beginning with '-' are candidates for removal. An option
// written as '-opt=value' is looked up in `toZap` by the text before the
// first '='; an option without '=' is first tested against
// `noArgumentMatcher` (when one is set) and then looked up in `toZap` by its
// full text.
void CommandLineCleaner::clean(std::vector<std::string> &commandLine) const {
  auto classify = [this](const std::string &arg) -> Action {
    if (!arg.starts_with('-')) {
      return Action::Keep;
    }

    const auto eqPos = arg.find('=');
    const bool hasEquals = eqPos != std::string::npos;

    // The regex matcher is only consulted for options without '='.
    if (!hasEquals && this->noArgumentMatcher
        && this->noArgumentMatcher->match(llvm::StringRef(arg))) {
      return Action::ZapOne;
    }

    const std::string_view name = hasEquals
                                      ? std::string_view(arg.data(), eqPos)
                                      : std::string_view(arg);
    const auto entry = this->toZap.find(name);
    if (entry == this->toZap.end()) {
      return Action::Keep;
    }

    switch (entry->second) {
    case CliOptionKind::NoArgument:
      return Action::ZapOne;
    case CliOptionKind::OneArgument:
      // '-opt value' spans two strings, whereas '-opt=value' is a single
      // string that already contains the value.
      return name.size() == arg.size() ? Action::ZapTwo : Action::ZapOne;
    }
    ENFORCE(false, "should've exited earlier");
  };
  zap<std::string>(commandLine, classify);
}

// Builds the cleaner shared by the Clang and GCC toolchains.
//
// The returned cleaner strips the architecture-selection options listed in
// clangGccSkipOptionsWithArgs, plus any arg-less option matching
// clangGccSkipOptionsNoArgsPattern.
std::unique_ptr<CommandLineCleaner> CommandLineCleaner::forClangOrGcc() {
  CommandLineCleaner::MapType toZap;
  for (const char *option : clangGccSkipOptionsWithArgs) {
    // These options carry a value, so they must be registered as
    // OneArgument: that way both '-mcpu=foo' (one string) and '-mcpu foo'
    // (two strings) are removed completely. Registering them as NoArgument
    // would leave a stray 'foo' behind in the second form, which would then
    // be interpreted as a positional argument.
    toZap.emplace(std::string_view(option), CliOptionKind::OneArgument);
  }
  CommandLineCleaner cleaner{
      .toZap = std::move(toZap),
      .noArgumentMatcher = {llvm::Regex(clangGccSkipOptionsNoArgsPattern)}};
  return std::make_unique<CommandLineCleaner>(std::move(cleaner));
}

} // namespace scip_clang::compdb
37 changes: 37 additions & 0 deletions indexer/CommandLineCleaner.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#ifndef SCIP_CLANG_COMMAND_LINE_CLEANER_H
#define SCIP_CLANG_COMMAND_LINE_CLEANER_H

#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "llvm/Support/Regex.h"

namespace scip_clang::compdb {

// Describes whether a command-line option is followed by a value.
enum class CliOptionKind {
// The option stands alone; it has no value.
NoArgument,
// The option carries exactly one value, either attached with '='
// (a single string) or as the next command-line string.
OneArgument,
};

// Removes unwanted options from a compiler command line.
//
// What gets removed is controlled by the two data members below; clean()
// applies them to a command line in place.
struct CommandLineCleaner {
// Maps an option name (without any '=value' suffix) to its kind.
// The keys are non-owning string views, so whoever populates the map must
// keep the underlying character data alive for as long as the cleaner is
// used.
using MapType = absl::flat_hash_map<std::string_view, CliOptionKind>;
// Fixed list of options for which the command-line arguments should be
// zapped. If CliOptionKind is NoArgument, then only one string will be
// zapped. If CliOptionKind is OneArgument, then two successive strings will
// be zapped, unless the value is attached to the option with '=', in which
// case only that single string is zapped.
MapType toZap;
// Optional matcher for zapping arguments more flexibly.
// This is to allow for handling unknown flags which match a particular
// pattern. For known flags, put them in toZap.
// It is only consulted for arguments that start with '-' and contain
// no '='; a matching argument is zapped on its own.
std::optional<llvm::Regex> noArgumentMatcher;

// Removes all matching options (and their values, where applicable) from
// commandLine in place. Arguments that do not start with '-' are never
// matched against toZap or noArgumentMatcher.
void clean(std::vector<std::string> &commandLine) const;

// Creates a cleaner pre-populated with the architecture-specific option
// list and pattern used for both Clang and GCC command lines.
static std::unique_ptr<CommandLineCleaner> forClangOrGcc();
};

} // namespace scip_clang::compdb

#endif
115 changes: 33 additions & 82 deletions indexer/CompilationDatabase.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "rapidjson/reader.h"
#include "spdlog/fmt/fmt.h"

#include "indexer/CommandLineCleaner.h"
#include "indexer/CompilationDatabase.h"
#include "indexer/FileSystem.h"
#include "indexer/LlvmCommandLineParsing.h"
Expand Down Expand Up @@ -73,25 +74,30 @@ namespace {

using AbsolutePath = scip_clang::AbsolutePath;
using ToolchainInfo = scip_clang::compdb::ToolchainInfo;
using CommandLineCleaner = scip_clang::compdb::CommandLineCleaner;
using CompilerKind = scip_clang::compdb::CompilerKind;
using CliOptionKind = scip_clang::compdb::CliOptionKind;

struct ClangToolchainInfo : public ToolchainInfo {
std::string resourceDir;
std::vector<std::string> findResourceDirInvocation;
std::string compilerDriverPath;
std::vector<std::string> findDriverInvocation;
std::unique_ptr<CommandLineCleaner> cleaner;

// All strings and vectors above should be non-empty for
// a valid toolchain.

ClangToolchainInfo(std::string resourceDir,
std::vector<std::string> findResourceDirInvocation,
std::string compilerDriverPath,
std::vector<std::string> findDriverInvocation)
std::vector<std::string> findDriverInvocation,
std::unique_ptr<CommandLineCleaner> cleaner)
: ToolchainInfo(), resourceDir(resourceDir),
findResourceDirInvocation(findResourceDirInvocation),
compilerDriverPath(compilerDriverPath),
findDriverInvocation(findDriverInvocation){};
findDriverInvocation(findDriverInvocation),
cleaner(std::move(cleaner)){};

virtual CompilerKind kind() const override {
return CompilerKind::Clang;
Expand All @@ -116,6 +122,7 @@ struct ClangToolchainInfo : public ToolchainInfo {
virtual void
adjustCommandLine(std::vector<std::string> &commandLine) const override {
commandLine[0] = this->compilerDriverPath;
this->cleaner->clean(commandLine);
commandLine.push_back("-resource-dir");
commandLine.push_back(this->resourceDir);
}
Expand Down Expand Up @@ -165,18 +172,21 @@ struct ClangToolchainInfo : public ToolchainInfo {

return std::make_unique<ClangToolchainInfo>(
resourceDir, findResourceDirInvocation, compilerDriverPath,
findDriverInvocation);
findDriverInvocation, CommandLineCleaner::forClangOrGcc());
}
};

struct GccToolchainInfo : public ToolchainInfo {
std::string installDir;
std::vector<std::string> findInstallDirInvocation;
std::unique_ptr<CommandLineCleaner> cleaner;

GccToolchainInfo(std::string installDir,
std::vector<std::string> findInstallDirInvocation)
std::vector<std::string> findInstallDirInvocation,
std::unique_ptr<CommandLineCleaner> cleaner)
: ToolchainInfo(), installDir(installDir),
findInstallDirInvocation(findInstallDirInvocation) {}
findInstallDirInvocation(findInstallDirInvocation),
cleaner(std::move(cleaner)) {}

virtual CompilerKind kind() const override {
return CompilerKind::Gcc;
Expand All @@ -194,6 +204,7 @@ struct GccToolchainInfo : public ToolchainInfo {

virtual void
adjustCommandLine(std::vector<std::string> &commandLine) const override {
this->cleaner->clean(commandLine);
commandLine.push_back("-resource-dir");
commandLine.push_back(this->installDir);
// gcc-7 adds headers like limits.h and syslimits.h in include-fixed
Expand Down Expand Up @@ -227,21 +238,17 @@ struct GccToolchainInfo : public ToolchainInfo {
return nullptr;
}
spdlog::debug("found gcc install directory at {}", installDir);
return std::make_unique<GccToolchainInfo>(installDir,
findSearchDirsInvocation);
return std::make_unique<GccToolchainInfo>(
installDir, findSearchDirsInvocation,
CommandLineCleaner::forClangOrGcc());
}
};

// Describes whether an nvcc option to be skipped is followed by a value.
enum class NvccOptionType {
// The option stands alone.
NoArgument,
// The option carries one value, attached with '=' or given as the next
// command-line string.
OneArgument,
};

// Based on nvcc --help from nvcc version V12.2.140
// Build cuda_12.2.r12.2/compiler.33191640_0

// clang-format off
constexpr const char* skipOptionsNoArgs[] = {
constexpr const char* nvccSkipOptionsNoArgs[] = {
"--cuda", "-cuda",
"--cubin", "-cubin",
"--fatbin", "-fatbin",
Expand Down Expand Up @@ -303,7 +310,7 @@ constexpr const char* skipOptionsNoArgs[] = {
"--host-relocatable-link", "-r"
};

constexpr const char* skipOptionsWithArgs[] = {
constexpr const char* nvccSkipOptionsWithArgs[] = {
"--cudart", "-cudart",
"--cudadevrt", "-cudadevrt",
"--libdevice-directory", "-ldir",
Expand Down Expand Up @@ -361,18 +368,19 @@ struct NvccToolchainInfo : public ToolchainInfo {
/// doesn't even construct the appropriate CUDAKernelCallExpr values.
std::unique_ptr<ClangToolchainInfo> clangInfo;

absl::flat_hash_map<std::string_view, NvccOptionType> toBeSkipped;
CommandLineCleaner cleaner;

NvccToolchainInfo(AbsolutePath cudaDir)
: ToolchainInfo(), cudaDir(cudaDir), clangInfo(nullptr) {
for (auto s : skipOptionsNoArgs) {
this->toBeSkipped.emplace(std::string_view(s),
NvccOptionType::NoArgument);
CommandLineCleaner::MapType toZap;
for (auto s : nvccSkipOptionsNoArgs) {
toZap.emplace(std::string_view(s), CliOptionKind::NoArgument);
}
for (auto s : skipOptionsWithArgs) {
this->toBeSkipped.emplace(std::string_view(s),
NvccOptionType::OneArgument);
for (auto s : nvccSkipOptionsWithArgs) {
toZap.emplace(std::string_view(s), CliOptionKind::OneArgument);
}
this->cleaner =
CommandLineCleaner{.toZap = toZap, .noArgumentMatcher = std::nullopt};

// TODO: In principle, we could pick up Clang from -ccbin but that
// requires more plumbing; it would require using the -ccbin arg
Expand Down Expand Up @@ -412,72 +420,15 @@ struct NvccToolchainInfo : public ToolchainInfo {
return true;
}

// Verdict returned by handleArgument() for a single command-line string.
enum class ArgumentProcessing {
// Leave the string in the command line.
Keep,
// Remove only the inspected string.
DropCurrent,
// Remove the inspected string and the one after it, but only when a
// following string actually exists; otherwise nothing is removed.
DropCurrentAndNextIffBothPresent,
};

// Decides what should happen to a single command-line string.
//
// Strings that do not start with '-' are always kept. Options are looked up
// in `toBeSkipped` by name, where the name of '-opt=value' is the text
// before the first '='.
ArgumentProcessing handleArgument(const std::string &arg) const {
  if (!arg.starts_with('-')) {
    return ArgumentProcessing::Keep;
  }

  const auto eqPos = arg.find('=');
  const std::string_view name = (eqPos == std::string::npos)
                                    ? std::string_view(arg)
                                    : std::string_view(arg.data(), eqPos);

  const auto entry = this->toBeSkipped.find(name);
  if (entry == this->toBeSkipped.end()) {
    return ArgumentProcessing::Keep;
  }

  switch (entry->second) {
  case NvccOptionType::NoArgument:
    return ArgumentProcessing::DropCurrent;
  case NvccOptionType::OneArgument:
    // Without '=', the value lives in the next string and must go too.
    return name.size() == arg.size()
               ? ArgumentProcessing::DropCurrentAndNextIffBothPresent
               : ArgumentProcessing::DropCurrent;
  }
  ENFORCE(false, "should've exited earlier");
}

void removeUnknownArguments(std::vector<std::string> &commandLine) const {
absl::flat_hash_set<size_t> drop{};
for (size_t i = 0; i < commandLine.size(); ++i) {
switch (this->handleArgument(commandLine[i])) {
case ArgumentProcessing::Keep:
continue;
case ArgumentProcessing::DropCurrent:
drop.insert(i);
continue;
case ArgumentProcessing::DropCurrentAndNextIffBothPresent:
if (i + 1 < commandLine.size()) {
drop.insert(i);
drop.insert(i + 1);
}
}
}
std::vector<std::string> tmp;
tmp.reserve(commandLine.size() - drop.size());
for (size_t i = 0; i < commandLine.size(); ++i) {
if (!drop.contains(i)) {
tmp.push_back(std::move(commandLine[i]));
}
}
std::swap(tmp, commandLine);
}

virtual void
adjustCommandLine(std::vector<std::string> &commandLine) const override {
this->removeUnknownArguments(commandLine);
commandLine.push_back(
fmt::format("-isystem{}{}include", this->cudaDir.asStringRef(),
std::filesystem::path::preferred_separator));
this->cleaner.clean(commandLine);
if (this->clangInfo) {
this->clangInfo->adjustCommandLine(commandLine);
}
commandLine.push_back(
fmt::format("-isystem{}{}include", this->cudaDir.asStringRef(),
std::filesystem::path::preferred_separator));
}

static std::unique_ptr<NvccToolchainInfo>
Expand Down
Loading