Skip to content

Commit 62dc778

Browse files
fix: Strip common arch-specific flags before processing
1 parent e00ddf5 commit 62dc778

File tree

4 files changed

+202
-82
lines changed

4 files changed

+202
-82
lines changed

indexer/CommandLineCleaner.cc

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
2+
#include "llvm/ADT/StringRef.h"
3+
4+
#include "indexer/CommandLineCleaner.h"
5+
#include "indexer/Enforce.h"
6+
7+
namespace {
8+
9+
enum class Action {
10+
Keep,
11+
ZapOne,
12+
ZapTwo,
13+
};
14+
15+
template <typename T>
16+
void zapVector(std::vector<T> &vs,
17+
absl::FunctionRef<Action(const T &)> zapAction) {
18+
bool dropFromBefore = false;
19+
auto zappedBegin =
20+
std::stable_partition(vs.begin(), vs.end(), [&](const T &v) -> bool {
21+
if (dropFromBefore) {
22+
dropFromBefore = false;
23+
return false;
24+
}
25+
switch (zapAction(v)) {
26+
case Action::Keep:
27+
return true;
28+
case Action::ZapOne:
29+
return false;
30+
case Action::ZapTwo:
31+
dropFromBefore = true;
32+
return false;
33+
}
34+
});
35+
vs.resize(std::distance(vs.begin(), zappedBegin));
36+
}
37+
38+
// Strip out architecture specific flags, because scip-clang may
39+
// be used to index code which relies on architectures known only
40+
// to GCC, or only to some proprietary compilers.
41+
constexpr const char *clangGccSkipOptionsWithArgs[] = {
42+
"-march",
43+
"-mcpu",
44+
"-mtune",
45+
};
46+
47+
// Patterns of arg-less options to strip out.
48+
//
49+
// For example, Clang supports -mfix-cortex-a53-835769 (so does GCC)
50+
// but GCC supports -mfix-cortex-a53-843419 which is not supported by Clang.
51+
//
52+
// In practice, options starting with '-m' seem to all correspond to
53+
// ABI-related options (which ~never affect codenav). However, we cannot
54+
// simply use '-m.*' as the pattern here, because some options with '-m'
55+
// take an argument and some do not, and there isn't an easy programmatic
56+
// way to determine which ones do/do not.
57+
constexpr const char *clangGccSkipOptionsNoArgsPattern = "-m(no-)?fix-.*";
58+
59+
} // namespace
60+
61+
namespace scip_clang::compdb {
62+
63+
void CommandLineCleaner::clean(std::vector<std::string> &commandLine) const {
64+
zapVector<std::string>(commandLine, [this](const std::string &arg) -> Action {
65+
if (!arg.starts_with('-')) {
66+
return Action::Keep;
67+
}
68+
std::string_view flag = arg;
69+
auto eqIndex = arg.find('=');
70+
if (eqIndex != std::string::npos) {
71+
flag = std::string_view(arg.data(), eqIndex);
72+
} else if (this->noArgumentMatcher
73+
&& this->noArgumentMatcher->match(llvm::StringRef(arg))) {
74+
return Action::ZapOne;
75+
}
76+
auto it = this->toZap.find(flag);
77+
if (it == this->toZap.end()) {
78+
return Action::Keep;
79+
}
80+
switch (it->second) {
81+
case CliOptionKind::NoArgument:
82+
return Action::ZapOne;
83+
case CliOptionKind::OneArgument:
84+
if (flag.size() == arg.size()) {
85+
return Action::ZapTwo;
86+
}
87+
return Action::ZapOne;
88+
}
89+
ENFORCE(false, "should've exited earlier");
90+
});
91+
}
92+
93+
std::unique_ptr<CommandLineCleaner> CommandLineCleaner::forClangOrGcc() {
94+
CommandLineCleaner::MapType toZap;
95+
for (auto s : clangGccSkipOptionsWithArgs) {
96+
toZap.emplace(std::string_view(s), CliOptionKind::NoArgument);
97+
}
98+
CommandLineCleaner cleaner{
99+
.toZap = std::move(toZap),
100+
.noArgumentMatcher = {llvm::Regex(clangGccSkipOptionsNoArgsPattern)}};
101+
return std::make_unique<CommandLineCleaner>(std::move(cleaner));
102+
}
103+
104+
} // namespace scip_clang::compdb

indexer/CommandLineCleaner.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#ifndef SCIP_CLANG_COMMAND_LINE_CLEANER_H
2+
#define SCIP_CLANG_COMMAND_LINE_CLEANER_H
3+
4+
#include <memory>
5+
#include <string>
6+
#include <vector>
7+
8+
#include "absl/container/flat_hash_map.h"
9+
#include "llvm/Support/Regex.h"
10+
11+
namespace scip_clang::compdb {
12+
13+
enum class CliOptionKind {
14+
NoArgument,
15+
OneArgument,
16+
};
17+
18+
struct CommandLineCleaner {
19+
using MapType = absl::flat_hash_map<std::string_view, CliOptionKind>;
20+
// Fixed list of options for which the command-line arguments should be
21+
// zapped. If CliOptionKind is NoArgument, then only one string will be
22+
// zapped. If CliOptionKind is OneArgument, then two successive strings will
23+
// be zapped.
24+
MapType toZap;
25+
// Optional matcher for zapping arguments more flexibly.
26+
// This is to allow for handling unknown flags which match a particular
27+
// pattern. For known flags, put them in toZap.
28+
std::optional<llvm::Regex> noArgumentMatcher;
29+
30+
void clean(std::vector<std::string> &commandLine) const;
31+
32+
static std::unique_ptr<CommandLineCleaner> forClangOrGcc();
33+
};
34+
35+
} // namespace scip_clang::compdb
36+
37+
#endif

indexer/CompilationDatabase.cc

Lines changed: 33 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "rapidjson/reader.h"
2222
#include "spdlog/fmt/fmt.h"
2323

24+
#include "indexer/CommandLineCleaner.h"
2425
#include "indexer/CompilationDatabase.h"
2526
#include "indexer/FileSystem.h"
2627
#include "indexer/LlvmCommandLineParsing.h"
@@ -73,25 +74,30 @@ namespace {
7374

7475
using AbsolutePath = scip_clang::AbsolutePath;
7576
using ToolchainInfo = scip_clang::compdb::ToolchainInfo;
77+
using CommandLineCleaner = scip_clang::compdb::CommandLineCleaner;
7678
using CompilerKind = scip_clang::compdb::CompilerKind;
79+
using CliOptionKind = scip_clang::compdb::CliOptionKind;
7780

7881
struct ClangToolchainInfo : public ToolchainInfo {
7982
std::string resourceDir;
8083
std::vector<std::string> findResourceDirInvocation;
8184
std::string compilerDriverPath;
8285
std::vector<std::string> findDriverInvocation;
86+
std::unique_ptr<CommandLineCleaner> cleaner;
8387

8488
// All strings and vectors above should be non-empty for
8589
// a valid toolchain.
8690

8791
ClangToolchainInfo(std::string resourceDir,
8892
std::vector<std::string> findResourceDirInvocation,
8993
std::string compilerDriverPath,
90-
std::vector<std::string> findDriverInvocation)
94+
std::vector<std::string> findDriverInvocation,
95+
std::unique_ptr<CommandLineCleaner> cleaner)
9196
: ToolchainInfo(), resourceDir(resourceDir),
9297
findResourceDirInvocation(findResourceDirInvocation),
9398
compilerDriverPath(compilerDriverPath),
94-
findDriverInvocation(findDriverInvocation){};
99+
findDriverInvocation(findDriverInvocation),
100+
cleaner(std::move(cleaner)){};
95101

96102
virtual CompilerKind kind() const override {
97103
return CompilerKind::Clang;
@@ -116,6 +122,7 @@ struct ClangToolchainInfo : public ToolchainInfo {
116122
virtual void
117123
adjustCommandLine(std::vector<std::string> &commandLine) const override {
118124
commandLine[0] = this->compilerDriverPath;
125+
this->cleaner->clean(commandLine);
119126
commandLine.push_back("-resource-dir");
120127
commandLine.push_back(this->resourceDir);
121128
}
@@ -165,18 +172,21 @@ struct ClangToolchainInfo : public ToolchainInfo {
165172

166173
return std::make_unique<ClangToolchainInfo>(
167174
resourceDir, findResourceDirInvocation, compilerDriverPath,
168-
findDriverInvocation);
175+
findDriverInvocation, CommandLineCleaner::forClangOrGcc());
169176
}
170177
};
171178

172179
struct GccToolchainInfo : public ToolchainInfo {
173180
std::string installDir;
174181
std::vector<std::string> findInstallDirInvocation;
182+
std::unique_ptr<CommandLineCleaner> cleaner;
175183

176184
GccToolchainInfo(std::string installDir,
177-
std::vector<std::string> findInstallDirInvocation)
185+
std::vector<std::string> findInstallDirInvocation,
186+
std::unique_ptr<CommandLineCleaner> cleaner)
178187
: ToolchainInfo(), installDir(installDir),
179-
findInstallDirInvocation(findInstallDirInvocation) {}
188+
findInstallDirInvocation(findInstallDirInvocation),
189+
cleaner(std::move(cleaner)) {}
180190

181191
virtual CompilerKind kind() const override {
182192
return CompilerKind::Gcc;
@@ -194,6 +204,7 @@ struct GccToolchainInfo : public ToolchainInfo {
194204

195205
virtual void
196206
adjustCommandLine(std::vector<std::string> &commandLine) const override {
207+
this->cleaner->clean(commandLine);
197208
commandLine.push_back("-resource-dir");
198209
commandLine.push_back(this->installDir);
199210
// gcc-7 adds headers like limits.h and syslimits.h in include-fixed
@@ -227,21 +238,17 @@ struct GccToolchainInfo : public ToolchainInfo {
227238
return nullptr;
228239
}
229240
spdlog::debug("found gcc install directory at {}", installDir);
230-
return std::make_unique<GccToolchainInfo>(installDir,
231-
findSearchDirsInvocation);
241+
return std::make_unique<GccToolchainInfo>(
242+
installDir, findSearchDirsInvocation,
243+
CommandLineCleaner::forClangOrGcc());
232244
}
233245
};
234246

235-
enum class NvccOptionType {
236-
NoArgument,
237-
OneArgument,
238-
};
239-
240247
// Based on nvcc --help from nvcc version V12.2.140
241248
// Build cuda_12.2.r12.2/compiler.33191640_0
242249

243250
// clang-format off
244-
constexpr const char* skipOptionsNoArgs[] = {
251+
constexpr const char* nvccSkipOptionsNoArgs[] = {
245252
"--cuda", "-cuda",
246253
"--cubin", "-cubin",
247254
"--fatbin", "-fatbin",
@@ -303,7 +310,7 @@ constexpr const char* skipOptionsNoArgs[] = {
303310
"--host-relocatable-link", "-r"
304311
};
305312

306-
constexpr const char* skipOptionsWithArgs[] = {
313+
constexpr const char* nvccSkipOptionsWithArgs[] = {
307314
"--cudart", "-cudart",
308315
"--cudadevrt", "-cudadevrt",
309316
"--libdevice-directory", "-ldir",
@@ -361,18 +368,19 @@ struct NvccToolchainInfo : public ToolchainInfo {
361368
/// doesn't even construct the appropriate CUDAKernelCallExpr values.
362369
std::unique_ptr<ClangToolchainInfo> clangInfo;
363370

364-
absl::flat_hash_map<std::string_view, NvccOptionType> toBeSkipped;
371+
CommandLineCleaner cleaner;
365372

366373
NvccToolchainInfo(AbsolutePath cudaDir)
367374
: ToolchainInfo(), cudaDir(cudaDir), clangInfo(nullptr) {
368-
for (auto s : skipOptionsNoArgs) {
369-
this->toBeSkipped.emplace(std::string_view(s),
370-
NvccOptionType::NoArgument);
375+
CommandLineCleaner::MapType toZap;
376+
for (auto s : nvccSkipOptionsNoArgs) {
377+
toZap.emplace(std::string_view(s), CliOptionKind::NoArgument);
371378
}
372-
for (auto s : skipOptionsWithArgs) {
373-
this->toBeSkipped.emplace(std::string_view(s),
374-
NvccOptionType::OneArgument);
379+
for (auto s : nvccSkipOptionsWithArgs) {
380+
toZap.emplace(std::string_view(s), CliOptionKind::OneArgument);
375381
}
382+
this->cleaner =
383+
CommandLineCleaner{.toZap = toZap, .noArgumentMatcher = std::nullopt};
376384

377385
// TODO: In principle, we could pick up Clang from -ccbin but that
378386
// requires more plumbing; it would require using the -ccbin arg
@@ -412,72 +420,15 @@ struct NvccToolchainInfo : public ToolchainInfo {
412420
return true;
413421
}
414422

415-
enum class ArgumentProcessing {
416-
Keep,
417-
DropCurrent,
418-
DropCurrentAndNextIffBothPresent,
419-
};
420-
421-
ArgumentProcessing handleArgument(const std::string &arg) const {
422-
if (!arg.starts_with('-')) {
423-
return ArgumentProcessing::Keep;
424-
}
425-
std::string_view substr = arg;
426-
auto eqIndex = arg.find('=');
427-
if (eqIndex != std::string::npos) {
428-
substr = std::string_view(arg.data(), eqIndex);
429-
}
430-
auto it = this->toBeSkipped.find(substr);
431-
if (it == this->toBeSkipped.end()) {
432-
return ArgumentProcessing::Keep;
433-
}
434-
switch (it->second) {
435-
case NvccOptionType::NoArgument:
436-
return ArgumentProcessing::DropCurrent;
437-
case NvccOptionType::OneArgument:
438-
if (substr.size() == arg.size()) {
439-
return ArgumentProcessing::DropCurrentAndNextIffBothPresent;
440-
}
441-
return ArgumentProcessing::DropCurrent;
442-
}
443-
ENFORCE(false, "should've exited earlier");
444-
}
445-
446-
void removeUnknownArguments(std::vector<std::string> &commandLine) const {
447-
absl::flat_hash_set<size_t> drop{};
448-
for (size_t i = 0; i < commandLine.size(); ++i) {
449-
switch (this->handleArgument(commandLine[i])) {
450-
case ArgumentProcessing::Keep:
451-
continue;
452-
case ArgumentProcessing::DropCurrent:
453-
drop.insert(i);
454-
continue;
455-
case ArgumentProcessing::DropCurrentAndNextIffBothPresent:
456-
if (i + 1 < commandLine.size()) {
457-
drop.insert(i);
458-
drop.insert(i + 1);
459-
}
460-
}
461-
}
462-
std::vector<std::string> tmp;
463-
tmp.reserve(commandLine.size() - drop.size());
464-
for (size_t i = 0; i < commandLine.size(); ++i) {
465-
if (!drop.contains(i)) {
466-
tmp.push_back(std::move(commandLine[i]));
467-
}
468-
}
469-
std::swap(tmp, commandLine);
470-
}
471-
472423
virtual void
473424
adjustCommandLine(std::vector<std::string> &commandLine) const override {
474-
this->removeUnknownArguments(commandLine);
475-
commandLine.push_back(
476-
fmt::format("-isystem{}{}include", this->cudaDir.asStringRef(),
477-
std::filesystem::path::preferred_separator));
425+
this->cleaner.clean(commandLine);
478426
if (this->clangInfo) {
479427
this->clangInfo->adjustCommandLine(commandLine);
480428
}
429+
commandLine.push_back(
430+
fmt::format("-isystem{}{}include", this->cudaDir.asStringRef(),
431+
std::filesystem::path::preferred_separator));
481432
}
482433

483434
static std::unique_ptr<NvccToolchainInfo>

0 commit comments

Comments
 (0)