Skip to content

Commit f07c00d

Browse files
WIP: Add support for manually specifiying gems for cross-repo.
1 parent 0e21b96 commit f07c00d

File tree

10 files changed

+252
-38
lines changed

10 files changed

+252
-38
lines changed

common/common.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ bool sorbet::FileOps::ensureDir(string_view path) {
9090

9191
std::string sorbet::FileOps::getCurrentDir() {
9292
char buf[MAXPATHLEN + 1];
93-
getcwd(buf, sizeof(buf));
93+
(void)getcwd(buf, sizeof(buf));
9494
return std::string(buf);
9595
}
9696

core/GlobalState.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1663,6 +1663,7 @@ FileRef GlobalState::enterFile(const shared_ptr<File> &file) {
16631663
}
16641664

16651665
files.emplace_back(file);
1666+
// GlobalState initialization guarantees the 0 slot will be taken, so this is OK.
16661667
auto ret = FileRef(filesUsed() - 1);
16671668
fileRefByPath[string(file->path())] = ret;
16681669
return ret;

docs/scip-ruby/CLI.md

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,51 @@ For example, with Git, you can use the last tag
2020
string. For repos which index every commit, you could also use the SHA
2121
instead (`git rev-parse HEAD`).
2222

23+
## `--gem-map-path <arg>`
24+
25+
At the moment, scip-ruby requires an extra step for cross-repo
26+
code navigation; you need to supply information about which.
27+
file belongs to which gem explicitly, using a newline-delimited
28+
JSON file in the following format:
29+
30+
<!--
31+
TODO: Uncomment this
32+
By default, `scip-ruby` will attempt to identify which gems the
33+
ingested files belong to based on the standard layout of paths
34+
as used by Bundler. If it can't identify the gems for certain files,
35+
it will print a warning. This may happen if you're using a custom
36+
build system or different filesystem layout.
37+
38+
To get correct cross-repo code navigation, you can explicitly
39+
supply information about files and gems using a supplementary
40+
newline-delimited JSON file in the following format:
41+
-->
42+
```json
43+
{"path": "a/b/c.rb", "gem": "[email protected]"}
44+
{"path": "a/b/d.rb", "gem": "[email protected]"}
45+
{"path": "a/x/y.rb", "gem": "[email protected]"}
46+
...
47+
```
48+
49+
Then pass the path to the JSON file:
50+
51+
```bash
52+
scip-ruby --gem-map-path path/to/cross-repo-metadata.json
53+
```
54+
55+
If information about the gem being indexed is not present in the JSON file,
56+
and cannot be inferred from the filesystem, then you can supply
57+
the `--gem-metadata` argument as described earlier.
58+
59+
## `--index-file <arg>`
60+
61+
The path for emitting the SCIP index. Defaults to `index.scip`.
62+
2363
## `--unquiet-errors`
2464

2565
scip-ruby defaults to running in Sorbet's quiet mode, as scip-ruby supports
2666
indexing `# typed: false` files on a best-effort basis, but Sorbet may
2767
rightfully flag many errors in those files. The number of errors can be
2868
overwhelming if there is a large amount of untyped code.
2969

30-
This flag restores Sorbet's default behavior.
70+
This flag restores Sorbet's default behavior.

scip_indexer/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ cc_library(
6161
"@com_google_absl//absl/strings",
6262
"@com_google_absl//absl/synchronization",
6363
"@cxxopts",
64+
"@rapidjson",
6465
"@spdlog",
6566
],
6667
)

scip_indexer/SCIPGemMetadata.cc

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,13 @@
88
#include <vector>
99

1010
#include "absl/strings/str_replace.h"
11+
12+
#include "common/common.h"
13+
#include "core/FileRef.h"
14+
15+
// Uses ENFORCE as defined by common/common.h, so put this later
16+
#include "rapidjson/document.h"
17+
1118
#include "scip_indexer/SCIPGemMetadata.h"
1219

1320
using namespace std;
@@ -169,4 +176,61 @@ pair<GemMetadata, vector<GemMetadataError>> GemMetadata::readFromConfig(const Fi
169176
return {GemMetadata(name.value_or(currentDirName()), version.value_or("latest")), errors};
170177
}
171178

179+
GemMapping::GemMapping() {
180+
// The 'ruby' namespace is reserved by RubyGems.org, so we can use that here.
181+
// Unfortunately, there are already gems which use the names stdlib and ruby-stdlib,
182+
// so not using those here.
183+
// FIXME: How do we get the version reliably?
184+
this->stdlibGem = make_shared<GemMetadata>(GemMetadata::tryParse("[email protected]").value());
185+
}
186+
187+
optional<shared_ptr<GemMetadata>> GemMapping::lookupGemForFile(const core::GlobalState &gs, core::FileRef file) const {
188+
auto it = this->map.find(file);
189+
if (it != this->map.end()) {
190+
return it->second;
191+
}
192+
auto filepath = file.data(gs).path();
193+
if (absl::StartsWith(filepath, core::File::URL_PREFIX)) {
194+
return this->stdlibGem;
195+
}
196+
if (this->currentGem.has_value()) {
197+
// TODO Should we enforce here in debug builds?
198+
// Fallback to this if set, to avoid collisions with other gems.
199+
return this->currentGem.value();
200+
}
201+
return nullopt;
202+
}
203+
204+
void GemMapping::populateFromNDJSON(const core::GlobalState &gs, const std::string &ndjsonText) {
205+
istringstream input(ndjsonText);
206+
for (string line; getline(input, line);) {
207+
auto jsonLine = std::string(absl::StripAsciiWhitespace(line));
208+
if (jsonLine.empty()) {
209+
continue;
210+
}
211+
rapidjson::Document document;
212+
document.Parse(jsonLine);
213+
if (document.HasParseError() || !document.IsObject()) {
214+
continue;
215+
}
216+
if (document.HasMember("path") && document["path"].IsString() && document.HasMember("gem") &&
217+
document["gem"].IsString()) {
218+
// TODO: handle path normalization
219+
auto fileRef = gs.findFileByPath(document["path"].GetString());
220+
if (!fileRef.exists()) {
221+
continue;
222+
}
223+
auto gemMetadata = GemMetadata::tryParse(document["gem"].GetString());
224+
if (!gemMetadata.has_value()) {
225+
continue;
226+
}
227+
this->map[fileRef] = make_shared<GemMetadata>(move(gemMetadata.value()));
228+
}
229+
}
230+
}
231+
232+
void GemMapping::markCurrentGem(GemMetadata gem) {
233+
this->currentGem = make_shared<GemMetadata>(gem);
234+
}
235+
172236
} // namespace sorbet::scip_indexer

scip_indexer/SCIPGemMetadata.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
#ifndef SORBET_SCIP_GEM_METADATA
22
#define SORBET_SCIP_GEM_METADATA
33

4+
#include <memory>
45
#include <optional>
56
#include <string>
67
#include <utility>
78

89
#include "absl/strings/str_split.h"
910

1011
#include "common/FileSystem.h"
12+
#include "common/common.h"
13+
#include "core/FileRef.h"
14+
#include "core/GlobalState.h"
1115

1216
namespace sorbet::scip_indexer {
1317

@@ -71,6 +75,23 @@ class GemMetadata final {
7175
static std::pair<GemMetadata, std::vector<GemMetadataError>> readFromGemspec(const std::string &);
7276
};
7377

78+
// Type carrying gem information for each file, which is used during
79+
// symbol emission to ensure correct symbol names for cross-repo.
80+
class GemMapping final {
81+
std::shared_ptr<GemMetadata> stdlibGem;
82+
std::optional<std::shared_ptr<GemMetadata>> currentGem;
83+
UnorderedMap<core::FileRef, std::shared_ptr<GemMetadata>> map;
84+
85+
public:
86+
GemMapping();
87+
88+
std::optional<std::shared_ptr<GemMetadata>> lookupGemForFile(const core::GlobalState &gs, core::FileRef file) const;
89+
90+
void populateFromNDJSON(const core::GlobalState &, const std::string &ndjsonContents);
91+
92+
void markCurrentGem(GemMetadata gem);
93+
};
94+
7495
} // namespace sorbet::scip_indexer
7596

7697
#endif // SORBET_SCIP_GEM_METADATA

scip_indexer/SCIPIndexer.cc

Lines changed: 68 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ class SCIPState {
181181
//
182182
// FIXME(varun): This seems redundant, get rid of it.
183183

184-
GemMetadata gemMetadata;
184+
GemMapping gemMap;
185185

186186
public:
187187
UnorderedMap<core::FileRef, vector<scip::Occurrence>> occurrenceMap;
@@ -207,9 +207,9 @@ class SCIPState {
207207
vector<scip::SymbolInformation> externalSymbols;
208208

209209
public:
210-
SCIPState(GemMetadata metadata)
210+
SCIPState(GemMapping gemMap)
211211
: symbolStringCache(), localOccurrenceCache(), symbolOccurrenceCache(), potentialRefOnlySymbols(),
212-
gemMetadata(metadata), occurrenceMap(), emittedSymbols(), symbolMap(), documents(), externalSymbols() {}
212+
gemMap(gemMap), occurrenceMap(), emittedSymbols(), symbolMap(), documents(), externalSymbols() {}
213213
~SCIPState() = default;
214214
SCIPState(SCIPState &&) = default;
215215
SCIPState &operator=(SCIPState &&other) = default;
@@ -241,7 +241,7 @@ class SCIPState {
241241
status = scip::utils::emitSymbolString(*symbol, output);
242242
} else {
243243
scip::Symbol symbol;
244-
status = symRef.symbolForExpr(gs, this->gemMetadata, {}, symbol);
244+
status = symRef.symbolForExpr(gs, this->gemMap, {}, symbol);
245245
if (!status.ok()) {
246246
return status;
247247
}
@@ -402,7 +402,7 @@ class SCIPState {
402402
auto occLoc = loc.has_value() ? core::Loc(file, loc.value()) : symRef.symbolLoc(gs);
403403
scip::Symbol symbol;
404404
auto untypedSymRef = symRef.withoutType();
405-
auto status = untypedSymRef.symbolForExpr(gs, this->gemMetadata, occLoc, symbol);
405+
auto status = untypedSymRef.symbolForExpr(gs, this->gemMap, occLoc, symbol);
406406
if (!status.ok()) {
407407
return status;
408408
}
@@ -1114,18 +1114,40 @@ namespace sorbet::pipeline::semantic_extension {
11141114
using LocalSymbolTable = UnorderedMap<core::LocalVariable, core::Loc>;
11151115

11161116
class SCIPSemanticExtension : public SemanticExtension {
1117-
public:
11181117
string indexFilePath;
1119-
string gemMetadataString;
1120-
scip_indexer::GemMetadata gemMetadata;
1118+
string gemMetadataArg;
1119+
string gemMapPath;
1120+
scip_indexer::GemMapping gemMap;
11211121

11221122
using SCIPState = sorbet::scip_indexer::SCIPState;
11231123

1124+
public:
1125+
using StateMap = UnorderedMap<thread::id, shared_ptr<SCIPState>>;
1126+
1127+
private:
11241128
mutable struct {
1125-
UnorderedMap<thread::id, shared_ptr<SCIPState>> states;
1129+
StateMap states;
11261130
absl::Mutex mtx;
11271131
} mutableState;
11281132

1133+
public:
1134+
SCIPSemanticExtension(string indexFilePath, string gemMetadataArg, string gemMapPath,
1135+
scip_indexer::GemMapping gemMap, StateMap states)
1136+
: indexFilePath(indexFilePath), gemMetadataArg(gemMetadataArg), gemMapPath(gemMapPath),
1137+
gemMap(gemMap), mutableState{states, {}} {}
1138+
1139+
~SCIPSemanticExtension() {}
1140+
1141+
virtual unique_ptr<SemanticExtension> deepCopy(const core::GlobalState &from, core::GlobalState &to) override {
1142+
// FIXME: Technically, this would copy the state too, but we haven't implemented
1143+
// deep-copying for it because it doesn't matter for scip-ruby.
1144+
StateMap map;
1145+
return make_unique<SCIPSemanticExtension>(this->indexFilePath, this->gemMetadataArg, this->gemMapPath,
1146+
this->gemMap, map);
1147+
};
1148+
virtual void merge(const core::GlobalState &from, core::GlobalState &to, core::NameSubstitution &subst) override{};
1149+
1150+
private:
11291151
// Return a shared_ptr here as we need a stable address to avoid an invalid reference
11301152
// on table resizing, and we need to maintain at least two pointers,
11311153
// an "owning ref" from the table and a mutable borrow from the caller.
@@ -1141,24 +1163,26 @@ class SCIPSemanticExtension : public SemanticExtension {
11411163

11421164
// We will move the state out later, so use a no-op deleter.
11431165
return mutableState.states[this_thread::get_id()] =
1144-
shared_ptr<SCIPState>(new SCIPState(gemMetadata), [](SCIPState *) {});
1166+
shared_ptr<SCIPState>(new SCIPState(gemMap), [](SCIPState *) {});
11451167
}
11461168
}
11471169

11481170
bool doNothing() const {
11491171
return this->indexFilePath.empty();
11501172
}
11511173

1174+
public:
11521175
void run(core::MutableContext &ctx, ast::ClassDef *cd) const override {}
11531176

11541177
virtual void prepareForTypechecking(const core::GlobalState &gs) override {
1155-
auto maybeMetadata = scip_indexer::GemMetadata::tryParse(this->gemMetadataString);
1178+
auto maybeMetadata = scip_indexer::GemMetadata::tryParse(this->gemMetadataArg);
1179+
scip_indexer::GemMetadata currentGem;
11561180
if (maybeMetadata.has_value()) {
1157-
this->gemMetadata = maybeMetadata.value();
1181+
currentGem = maybeMetadata.value();
11581182
} // TODO: Issue error for incorrect format in string...
1159-
if (this->gemMetadata.name().empty() || this->gemMetadata.version().empty()) {
1160-
auto [metadata, errors] = scip_indexer::GemMetadata::readFromConfig(OSFileSystem());
1161-
this->gemMetadata = metadata;
1183+
if (currentGem.name().empty() || currentGem.version().empty()) {
1184+
auto [gem, errors] = scip_indexer::GemMetadata::readFromConfig(OSFileSystem());
1185+
currentGem = gem;
11621186
for (auto &error : errors) {
11631187
if (auto e = gs.beginError(core::Loc(), scip_indexer::SCIPRubyDebug)) {
11641188
e.setHeader("{}: {}",
@@ -1167,6 +1191,10 @@ class SCIPSemanticExtension : public SemanticExtension {
11671191
}
11681192
}
11691193
}
1194+
this->gemMap.markCurrentGem(currentGem);
1195+
if (!this->gemMapPath.empty()) {
1196+
this->gemMap.populateFromNDJSON(gs, OSFileSystem().readFile(this->gemMapPath));
1197+
}
11701198
};
11711199

11721200
virtual void finishTypecheckFile(const core::GlobalState &gs, const core::FileRef &file) const override {
@@ -1269,27 +1297,25 @@ class SCIPSemanticExtension : public SemanticExtension {
12691297
traversal.traverse(cfg);
12701298
scipStateRef.clearFunctionLocalCaches();
12711299
}
1272-
1273-
virtual unique_ptr<SemanticExtension> deepCopy(const core::GlobalState &from, core::GlobalState &to) override {
1274-
return make_unique<SCIPSemanticExtension>(this->indexFilePath, this->gemMetadataString, this->gemMetadata);
1275-
};
1276-
virtual void merge(const core::GlobalState &from, core::GlobalState &to, core::NameSubstitution &subst) override{};
1277-
1278-
SCIPSemanticExtension(string indexFilePath, string gemMetadataString, scip_indexer::GemMetadata gemMetadata)
1279-
: indexFilePath(indexFilePath), gemMetadataString(gemMetadataString), gemMetadata(gemMetadata), mutableState() {
1280-
}
1281-
~SCIPSemanticExtension() {}
12821300
};
12831301

12841302
class SCIPSemanticExtensionProvider : public SemanticExtensionProvider {
12851303
public:
12861304
void injectOptions(cxxopts::Options &optsBuilder) const override {
1287-
optsBuilder.add_options("indexer")("index-file", "Output SCIP index to a directory, which must already exist",
1288-
cxxopts::value<string>())(
1289-
"gem-metadata",
1305+
// clang-format off
1306+
optsBuilder.add_options("indexer")
1307+
("index-file",
1308+
"Output SCIP index to a directory, which must already exist",
1309+
cxxopts::value<string>())
1310+
("gem-metadata",
12901311
"Metadata in 'name@version' format to be used for cross-repository code navigation. For repositories which "
12911312
"index every commit, the SHA should be used for the version instead of a git tag (or equivalent).",
1313+
cxxopts::value<string>())
1314+
("gem-map-path",
1315+
"Path to newline delimited JSON file which describes how to map paths to gem name and versions. See the"
1316+
" scip-ruby docs on GitHub for information about the JSON schema.",
12921317
cxxopts::value<string>());
1318+
// clang-format on
12931319
};
12941320
unique_ptr<SemanticExtension> readOptions(cxxopts::ParseResult &providedOptions) const override {
12951321
if (providedOptions.count("version") > 0) {
@@ -1308,12 +1334,22 @@ class SCIPSemanticExtensionProvider : public SemanticExtensionProvider {
13081334
} else {
13091335
indexFilePath = "index.scip";
13101336
}
1311-
auto gemMetadataString =
1312-
providedOptions.count("gem-metadata") > 0 ? providedOptions["gem-metadata"].as<string>() : "";
1313-
return make_unique<SCIPSemanticExtension>(indexFilePath, gemMetadataString, scip_indexer::GemMetadata());
1337+
string gemMapPath{};
1338+
if (providedOptions.count("gem-map-path") > 0) {
1339+
gemMapPath = providedOptions["gem-map-path"].as<string>();
1340+
}
1341+
string gemMetadataArg{};
1342+
if (providedOptions.count("gem-metadata") > 0) {
1343+
gemMetadataArg = providedOptions["gem-metadata"].as<string>();
1344+
}
1345+
scip_indexer::GemMapping gemMap{};
1346+
SCIPSemanticExtension::StateMap stateMap;
1347+
return make_unique<SCIPSemanticExtension>(indexFilePath, gemMetadataArg, gemMapPath, gemMap, stateMap);
13141348
};
13151349
virtual unique_ptr<SemanticExtension> defaultInstance() const override {
1316-
return make_unique<SCIPSemanticExtension>("index.scip", "", scip_indexer::GemMetadata());
1350+
scip_indexer::GemMapping gemMap{};
1351+
SCIPSemanticExtension::StateMap stateMap;
1352+
return make_unique<SCIPSemanticExtension>("index.scip", "", "", gemMap, stateMap);
13171353
};
13181354
static vector<SemanticExtensionProvider *> getProviders();
13191355
virtual ~SCIPSemanticExtensionProvider() = default;

0 commit comments

Comments
 (0)