Skip to content

Commit 3e37a95

Browse files
feat: Add support for loops and conditionals (#9)
* test:(wip) Add not-fully-working test for ifs. * fix: Propagate locals correctly across basic blocks This fixes an issue where writes inside an if-else chain on a previously assigned variable were marked as definitions. * optimization: De-duplicate emitted local lvalues. It leads to cleaner snapshot output and reduces payload size. * test: Add tests for unless and case. * optimization: De-duplicate emitted local rvalues too. Snapshot output for case scrutinees is verbose without it. * test: Add test cases using for loops. * fix: Remove spurious defs/refs for break statements * test: Add tests for while and until loops. * test: Add test for flip-flops.
1 parent 330ee66 commit 3e37a95

File tree

6 files changed

+472
-35
lines changed

6 files changed

+472
-35
lines changed

core/LocOffsets.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ class MutableContext;
1111

1212
constexpr int INVALID_POS_LOC = 0xfffffff;
1313
struct LocOffsets {
14+
template <typename H> friend H AbslHashValue(H h, const LocOffsets &m);
15+
1416
uint32_t beginLoc = INVALID_POS_LOC;
1517
uint32_t endLoc = INVALID_POS_LOC;
1618
uint32_t beginPos() const {
@@ -51,6 +53,9 @@ struct LocOffsets {
5153
};
5254
CheckSize(LocOffsets, 8, 4);
5355

56+
template <typename H> H AbslHashValue(H h, const LocOffsets &m) {
57+
return H::combine(std::move(h), m.beginLoc, m.endLoc);
58+
}
5459
} // namespace sorbet::core
5560

5661
#endif // SORBET_CORE_LOCOFFSETS_H

scip_indexer/SCIPIndexer.cc

Lines changed: 96 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "cfg/CFG.h"
2121
#include "common/common.h"
2222
#include "common/sort.h"
23+
#include "core/ErrorQueue.h"
2324
#include "core/Loc.h"
2425
#include "core/SymbolRef.h"
2526
#include "core/Symbols.h"
@@ -67,11 +68,6 @@ template <typename T, typename Fn> static string vec_to_string(const vector<T> v
6768
return out.str();
6869
}
6970

70-
static string bbvec_to_string(const vector<sorbet::cfg::BasicBlock *> &parents) {
71-
return vec_to_string(
72-
parents, [](const auto &ptr) -> auto { return fmt::format("(id: {}, ptr: {})", ptr->id, (void *)ptr); });
73-
};
74-
7571
template <typename T> static void drain(vector<T> &input, vector<T> &output) {
7672
output.reserve(output.size() + input.size());
7773
for (auto &v : input) {
@@ -109,7 +105,7 @@ static bool isTemporary(const core::GlobalState &gs, const core::LocalVariable &
109105
}
110106
auto n = var._name;
111107
return n == Names::blockPreCallTemp() || n == Names::blockTemp() || n == Names::blockPassTemp() ||
112-
n == Names::forTemp() || n == Names::blkArg() || n == Names::blockCall() ||
108+
n == Names::blkArg() || n == Names::blockCall() || n == Names::blockBreakAssign() || n == Names::forTemp() ||
113109
// Insert checks because sometimes temporaries are initialized with a 0 unique value. 😬
114110
n == Names::finalReturn() || n == NameRef::noName() || n == Names::blockCall() || n == Names::selfLocal() ||
115111
n == Names::unconditional();
@@ -198,6 +194,16 @@ class SCIPState {
198194
public:
199195
UnorderedMap<core::FileRef, vector<scip::Occurrence>> occurrenceMap;
200196
UnorderedMap<core::FileRef, vector<scip::SymbolInformation>> symbolMap;
197+
// Cache of occurrences for locals that have been emitted in this function.
198+
//
199+
// Note that the SymbolRole is a part of the key too, because we can
200+
// have a read-reference and write-reference at the same location
201+
// (we don't merge those for now).
202+
//
203+
// The 'value' in the map is purely for sanity-checking. It's a bit
204+
// cumbersome to conditionalize the type to be a set in non-debug and
205+
// map in debug, so keeping it a map.
206+
UnorderedMap<std::pair<core::LocOffsets, /*SymbolRole*/ int32_t>, uint32_t> localOccurrenceCache;
201207
vector<scip::Document> documents;
202208
vector<scip::SymbolInformation> externalSymbols;
203209

@@ -271,14 +277,52 @@ class SCIPState {
271277
return absl::OkStatus();
272278
}
273279

280+
// Returns true if there was a cache hit.
281+
//
282+
// Otherwise, inserts the location into the cache and returns false.
283+
bool cacheOccurrence(const core::GlobalState &gs, core::FileRef file, OwnedLocal occ, int32_t symbolRoles) {
284+
// Optimization:
285+
// Avoid emitting duplicate defs/refs for locals.
286+
// This can happen with constructs like:
287+
// z = if cond then expr else expr end
288+
// When this is lowered to a CFG, we will end up with
289+
// multiple bindings with the same LHS location.
290+
//
291+
// Repeated reads for the same occurrence can happen for rvalues too.
292+
// If the compiler has proof that a scrutinee variable is not modified,
293+
// each comparison in a case will perform a read.
294+
// case x
295+
// when 0 then <stuff>
296+
// when 1 then <stuff>
297+
// else <stuff>
298+
// end
299+
// The CFG for this involves two reads for x in calls to ==.
300+
// (This wouldn't have happened if x was always stashed away into
301+
// a temporary first, but the temporary only appears in the CFG if
302+
// evaluating one of the cases has a chance to modify x.)
303+
auto [it, inserted] = this->localOccurrenceCache.insert({{occ.offsets, symbolRoles}, occ.counter});
304+
if (inserted) {
305+
return false;
306+
}
307+
auto savedCounter = it->second;
308+
ENFORCE(occ.counter == savedCounter, "cannot have distinct local variable {} at same location {}",
309+
(symbolRoles & scip::SymbolRole::Definition) ? "definitions" : "references",
310+
core::Loc(file, occ.offsets).showRaw(gs));
311+
return true;
312+
}
313+
274314
public:
275315
absl::Status saveDefinition(const core::GlobalState &gs, core::FileRef file, OwnedLocal occ) {
316+
if (this->cacheOccurrence(gs, file, occ, scip::SymbolRole::Definition)) {
317+
return absl::OkStatus();
318+
}
276319
return this->saveDefinitionImpl(gs, file, occ.toString(gs, file), core::Loc(file, occ.offsets));
277320
}
278321

279322
// Save definition when you have a sorbet Symbol.
280323
// Meant for methods, fields etc., but not local variables.
281324
absl::Status saveDefinition(const core::GlobalState &gs, core::FileRef file, core::SymbolRef symRef) {
325+
// TODO:(varun) Should we cache here too to avoid emitting duplicate definitions?
282326
scip::Symbol symbol;
283327
auto status = symbolForExpr(gs, symRef, symbol);
284328
if (!status.ok()) {
@@ -297,11 +341,15 @@ class SCIPState {
297341
}
298342

299343
absl::Status saveReference(const core::GlobalState &gs, core::FileRef file, OwnedLocal occ, int32_t symbol_roles) {
344+
if (this->cacheOccurrence(gs, file, occ, symbol_roles)) {
345+
return absl::OkStatus();
346+
}
300347
return this->saveReferenceImpl(gs, file, occ.toString(gs, file), occ.offsets, symbol_roles);
301348
}
302349

303350
absl::Status saveReference(const core::GlobalState &gs, core::FileRef file, core::SymbolRef symRef,
304351
core::LocOffsets occLoc, int32_t symbol_roles) {
352+
// TODO:(varun) Should we cache here to to avoid emitting duplicate references?
305353
absl::StatusOr<string *> valueOrStatus(this->saveSymbolString(gs, symRef, nullptr));
306354
if (!valueOrStatus.ok()) {
307355
return valueOrStatus.status();
@@ -357,7 +405,29 @@ core::SymbolRef lookupRecursive(const core::GlobalState &gs, const core::SymbolR
357405
}
358406

359407
class CFGTraversal final {
360-
UnorderedMap<const cfg::BasicBlock *, UnorderedMap<cfg::LocalRef, core::Loc>> blockLocals;
408+
// A map from each basic block to the locals in it.
409+
//
410+
// The locals may be coming from the parents, or they may be defined in the
411+
// block. Locals coming from the parents may be in the form of basic block
412+
// arguments or they may be "directly referenced."
413+
//
414+
// For example, if you have code like:
415+
//
416+
// def f(x):
417+
// y = 0
418+
// if cond:
419+
// y = x + $z
420+
//
421+
// Then x and $z will be passed as arguments to the basic block for the
422+
// true branch, whereas 'y' won't be. However, 'y' is still technically
423+
// coming from the parent basic block, otherwise we'd end up marking the
424+
// assignment as a definition instead of a (write) reference.
425+
//
426+
// At the start of the traversal of a basic block, the entry for a basic
427+
// block is populated with the locals coming from the parents. Then,
428+
// we traverse each instruction and populate it with the locals defined
429+
// in the block.
430+
UnorderedMap<const cfg::BasicBlock *, UnorderedSet<cfg::LocalRef>> blockLocals;
361431
UnorderedMap<cfg::LocalRef, uint32_t> functionLocals;
362432

363433
// Local variable counter that is reset for every function.
@@ -370,8 +440,8 @@ class CFGTraversal final {
370440
: blockLocals(), functionLocals(), scipState(scipState), ctx(ctx) {}
371441

372442
private:
373-
void addLocal(const cfg::BasicBlock *bb, cfg::LocalRef localRef, core::Loc loc) {
374-
this->blockLocals[bb][localRef] = loc;
443+
void addLocal(const cfg::BasicBlock *bb, cfg::LocalRef localRef) {
444+
this->blockLocals[bb].insert(localRef);
375445
this->functionLocals[localRef] = ++this->counter;
376446
}
377447

@@ -405,7 +475,7 @@ class CFGTraversal final {
405475
if (!this->functionLocals.contains(localRef)) {
406476
isDefinition = true; // If we're seeing this for the first time in topological order,
407477
// The current block must have a definition for the variable.
408-
this->addLocal(bb, localRef, this->ctx.locAt(local.loc));
478+
this->addLocal(bb, localRef);
409479
}
410480
// The variable wasn't passed in as an argument, and hasn't already been recorded
411481
// as a local in the block. So this must be a definition line.
@@ -419,7 +489,7 @@ class CFGTraversal final {
419489
// Ill-formed code where we're trying to access a variable
420490
// without setting it first. Emit a local as a best-effort.
421491
// TODO(varun): Will Sorbet error out before we get here?
422-
this->addLocal(bb, localRef, this->ctx.locAt(local.loc));
492+
this->addLocal(bb, localRef);
423493
}
424494
// TODO(varun): Will Sorbet error out before we get here?
425495
// It's possible that we have ill-formed code where the variable
@@ -448,42 +518,34 @@ class CFGTraversal final {
448518
return true;
449519
}
450520

451-
void addArgLocals(cfg::BasicBlock *bb, const cfg::CFG &cfg) {
452-
this->blockLocals[bb] = {};
453-
for (auto &bbArgs : bb->args) {
454-
bool found = false;
455-
for (auto parentBB : bb->backEdges) {
456-
if (this->blockLocals[parentBB].contains(bbArgs.variable)) {
457-
this->blockLocals[bb][bbArgs.variable] = this->blockLocals[parentBB][bbArgs.variable];
458-
found = true;
459-
break;
460-
}
521+
void copyLocalsFromParents(cfg::BasicBlock *bb, const cfg::CFG &cfg) {
522+
UnorderedSet<cfg::LocalRef> bbLocals{};
523+
for (auto parentBB : bb->backEdges) {
524+
if (!this->blockLocals.contains(parentBB)) { // e.g. loops
525+
continue;
461526
}
462-
if (!found) {
463-
print_dbg("# basic block argument {} did not come from parents {}\n",
464-
bbArgs.toString(this->ctx.state, cfg), bbvec_to_string(bb->backEdges));
527+
auto &parentLocals = this->blockLocals[parentBB];
528+
if (bbLocals.size() + parentLocals.size() > bbLocals.capacity()) {
529+
bbLocals.reserve(bbLocals.size() + parentLocals.size());
530+
}
531+
for (auto local : parentLocals) {
532+
bbLocals.insert(local);
465533
}
466534
}
535+
ENFORCE(!this->blockLocals.contains(bb));
536+
this->blockLocals[bb] = std::move(bbLocals);
467537
}
468538

469539
public:
470540
void traverse(const cfg::CFG &cfg) {
471541
auto &gs = this->ctx.state;
472542
auto method = this->ctx.owner;
473543

474-
auto print_map = [&cfg, &gs](const UnorderedMap<cfg::LocalRef, core::Loc> &map, cfg::BasicBlock *ptr) {
475-
print_dbg("# blockLocals (id: {}, ptr: {}) = {}\n", ptr->id, (void *)ptr,
476-
map_to_string(
477-
map, [&](const auto &localref, const auto &loc) -> auto {
478-
return fmt::format("{} {}", localref.data(cfg).toString(gs), loc.showRaw(gs));
479-
}));
480-
};
481-
482544
// I don't fully understand the doc comment for forwardsTopoSort; it seems backwards in practice.
483545
for (auto it = cfg.forwardsTopoSort.rbegin(); it != cfg.forwardsTopoSort.rend(); ++it) {
484546
cfg::BasicBlock *bb = *it;
485547
print_dbg("# Looking at block id: {} ptr: {}\n", bb->id, (void *)bb);
486-
this->addArgLocals(bb, cfg);
548+
this->copyLocalsFromParents(bb, cfg);
487549
for (auto &binding : bb->exprs) {
488550
if (auto *aliasInstr = cfg::cast_instruction<cfg::Alias>(binding.value)) {
489551
auto aliasName = aliasInstr->name;
@@ -497,7 +559,7 @@ class CFGTraversal final {
497559
method.toString(gs));
498560
continue;
499561
}
500-
this->addLocal(bb, binding.bind.variable, aliasedSym.loc(gs));
562+
this->addLocal(bb, binding.bind.variable);
501563
continue;
502564
}
503565
if (!binding.loc.exists() || binding.loc.empty()) { // TODO(varun): When can each case happen?
@@ -599,7 +661,6 @@ class CFGTraversal final {
599661
}
600662
}
601663
}
602-
print_map(this->blockLocals.at(bb), bb);
603664
}
604665
}
605666
};
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# typed: true
2+
3+
def if_elsif_else()
4+
x = 0
5+
y = 0
6+
# Basic stuff
7+
if x == 1
8+
y = 2
9+
elsif x == 2
10+
y = 3
11+
else
12+
y = x
13+
end
14+
15+
# More complex expressiosn
16+
z =
17+
if if x == 0 then x+1 else x+2 end == 1
18+
x
19+
else
20+
x+1
21+
end
22+
z = z if z != 10
23+
24+
return
25+
end
26+
27+
def unless()
28+
z = 0
29+
x = 1
30+
unless z == 9
31+
z = 9
32+
end
33+
34+
unless x == 10
35+
x = 3
36+
else
37+
x = 2
38+
end
39+
return
40+
end
41+
42+
def case(x, y)
43+
case x
44+
when 0
45+
x = 3
46+
when y
47+
x = 2
48+
when (3 == (x = 1))
49+
x = 0
50+
else
51+
x = 1
52+
end
53+
return
54+
end
55+
56+
def for(xs)
57+
for e in xs
58+
puts e
59+
end
60+
61+
for f in xs
62+
g = f+1
63+
next if g == 0
64+
next g+1 if g == 1
65+
break if g == 2
66+
break g+1 if g == 3
67+
# NOTE: redo is unsupported (https://srb.help/3003)
68+
# but emitting a reference here does work
69+
redo if g == 4
70+
end
71+
end
72+
73+
def while(xs)
74+
i = 0
75+
while i < 10
76+
puts xs[i]
77+
end
78+
79+
j = 0
80+
while j < 10
81+
g = xs[j]
82+
next if g == 0
83+
next g+1 if g == 1
84+
break if g == 2
85+
break g+1 if g == 3
86+
# NOTE: redo is unsupported (https://srb.help/3003)
87+
# but emitting a reference here does work
88+
redo if g == 4
89+
end
90+
end
91+
92+
def until(xs)
93+
i = 0
94+
until i > 10
95+
puts xs[i]
96+
end
97+
98+
j = 0
99+
until j > 10
100+
g = xs[j]
101+
next if g == 0
102+
next g+1 if g == 1
103+
break if g == 2
104+
break g+1 if g == 3
105+
# NOTE: redo is unsupported (https://srb.help/3003)
106+
# but emitting a reference here does work
107+
redo if g == 4
108+
end
109+
end
110+
111+
def flip_flop(xs)
112+
# NOTE: flip-flops are unsupported (https://srb.help/3003)
113+
# Unlike redo, which somehow works, we fail to emit references
114+
# for the conditions.
115+
# Keep this test anyways to check that we don't crash/mess something up
116+
for x in xs
117+
puts x if x==2..x==8
118+
puts x+1 if x==4...x==6
119+
end
120+
end

0 commit comments

Comments
 (0)