Skip to content

feat: Add support for loops and conditionals #9

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jul 10, 2022
5 changes: 5 additions & 0 deletions core/LocOffsets.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ class MutableContext;

constexpr int INVALID_POS_LOC = 0xfffffff;
struct LocOffsets {
template <typename H> friend H AbslHashValue(H h, const LocOffsets &m);

uint32_t beginLoc = INVALID_POS_LOC;
uint32_t endLoc = INVALID_POS_LOC;
uint32_t beginPos() const {
Expand Down Expand Up @@ -51,6 +53,9 @@ struct LocOffsets {
};
CheckSize(LocOffsets, 8, 4);

template <typename H> H AbslHashValue(H h, const LocOffsets &m) {
return H::combine(std::move(h), m.beginLoc, m.endLoc);
}
} // namespace sorbet::core

#endif // SORBET_CORE_LOCOFFSETS_H
131 changes: 96 additions & 35 deletions scip_indexer/SCIPIndexer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "cfg/CFG.h"
#include "common/common.h"
#include "common/sort.h"
#include "core/ErrorQueue.h"
#include "core/Loc.h"
#include "core/SymbolRef.h"
#include "core/Symbols.h"
Expand Down Expand Up @@ -67,11 +68,6 @@ template <typename T, typename Fn> static string vec_to_string(const vector<T> v
return out.str();
}

static string bbvec_to_string(const vector<sorbet::cfg::BasicBlock *> &parents) {
return vec_to_string(
parents, [](const auto &ptr) -> auto { return fmt::format("(id: {}, ptr: {})", ptr->id, (void *)ptr); });
};

template <typename T> static void drain(vector<T> &input, vector<T> &output) {
output.reserve(output.size() + input.size());
for (auto &v : input) {
Expand Down Expand Up @@ -109,7 +105,7 @@ static bool isTemporary(const core::GlobalState &gs, const core::LocalVariable &
}
auto n = var._name;
return n == Names::blockPreCallTemp() || n == Names::blockTemp() || n == Names::blockPassTemp() ||
n == Names::forTemp() || n == Names::blkArg() || n == Names::blockCall() ||
n == Names::blkArg() || n == Names::blockCall() || n == Names::blockBreakAssign() || n == Names::forTemp() ||
// Insert checks because sometimes temporaries are initialized with a 0 unique value. 😬
n == Names::finalReturn() || n == NameRef::noName() || n == Names::blockCall() || n == Names::selfLocal() ||
n == Names::unconditional();
Expand Down Expand Up @@ -198,6 +194,16 @@ class SCIPState {
public:
UnorderedMap<core::FileRef, vector<scip::Occurrence>> occurrenceMap;
UnorderedMap<core::FileRef, vector<scip::SymbolInformation>> symbolMap;
// Cache of occurrences for locals that have been emitted in this function.
//
// Note that the SymbolRole is a part of the key too, because we can
// have a read-reference and write-reference at the same location
// (we don't merge those for now).
//
// The 'value' in the map is purely for sanity-checking. It's a bit
// cumbersome to conditionalize the type to be a set in non-debug and
// map in debug, so keeping it a map.
UnorderedMap<std::pair<core::LocOffsets, /*SymbolRole*/ int32_t>, uint32_t> localOccurrenceCache;
vector<scip::Document> documents;
vector<scip::SymbolInformation> externalSymbols;

Expand Down Expand Up @@ -271,14 +277,52 @@ class SCIPState {
return absl::OkStatus();
}

// Returns true if there was a cache hit.
//
// Otherwise, inserts the location into the cache and returns false.
bool cacheOccurrence(const core::GlobalState &gs, core::FileRef file, OwnedLocal occ, int32_t symbolRoles) {
// Optimization:
// Avoid emitting duplicate defs/refs for locals.
// This can happen with constructs like:
// z = if cond then expr else expr end
// When this is lowered to a CFG, we will end up with
// multiple bindings with the same LHS location.
//
// Repeated reads for the same occurrence can happen for rvalues too.
// If the compiler has proof that a scrutinee variable is not modified,
// each comparison in a case will perform a read.
// case x
// when 0 then <stuff>
// when 1 then <stuff>
// else <stuff>
// end
// The CFG for this involves two reads for x in calls to ==.
// (This wouldn't have happened if x was always stashed away into
// a temporary first, but the temporary only appears in the CFG if
// evaluating one of the cases has a chance to modify x.)
auto [it, inserted] = this->localOccurrenceCache.insert({{occ.offsets, symbolRoles}, occ.counter});
if (inserted) {
return false;
}
auto savedCounter = it->second;
ENFORCE(occ.counter == savedCounter, "cannot have distinct local variable {} at same location {}",
(symbolRoles & scip::SymbolRole::Definition) ? "definitions" : "references",
core::Loc(file, occ.offsets).showRaw(gs));
return true;
}

public:
absl::Status saveDefinition(const core::GlobalState &gs, core::FileRef file, OwnedLocal occ) {
if (this->cacheOccurrence(gs, file, occ, scip::SymbolRole::Definition)) {
return absl::OkStatus();
}
return this->saveDefinitionImpl(gs, file, occ.toString(gs, file), core::Loc(file, occ.offsets));
}

// Save definition when you have a sorbet Symbol.
// Meant for methods, fields etc., but not local variables.
absl::Status saveDefinition(const core::GlobalState &gs, core::FileRef file, core::SymbolRef symRef) {
// TODO:(varun) Should we cache here too to avoid emitting duplicate definitions?
scip::Symbol symbol;
auto status = symbolForExpr(gs, symRef, symbol);
if (!status.ok()) {
Expand All @@ -297,11 +341,15 @@ class SCIPState {
}

absl::Status saveReference(const core::GlobalState &gs, core::FileRef file, OwnedLocal occ, int32_t symbol_roles) {
if (this->cacheOccurrence(gs, file, occ, symbol_roles)) {
return absl::OkStatus();
}
return this->saveReferenceImpl(gs, file, occ.toString(gs, file), occ.offsets, symbol_roles);
}

absl::Status saveReference(const core::GlobalState &gs, core::FileRef file, core::SymbolRef symRef,
core::LocOffsets occLoc, int32_t symbol_roles) {
// TODO:(varun) Should we cache here to to avoid emitting duplicate references?
absl::StatusOr<string *> valueOrStatus(this->saveSymbolString(gs, symRef, nullptr));
if (!valueOrStatus.ok()) {
return valueOrStatus.status();
Expand Down Expand Up @@ -357,7 +405,29 @@ core::SymbolRef lookupRecursive(const core::GlobalState &gs, const core::SymbolR
}

class CFGTraversal final {
UnorderedMap<const cfg::BasicBlock *, UnorderedMap<cfg::LocalRef, core::Loc>> blockLocals;
// A map from each basic block to the locals in it.
//
// The locals may be coming from the parents, or they may be defined in the
// block. Locals coming from the parents may be in the form of basic block
// arguments or they may be "directly referenced."
//
// For example, if you have code like:
//
// def f(x):
// y = 0
// if cond:
// y = x + $z
//
// Then x and $z will be passed as arguments to the basic block for the
// true branch, whereas 'y' won't be. However, 'y' is still technically
// coming from the parent basic block, otherwise we'd end up marking the
// assignment as a definition instead of a (write) reference.
//
// At the start of the traversal of a basic block, the entry for a basic
// block is populated with the locals coming from the parents. Then,
// we traverse each instruction and populate it with the locals defined
// in the block.
UnorderedMap<const cfg::BasicBlock *, UnorderedSet<cfg::LocalRef>> blockLocals;
UnorderedMap<cfg::LocalRef, uint32_t> functionLocals;

// Local variable counter that is reset for every function.
Expand All @@ -370,8 +440,8 @@ class CFGTraversal final {
: blockLocals(), functionLocals(), scipState(scipState), ctx(ctx) {}

private:
void addLocal(const cfg::BasicBlock *bb, cfg::LocalRef localRef, core::Loc loc) {
this->blockLocals[bb][localRef] = loc;
void addLocal(const cfg::BasicBlock *bb, cfg::LocalRef localRef) {
this->blockLocals[bb].insert(localRef);
this->functionLocals[localRef] = ++this->counter;
}

Expand Down Expand Up @@ -405,7 +475,7 @@ class CFGTraversal final {
if (!this->functionLocals.contains(localRef)) {
isDefinition = true; // If we're seeing this for the first time in topological order,
// The current block must have a definition for the variable.
this->addLocal(bb, localRef, this->ctx.locAt(local.loc));
this->addLocal(bb, localRef);
}
// The variable wasn't passed in as an argument, and hasn't already been recorded
// as a local in the block. So this must be a definition line.
Expand All @@ -419,7 +489,7 @@ class CFGTraversal final {
// Ill-formed code where we're trying to access a variable
// without setting it first. Emit a local as a best-effort.
// TODO(varun): Will Sorbet error out before we get here?
this->addLocal(bb, localRef, this->ctx.locAt(local.loc));
this->addLocal(bb, localRef);
}
// TODO(varun): Will Sorbet error out before we get here?
// It's possible that we have ill-formed code where the variable
Expand Down Expand Up @@ -448,42 +518,34 @@ class CFGTraversal final {
return true;
}

void addArgLocals(cfg::BasicBlock *bb, const cfg::CFG &cfg) {
this->blockLocals[bb] = {};
for (auto &bbArgs : bb->args) {
bool found = false;
for (auto parentBB : bb->backEdges) {
if (this->blockLocals[parentBB].contains(bbArgs.variable)) {
this->blockLocals[bb][bbArgs.variable] = this->blockLocals[parentBB][bbArgs.variable];
found = true;
break;
}
void copyLocalsFromParents(cfg::BasicBlock *bb, const cfg::CFG &cfg) {
UnorderedSet<cfg::LocalRef> bbLocals{};
for (auto parentBB : bb->backEdges) {
if (!this->blockLocals.contains(parentBB)) { // e.g. loops
continue;
}
if (!found) {
print_dbg("# basic block argument {} did not come from parents {}\n",
bbArgs.toString(this->ctx.state, cfg), bbvec_to_string(bb->backEdges));
auto &parentLocals = this->blockLocals[parentBB];
if (bbLocals.size() + parentLocals.size() > bbLocals.capacity()) {
bbLocals.reserve(bbLocals.size() + parentLocals.size());
}
for (auto local : parentLocals) {
bbLocals.insert(local);
}
}
ENFORCE(!this->blockLocals.contains(bb));
this->blockLocals[bb] = std::move(bbLocals);
}

public:
void traverse(const cfg::CFG &cfg) {
auto &gs = this->ctx.state;
auto method = this->ctx.owner;

auto print_map = [&cfg, &gs](const UnorderedMap<cfg::LocalRef, core::Loc> &map, cfg::BasicBlock *ptr) {
print_dbg("# blockLocals (id: {}, ptr: {}) = {}\n", ptr->id, (void *)ptr,
map_to_string(
map, [&](const auto &localref, const auto &loc) -> auto {
return fmt::format("{} {}", localref.data(cfg).toString(gs), loc.showRaw(gs));
}));
};

// I don't fully understand the doc comment for forwardsTopoSort; it seems backwards in practice.
for (auto it = cfg.forwardsTopoSort.rbegin(); it != cfg.forwardsTopoSort.rend(); ++it) {
cfg::BasicBlock *bb = *it;
print_dbg("# Looking at block id: {} ptr: {}\n", bb->id, (void *)bb);
this->addArgLocals(bb, cfg);
this->copyLocalsFromParents(bb, cfg);
for (auto &binding : bb->exprs) {
if (auto *aliasInstr = cfg::cast_instruction<cfg::Alias>(binding.value)) {
auto aliasName = aliasInstr->name;
Expand All @@ -497,7 +559,7 @@ class CFGTraversal final {
method.toString(gs));
continue;
}
this->addLocal(bb, binding.bind.variable, aliasedSym.loc(gs));
this->addLocal(bb, binding.bind.variable);
continue;
}
if (!binding.loc.exists() || binding.loc.empty()) { // TODO(varun): When can each case happen?
Expand Down Expand Up @@ -599,7 +661,6 @@ class CFGTraversal final {
}
}
}
print_map(this->blockLocals.at(bb), bb);
}
}
};
Expand Down
120 changes: 120 additions & 0 deletions test/scip/testdata/loops_and_conditionals.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# typed: true

def if_elsif_else()
x = 0
y = 0
# Basic stuff
if x == 1
y = 2
elsif x == 2
y = 3
else
y = x
end

# More complex expressiosn
z =
if if x == 0 then x+1 else x+2 end == 1
x
else
x+1
end
z = z if z != 10

return
end

def unless()
z = 0
x = 1
unless z == 9
z = 9
end

unless x == 10
x = 3
else
x = 2
end
return
end

def case(x, y)
case x
when 0
x = 3
when y
x = 2
when (3 == (x = 1))
x = 0
else
x = 1
end
return
end

def for(xs)
for e in xs
puts e
end

for f in xs
g = f+1
next if g == 0
next g+1 if g == 1
break if g == 2
break g+1 if g == 3
# NOTE: redo is unsupported (https://srb.help/3003)
# but emitting a reference here does work
redo if g == 4
end
end

def while(xs)
i = 0
while i < 10
puts xs[i]
end

j = 0
while j < 10
g = xs[j]
next if g == 0
next g+1 if g == 1
break if g == 2
break g+1 if g == 3
# NOTE: redo is unsupported (https://srb.help/3003)
# but emitting a reference here does work
redo if g == 4
end
end

def until(xs)
i = 0
until i > 10
puts xs[i]
end

j = 0
until j > 10
g = xs[j]
next if g == 0
next g+1 if g == 1
break if g == 2
break g+1 if g == 3
# NOTE: redo is unsupported (https://srb.help/3003)
# but emitting a reference here does work
redo if g == 4
end
end

def flip_flop(xs)
# NOTE: flip-flops are unsupported (https://srb.help/3003)
# Unlike redo, which somehow works, we fail to emit references
# for the conditions.
# Keep this test anyways to check that we don't crash/mess something up
for x in xs
puts x if x==2..x==8
puts x+1 if x==4...x==6
end
end
Loading