diff --git a/core/LocOffsets.h b/core/LocOffsets.h index 0359da16d2..94d421aec8 100644 --- a/core/LocOffsets.h +++ b/core/LocOffsets.h @@ -11,6 +11,8 @@ class MutableContext; constexpr int INVALID_POS_LOC = 0xfffffff; struct LocOffsets { + template friend H AbslHashValue(H h, const LocOffsets &m); + uint32_t beginLoc = INVALID_POS_LOC; uint32_t endLoc = INVALID_POS_LOC; uint32_t beginPos() const { @@ -51,6 +53,9 @@ struct LocOffsets { }; CheckSize(LocOffsets, 8, 4); +template H AbslHashValue(H h, const LocOffsets &m) { + return H::combine(std::move(h), m.beginLoc, m.endLoc); +} } // namespace sorbet::core #endif // SORBET_CORE_LOCOFFSETS_H diff --git a/scip_indexer/SCIPIndexer.cc b/scip_indexer/SCIPIndexer.cc index 8e79f857b6..c808867f35 100644 --- a/scip_indexer/SCIPIndexer.cc +++ b/scip_indexer/SCIPIndexer.cc @@ -20,6 +20,7 @@ #include "cfg/CFG.h" #include "common/common.h" #include "common/sort.h" +#include "core/ErrorQueue.h" #include "core/Loc.h" #include "core/SymbolRef.h" #include "core/Symbols.h" @@ -67,11 +68,6 @@ template static string vec_to_string(const vector v return out.str(); } -static string bbvec_to_string(const vector &parents) { - return vec_to_string( - parents, [](const auto &ptr) -> auto { return fmt::format("(id: {}, ptr: {})", ptr->id, (void *)ptr); }); -}; - template static void drain(vector &input, vector &output) { output.reserve(output.size() + input.size()); for (auto &v : input) { @@ -109,7 +105,7 @@ static bool isTemporary(const core::GlobalState &gs, const core::LocalVariable & } auto n = var._name; return n == Names::blockPreCallTemp() || n == Names::blockTemp() || n == Names::blockPassTemp() || - n == Names::forTemp() || n == Names::blkArg() || n == Names::blockCall() || + n == Names::blkArg() || n == Names::blockCall() || n == Names::blockBreakAssign() || n == Names::forTemp() || // Insert checks because sometimes temporaries are initialized with a 0 unique value. 😬 n == Names::finalReturn() || n == NameRef::noName() || n == Names::blockCall() || n == Names::selfLocal() || n == Names::unconditional(); @@ -198,6 +194,16 @@ class SCIPState { public: UnorderedMap> occurrenceMap; UnorderedMap> symbolMap; + // Cache of occurrences for locals that have been emitted in this function. + // + // Note that the SymbolRole is a part of the key too, because we can + // have a read-reference and write-reference at the same location + // (we don't merge those for now). + // + // The 'value' in the map is purely for sanity-checking. It's a bit + // cumbersome to conditionalize the type to be a set in non-debug and + // map in debug, so keeping it a map. + UnorderedMap, uint32_t> localOccurrenceCache; vector documents; vector externalSymbols; @@ -271,14 +277,52 @@ class SCIPState { return absl::OkStatus(); } + // Returns true if there was a cache hit. + // + // Otherwise, inserts the location into the cache and returns false. + bool cacheOccurrence(const core::GlobalState &gs, core::FileRef file, OwnedLocal occ, int32_t symbolRoles) { + // Optimization: + // Avoid emitting duplicate defs/refs for locals. + // This can happen with constructs like: + // z = if cond then expr else expr end + // When this is lowered to a CFG, we will end up with + // multiple bindings with the same LHS location. + // + // Repeated reads for the same occurrence can happen for rvalues too. + // If the compiler has proof that a scrutinee variable is not modified, + // each comparison in a case will perform a read. + // case x + // when 0 then + // when 1 then + // else + // end + // The CFG for this involves two reads for x in calls to ==. + // (This wouldn't have happened if x was always stashed away into + // a temporary first, but the temporary only appears in the CFG if + // evaluating one of the cases has a chance to modify x.) + auto [it, inserted] = this->localOccurrenceCache.insert({{occ.offsets, symbolRoles}, occ.counter}); + if (inserted) { + return false; + } + auto savedCounter = it->second; + ENFORCE(occ.counter == savedCounter, "cannot have distinct local variable {} at same location {}", + (symbolRoles & scip::SymbolRole::Definition) ? "definitions" : "references", + core::Loc(file, occ.offsets).showRaw(gs)); + return true; + } + public: absl::Status saveDefinition(const core::GlobalState &gs, core::FileRef file, OwnedLocal occ) { + if (this->cacheOccurrence(gs, file, occ, scip::SymbolRole::Definition)) { + return absl::OkStatus(); + } return this->saveDefinitionImpl(gs, file, occ.toString(gs, file), core::Loc(file, occ.offsets)); } // Save definition when you have a sorbet Symbol. // Meant for methods, fields etc., but not local variables. absl::Status saveDefinition(const core::GlobalState &gs, core::FileRef file, core::SymbolRef symRef) { + // TODO:(varun) Should we cache here too to avoid emitting duplicate definitions? scip::Symbol symbol; auto status = symbolForExpr(gs, symRef, symbol); if (!status.ok()) { @@ -297,11 +341,15 @@ class SCIPState { } absl::Status saveReference(const core::GlobalState &gs, core::FileRef file, OwnedLocal occ, int32_t symbol_roles) { + if (this->cacheOccurrence(gs, file, occ, symbol_roles)) { + return absl::OkStatus(); + } return this->saveReferenceImpl(gs, file, occ.toString(gs, file), occ.offsets, symbol_roles); } absl::Status saveReference(const core::GlobalState &gs, core::FileRef file, core::SymbolRef symRef, core::LocOffsets occLoc, int32_t symbol_roles) { + // TODO:(varun) Should we cache here to to avoid emitting duplicate references? absl::StatusOr valueOrStatus(this->saveSymbolString(gs, symRef, nullptr)); if (!valueOrStatus.ok()) { return valueOrStatus.status(); @@ -357,7 +405,29 @@ core::SymbolRef lookupRecursive(const core::GlobalState &gs, const core::SymbolR } class CFGTraversal final { - UnorderedMap> blockLocals; + // A map from each basic block to the locals in it. + // + // The locals may be coming from the parents, or they may be defined in the + // block. Locals coming from the parents may be in the form of basic block + // arguments or they may be "directly referenced." + // + // For example, if you have code like: + // + // def f(x): + // y = 0 + // if cond: + // y = x + $z + // + // Then x and $z will be passed as arguments to the basic block for the + // true branch, whereas 'y' won't be. However, 'y' is still technically + // coming from the parent basic block, otherwise we'd end up marking the + // assignment as a definition instead of a (write) reference. + // + // At the start of the traversal of a basic block, the entry for a basic + // block is populated with the locals coming from the parents. Then, + // we traverse each instruction and populate it with the locals defined + // in the block. + UnorderedMap> blockLocals; UnorderedMap functionLocals; // Local variable counter that is reset for every function. @@ -370,8 +440,8 @@ class CFGTraversal final { : blockLocals(), functionLocals(), scipState(scipState), ctx(ctx) {} private: - void addLocal(const cfg::BasicBlock *bb, cfg::LocalRef localRef, core::Loc loc) { - this->blockLocals[bb][localRef] = loc; + void addLocal(const cfg::BasicBlock *bb, cfg::LocalRef localRef) { + this->blockLocals[bb].insert(localRef); this->functionLocals[localRef] = ++this->counter; } @@ -405,7 +475,7 @@ class CFGTraversal final { if (!this->functionLocals.contains(localRef)) { isDefinition = true; // If we're seeing this for the first time in topological order, // The current block must have a definition for the variable. - this->addLocal(bb, localRef, this->ctx.locAt(local.loc)); + this->addLocal(bb, localRef); } // The variable wasn't passed in as an argument, and hasn't already been recorded // as a local in the block. So this must be a definition line. @@ -419,7 +489,7 @@ class CFGTraversal final { // Ill-formed code where we're trying to access a variable // without setting it first. Emit a local as a best-effort. // TODO(varun): Will Sorbet error out before we get here? - this->addLocal(bb, localRef, this->ctx.locAt(local.loc)); + this->addLocal(bb, localRef); } // TODO(varun): Will Sorbet error out before we get here? // It's possible that we have ill-formed code where the variable @@ -448,22 +518,22 @@ class CFGTraversal final { return true; } - void addArgLocals(cfg::BasicBlock *bb, const cfg::CFG &cfg) { - this->blockLocals[bb] = {}; - for (auto &bbArgs : bb->args) { - bool found = false; - for (auto parentBB : bb->backEdges) { - if (this->blockLocals[parentBB].contains(bbArgs.variable)) { - this->blockLocals[bb][bbArgs.variable] = this->blockLocals[parentBB][bbArgs.variable]; - found = true; - break; - } + void copyLocalsFromParents(cfg::BasicBlock *bb, const cfg::CFG &cfg) { + UnorderedSet bbLocals{}; + for (auto parentBB : bb->backEdges) { + if (!this->blockLocals.contains(parentBB)) { // e.g. loops + continue; } - if (!found) { - print_dbg("# basic block argument {} did not come from parents {}\n", - bbArgs.toString(this->ctx.state, cfg), bbvec_to_string(bb->backEdges)); + auto &parentLocals = this->blockLocals[parentBB]; + if (bbLocals.size() + parentLocals.size() > bbLocals.capacity()) { + bbLocals.reserve(bbLocals.size() + parentLocals.size()); + } + for (auto local : parentLocals) { + bbLocals.insert(local); } } + ENFORCE(!this->blockLocals.contains(bb)); + this->blockLocals[bb] = std::move(bbLocals); } public: @@ -471,19 +541,11 @@ class CFGTraversal final { auto &gs = this->ctx.state; auto method = this->ctx.owner; - auto print_map = [&cfg, &gs](const UnorderedMap &map, cfg::BasicBlock *ptr) { - print_dbg("# blockLocals (id: {}, ptr: {}) = {}\n", ptr->id, (void *)ptr, - map_to_string( - map, [&](const auto &localref, const auto &loc) -> auto { - return fmt::format("{} {}", localref.data(cfg).toString(gs), loc.showRaw(gs)); - })); - }; - // I don't fully understand the doc comment for forwardsTopoSort; it seems backwards in practice. for (auto it = cfg.forwardsTopoSort.rbegin(); it != cfg.forwardsTopoSort.rend(); ++it) { cfg::BasicBlock *bb = *it; print_dbg("# Looking at block id: {} ptr: {}\n", bb->id, (void *)bb); - this->addArgLocals(bb, cfg); + this->copyLocalsFromParents(bb, cfg); for (auto &binding : bb->exprs) { if (auto *aliasInstr = cfg::cast_instruction(binding.value)) { auto aliasName = aliasInstr->name; @@ -497,7 +559,7 @@ class CFGTraversal final { method.toString(gs)); continue; } - this->addLocal(bb, binding.bind.variable, aliasedSym.loc(gs)); + this->addLocal(bb, binding.bind.variable); continue; } if (!binding.loc.exists() || binding.loc.empty()) { // TODO(varun): When can each case happen? @@ -599,7 +661,6 @@ class CFGTraversal final { } } } - print_map(this->blockLocals.at(bb), bb); } } }; diff --git a/test/scip/testdata/loops_and_conditionals.rb b/test/scip/testdata/loops_and_conditionals.rb new file mode 100644 index 0000000000..36914c61c6 --- /dev/null +++ b/test/scip/testdata/loops_and_conditionals.rb @@ -0,0 +1,120 @@ +# typed: true + +def if_elsif_else() + x = 0 + y = 0 + # Basic stuff + if x == 1 + y = 2 + elsif x == 2 + y = 3 + else + y = x + end + + # More complex expressiosn + z = + if if x == 0 then x+1 else x+2 end == 1 + x + else + x+1 + end + z = z if z != 10 + + return +end + +def unless() + z = 0 + x = 1 + unless z == 9 + z = 9 + end + + unless x == 10 + x = 3 + else + x = 2 + end + return +end + +def case(x, y) + case x + when 0 + x = 3 + when y + x = 2 + when (3 == (x = 1)) + x = 0 + else + x = 1 + end + return +end + +def for(xs) + for e in xs + puts e + end + + for f in xs + g = f+1 + next if g == 0 + next g+1 if g == 1 + break if g == 2 + break g+1 if g == 3 + # NOTE: redo is unsupported (https://srb.help/3003) + # but emitting a reference here does work + redo if g == 4 + end +end + +def while(xs) + i = 0 + while i < 10 + puts xs[i] + end + + j = 0 + while j < 10 + g = xs[j] + next if g == 0 + next g+1 if g == 1 + break if g == 2 + break g+1 if g == 3 + # NOTE: redo is unsupported (https://srb.help/3003) + # but emitting a reference here does work + redo if g == 4 + end +end + +def until(xs) + i = 0 + until i > 10 + puts xs[i] + end + + j = 0 + until j > 10 + g = xs[j] + next if g == 0 + next g+1 if g == 1 + break if g == 2 + break g+1 if g == 3 + # NOTE: redo is unsupported (https://srb.help/3003) + # but emitting a reference here does work + redo if g == 4 + end +end + +def flip_flop(xs) + # NOTE: flip-flops are unsupported (https://srb.help/3003) + # Unlike redo, which somehow works, we fail to emit references + # for the conditions. + # Keep this test anyways to check that we don't crash/mess something up + for x in xs + puts x if x==2..x==8 + puts x+1 if x==4...x==6 + end +end diff --git a/test/scip/testdata/loops_and_conditionals.snapshot.rb b/test/scip/testdata/loops_and_conditionals.snapshot.rb new file mode 100644 index 0000000000..eec439230a --- /dev/null +++ b/test/scip/testdata/loops_and_conditionals.snapshot.rb @@ -0,0 +1,216 @@ + # typed: true + + def if_elsif_else() +#^^^^^^^^^^^^^^^^^^^ definition scip-ruby gem TODO TODO if_elsif_else(). +#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ definition scip-ruby gem TODO TODO (). + x = 0 +# ^ definition local 1~#2393773952 + y = 0 +# ^ definition local 2~#2393773952 + # Basic stuff + if x == 1 +# ^ reference local 1~#2393773952 + y = 2 +# ^ reference (write) local 2~#2393773952 + elsif x == 2 +# ^ reference local 1~#2393773952 + y = 3 +# ^ reference (write) local 2~#2393773952 + else + y = x +# ^ reference (write) local 2~#2393773952 +# ^ reference local 1~#2393773952 + end + + # More complex expressiosn + z = +# ^ definition local 3~#2393773952 + if if x == 0 then x+1 else x+2 end == 1 +# ^ reference local 1~#2393773952 +# ^ reference local 1~#2393773952 +# ^ reference local 1~#2393773952 + x +# ^ reference local 1~#2393773952 + else + x+1 +# ^ reference local 1~#2393773952 + end + z = z if z != 10 +# ^ reference (write) local 3~#2393773952 +# ^ reference local 3~#2393773952 +# ^ reference local 3~#2393773952 + + return + end + + def unless() +#^^^^^^^^^^^^ definition scip-ruby gem TODO TODO unless(). + z = 0 +# ^ definition local 1~#2827997891 + x = 1 +# ^ definition local 2~#2827997891 + unless z == 9 +# ^ reference local 1~#2827997891 + z = 9 +# ^ reference (write) local 1~#2827997891 + end + + unless x == 10 +# ^ reference local 2~#2827997891 + x = 3 +# ^ reference (write) local 2~#2827997891 + else + x = 2 +# ^ reference (write) local 2~#2827997891 + end + return + end + + def case(x, y) +#^^^^^^^^^^^^^^ definition scip-ruby gem TODO TODO case(). +# ^ definition local 1~#2602907825 +# ^ definition local 2~#2602907825 + case x +# ^ reference local 1~#2602907825 + when 0 + x = 3 +# ^ reference (write) local 1~#2602907825 + when y +# ^ reference local 2~#2602907825 + x = 2 +# ^ reference (write) local 1~#2602907825 + when (3 == (x = 1)) +# ^ reference (write) local 1~#2602907825 +# ^^^^^ reference local 1~#2602907825 + x = 0 +# ^ reference (write) local 1~#2602907825 + else + x = 1 +# ^ reference (write) local 1~#2602907825 + end + return + end + + def for(xs) +#^^^^^^^^^^^ definition scip-ruby gem TODO TODO for(). +# ^^ definition local 1~#2901640080 + for e in xs +# ^ definition local 2~#2901640080 +# ^^ reference local 1~#2901640080 + puts e +# ^ reference local 2~#2901640080 + end + + for f in xs +# ^ definition local 3~#2901640080 +# ^^ reference local 1~#2901640080 + g = f+1 +# ^ definition local 4~#2901640080 +# ^ reference local 3~#2901640080 + next if g == 0 +# ^ reference local 4~#2901640080 + next g+1 if g == 1 +# ^ reference local 4~#2901640080 +# ^ reference local 4~#2901640080 + break if g == 2 +# ^ reference local 4~#2901640080 + break g+1 if g == 3 +# ^ reference local 4~#2901640080 +# ^ reference local 4~#2901640080 + # NOTE: redo is unsupported (https://srb.help/3003) + # but emitting a reference here does work + redo if g == 4 +# ^ reference local 4~#2901640080 + end + end + + def while(xs) +#^^^^^^^^^^^^^ definition scip-ruby gem TODO TODO while(). +# ^^ definition local 1~#231090382 + i = 0 +# ^ definition local 2~#231090382 + while i < 10 +# ^ reference local 2~#231090382 + puts xs[i] +# ^^ reference local 1~#231090382 +# ^ reference local 2~#231090382 + end + + j = 0 +# ^ definition local 3~#231090382 + while j < 10 +# ^ reference local 3~#231090382 + g = xs[j] +# ^ definition local 4~#231090382 +# ^^ reference local 1~#231090382 +# ^ reference local 3~#231090382 + next if g == 0 +# ^ reference local 4~#231090382 + next g+1 if g == 1 +# ^ reference local 4~#231090382 +# ^ reference local 4~#231090382 + break if g == 2 +# ^ reference local 4~#231090382 + break g+1 if g == 3 +# ^ reference local 4~#231090382 +# ^ reference local 4~#231090382 + # NOTE: redo is unsupported (https://srb.help/3003) + # but emitting a reference here does work + redo if g == 4 +# ^ reference local 4~#231090382 + end + end + + def until(xs) +#^^^^^^^^^^^^^ definition scip-ruby gem TODO TODO until(). +# ^^ definition local 1~#3132432719 + i = 0 +# ^ definition local 2~#3132432719 + until i > 10 +# ^ reference local 2~#3132432719 + puts xs[i] +# ^^ reference local 1~#3132432719 +# ^ reference local 2~#3132432719 + end + + j = 0 +# ^ definition local 3~#3132432719 + until j > 10 +# ^ reference local 3~#3132432719 + g = xs[j] +# ^ definition local 4~#3132432719 +# ^^ reference local 1~#3132432719 +# ^ reference local 3~#3132432719 + next if g == 0 +# ^ reference local 4~#3132432719 + next g+1 if g == 1 +# ^ reference local 4~#3132432719 +# ^ reference local 4~#3132432719 + break if g == 2 +# ^ reference local 4~#3132432719 + break g+1 if g == 3 +# ^ reference local 4~#3132432719 +# ^ reference local 4~#3132432719 + # NOTE: redo is unsupported (https://srb.help/3003) + # but emitting a reference here does work + redo if g == 4 +# ^ reference local 4~#3132432719 + end + end + + def flip_flop(xs) +#^^^^^^^^^^^^^^^^^ definition scip-ruby gem TODO TODO flip_flop(). +# ^^ definition local 1~#2191960030 + # NOTE: flip-flops are unsupported (https://srb.help/3003) + # Unlike redo, which somehow works, we fail to emit references + # for the conditions. + # Keep this test anyways to check that we don't crash/mess something up + for x in xs +# ^ definition local 2~#2191960030 +# ^^ reference local 1~#2191960030 + puts x if x==2..x==8 +# ^ reference local 2~#2191960030 + puts x+1 if x==4...x==6 +# ^ reference local 2~#2191960030 + end + end diff --git a/test/scip/testdata/s.rb b/test/scip/testdata/s.rb new file mode 100644 index 0000000000..967b67ec48 --- /dev/null +++ b/test/scip/testdata/s.rb @@ -0,0 +1,13 @@ +# typed: true + +def case(x, y) + case x + when 0 + x = 3 + when (3 == (x = 1)) + x = 0 + else + x = 1 + end + return +end diff --git a/test/scip/testdata/s.snapshot.rb b/test/scip/testdata/s.snapshot.rb new file mode 100644 index 0000000000..9d7da15176 --- /dev/null +++ b/test/scip/testdata/s.snapshot.rb @@ -0,0 +1,22 @@ + # typed: true + + def case(x, y) +#^^^^^^^^^^^^^^ definition scip-ruby gem TODO TODO case(). +#^^^^^^^^^^^^ definition scip-ruby gem TODO TODO (). +# ^ definition local 1~#2602907825 + case x +# ^ reference local 1~#2602907825 + when 0 + x = 3 +# ^ reference (write) local 1~#2602907825 + when (3 == (x = 1)) +# ^ reference (write) local 1~#2602907825 +# ^^^^^ reference local 1~#2602907825 + x = 0 +# ^ reference (write) local 1~#2602907825 + else + x = 1 +# ^ reference (write) local 1~#2602907825 + end + return + end