20
20
#include " cfg/CFG.h"
21
21
#include " common/common.h"
22
22
#include " common/sort.h"
23
+ #include " core/ErrorQueue.h"
23
24
#include " core/Loc.h"
24
25
#include " core/SymbolRef.h"
25
26
#include " core/Symbols.h"
@@ -67,11 +68,6 @@ template <typename T, typename Fn> static string vec_to_string(const vector<T> v
67
68
return out.str ();
68
69
}
69
70
70
- static string bbvec_to_string (const vector<sorbet::cfg::BasicBlock *> &parents) {
71
- return vec_to_string (
72
- parents, [](const auto &ptr) -> auto { return fmt::format (" (id: {}, ptr: {})" , ptr->id , (void *)ptr); });
73
- };
74
-
75
71
template <typename T> static void drain (vector<T> &input, vector<T> &output) {
76
72
output.reserve (output.size () + input.size ());
77
73
for (auto &v : input) {
@@ -109,7 +105,7 @@ static bool isTemporary(const core::GlobalState &gs, const core::LocalVariable &
109
105
}
110
106
auto n = var._name ;
111
107
return n == Names::blockPreCallTemp () || n == Names::blockTemp () || n == Names::blockPassTemp () ||
112
- n == Names::forTemp () || n == Names::blkArg () || n == Names::blockCall () ||
108
+ n == Names::blkArg () || n == Names::blockCall () || n == Names::blockBreakAssign () || n == Names::forTemp () ||
113
109
// Insert checks because sometimes temporaries are initialized with a 0 unique value. 😬
114
110
n == Names::finalReturn () || n == NameRef::noName () || n == Names::blockCall () || n == Names::selfLocal () ||
115
111
n == Names::unconditional ();
@@ -198,6 +194,16 @@ class SCIPState {
198
194
public:
199
195
UnorderedMap<core::FileRef, vector<scip::Occurrence>> occurrenceMap;
200
196
UnorderedMap<core::FileRef, vector<scip::SymbolInformation>> symbolMap;
197
+ // Cache of occurrences for locals that have been emitted in this function.
198
+ //
199
+ // Note that the SymbolRole is a part of the key too, because we can
200
+ // have a read-reference and write-reference at the same location
201
+ // (we don't merge those for now).
202
+ //
203
+ // The 'value' in the map is purely for sanity-checking. It's a bit
204
+ // cumbersome to conditionalize the type to be a set in non-debug and
205
+ // map in debug, so keeping it a map.
206
+ UnorderedMap<std::pair<core::LocOffsets, /* SymbolRole*/ int32_t >, uint32_t > localOccurrenceCache;
201
207
vector<scip::Document> documents;
202
208
vector<scip::SymbolInformation> externalSymbols;
203
209
@@ -271,14 +277,52 @@ class SCIPState {
271
277
return absl::OkStatus ();
272
278
}
273
279
280
+ // Returns true if there was a cache hit.
281
+ //
282
+ // Otherwise, inserts the location into the cache and returns false.
283
+ bool cacheOccurrence (const core::GlobalState &gs, core::FileRef file, OwnedLocal occ, int32_t symbolRoles) {
284
+ // Optimization:
285
+ // Avoid emitting duplicate defs/refs for locals.
286
+ // This can happen with constructs like:
287
+ // z = if cond then expr else expr end
288
+ // When this is lowered to a CFG, we will end up with
289
+ // multiple bindings with the same LHS location.
290
+ //
291
+ // Repeated reads for the same occurrence can happen for rvalues too.
292
+ // If the compiler has proof that a scrutinee variable is not modified,
293
+ // each comparison in a case will perform a read.
294
+ // case x
295
+ // when 0 then <stuff>
296
+ // when 1 then <stuff>
297
+ // else <stuff>
298
+ // end
299
+ // The CFG for this involves two reads for x in calls to ==.
300
+ // (This wouldn't have happened if x was always stashed away into
301
+ // a temporary first, but the temporary only appears in the CFG if
302
+ // evaluating one of the cases has a chance to modify x.)
303
+ auto [it, inserted] = this ->localOccurrenceCache .insert ({{occ.offsets , symbolRoles}, occ.counter });
304
+ if (inserted) {
305
+ return false ;
306
+ }
307
+ auto savedCounter = it->second ;
308
+ ENFORCE (occ.counter == savedCounter, " cannot have distinct local variable {} at same location {}" ,
309
+ (symbolRoles & scip::SymbolRole::Definition) ? " definitions" : " references" ,
310
+ core::Loc (file, occ.offsets ).showRaw (gs));
311
+ return true ;
312
+ }
313
+
274
314
public:
275
315
absl::Status saveDefinition (const core::GlobalState &gs, core::FileRef file, OwnedLocal occ) {
316
+ if (this ->cacheOccurrence (gs, file, occ, scip::SymbolRole::Definition)) {
317
+ return absl::OkStatus ();
318
+ }
276
319
return this ->saveDefinitionImpl (gs, file, occ.toString (gs, file), core::Loc (file, occ.offsets ));
277
320
}
278
321
279
322
// Save definition when you have a sorbet Symbol.
280
323
// Meant for methods, fields etc., but not local variables.
281
324
absl::Status saveDefinition (const core::GlobalState &gs, core::FileRef file, core::SymbolRef symRef) {
325
+ // TODO:(varun) Should we cache here too to avoid emitting duplicate definitions?
282
326
scip::Symbol symbol;
283
327
auto status = symbolForExpr (gs, symRef, symbol);
284
328
if (!status.ok ()) {
@@ -297,11 +341,15 @@ class SCIPState {
297
341
}
298
342
299
343
absl::Status saveReference (const core::GlobalState &gs, core::FileRef file, OwnedLocal occ, int32_t symbol_roles) {
344
+ if (this ->cacheOccurrence (gs, file, occ, symbol_roles)) {
345
+ return absl::OkStatus ();
346
+ }
300
347
return this ->saveReferenceImpl (gs, file, occ.toString (gs, file), occ.offsets , symbol_roles);
301
348
}
302
349
303
350
absl::Status saveReference (const core::GlobalState &gs, core::FileRef file, core::SymbolRef symRef,
304
351
core::LocOffsets occLoc, int32_t symbol_roles) {
352
+ // TODO:(varun) Should we cache here to to avoid emitting duplicate references?
305
353
absl::StatusOr<string *> valueOrStatus (this ->saveSymbolString (gs, symRef, nullptr ));
306
354
if (!valueOrStatus.ok ()) {
307
355
return valueOrStatus.status ();
@@ -357,7 +405,29 @@ core::SymbolRef lookupRecursive(const core::GlobalState &gs, const core::SymbolR
357
405
}
358
406
359
407
class CFGTraversal final {
360
- UnorderedMap<const cfg::BasicBlock *, UnorderedMap<cfg::LocalRef, core::Loc>> blockLocals;
408
+ // A map from each basic block to the locals in it.
409
+ //
410
+ // The locals may be coming from the parents, or they may be defined in the
411
+ // block. Locals coming from the parents may be in the form of basic block
412
+ // arguments or they may be "directly referenced."
413
+ //
414
+ // For example, if you have code like:
415
+ //
416
+ // def f(x):
417
+ // y = 0
418
+ // if cond:
419
+ // y = x + $z
420
+ //
421
+ // Then x and $z will be passed as arguments to the basic block for the
422
+ // true branch, whereas 'y' won't be. However, 'y' is still technically
423
+ // coming from the parent basic block, otherwise we'd end up marking the
424
+ // assignment as a definition instead of a (write) reference.
425
+ //
426
+ // At the start of the traversal of a basic block, the entry for a basic
427
+ // block is populated with the locals coming from the parents. Then,
428
+ // we traverse each instruction and populate it with the locals defined
429
+ // in the block.
430
+ UnorderedMap<const cfg::BasicBlock *, UnorderedSet<cfg::LocalRef>> blockLocals;
361
431
UnorderedMap<cfg::LocalRef, uint32_t > functionLocals;
362
432
363
433
// Local variable counter that is reset for every function.
@@ -370,8 +440,8 @@ class CFGTraversal final {
370
440
: blockLocals(), functionLocals(), scipState(scipState), ctx(ctx) {}
371
441
372
442
private:
373
- void addLocal (const cfg::BasicBlock *bb, cfg::LocalRef localRef, core::Loc loc ) {
374
- this ->blockLocals [bb][ localRef] = loc ;
443
+ void addLocal (const cfg::BasicBlock *bb, cfg::LocalRef localRef) {
444
+ this ->blockLocals [bb]. insert ( localRef) ;
375
445
this ->functionLocals [localRef] = ++this ->counter ;
376
446
}
377
447
@@ -405,7 +475,7 @@ class CFGTraversal final {
405
475
if (!this ->functionLocals .contains (localRef)) {
406
476
isDefinition = true ; // If we're seeing this for the first time in topological order,
407
477
// The current block must have a definition for the variable.
408
- this ->addLocal (bb, localRef, this -> ctx . locAt (local. loc ) );
478
+ this ->addLocal (bb, localRef);
409
479
}
410
480
// The variable wasn't passed in as an argument, and hasn't already been recorded
411
481
// as a local in the block. So this must be a definition line.
@@ -419,7 +489,7 @@ class CFGTraversal final {
419
489
// Ill-formed code where we're trying to access a variable
420
490
// without setting it first. Emit a local as a best-effort.
421
491
// TODO(varun): Will Sorbet error out before we get here?
422
- this ->addLocal (bb, localRef, this -> ctx . locAt (local. loc ) );
492
+ this ->addLocal (bb, localRef);
423
493
}
424
494
// TODO(varun): Will Sorbet error out before we get here?
425
495
// It's possible that we have ill-formed code where the variable
@@ -448,42 +518,34 @@ class CFGTraversal final {
448
518
return true ;
449
519
}
450
520
451
- void addArgLocals (cfg::BasicBlock *bb, const cfg::CFG &cfg) {
452
- this ->blockLocals [bb] = {};
453
- for (auto &bbArgs : bb->args ) {
454
- bool found = false ;
455
- for (auto parentBB : bb->backEdges ) {
456
- if (this ->blockLocals [parentBB].contains (bbArgs.variable )) {
457
- this ->blockLocals [bb][bbArgs.variable ] = this ->blockLocals [parentBB][bbArgs.variable ];
458
- found = true ;
459
- break ;
460
- }
521
+ void copyLocalsFromParents (cfg::BasicBlock *bb, const cfg::CFG &cfg) {
522
+ UnorderedSet<cfg::LocalRef> bbLocals{};
523
+ for (auto parentBB : bb->backEdges ) {
524
+ if (!this ->blockLocals .contains (parentBB)) { // e.g. loops
525
+ continue ;
461
526
}
462
- if (!found) {
463
- print_dbg (" # basic block argument {} did not come from parents {}\n " ,
464
- bbArgs.toString (this ->ctx .state , cfg), bbvec_to_string (bb->backEdges ));
527
+ auto &parentLocals = this ->blockLocals [parentBB];
528
+ if (bbLocals.size () + parentLocals.size () > bbLocals.capacity ()) {
529
+ bbLocals.reserve (bbLocals.size () + parentLocals.size ());
530
+ }
531
+ for (auto local : parentLocals) {
532
+ bbLocals.insert (local);
465
533
}
466
534
}
535
+ ENFORCE (!this ->blockLocals .contains (bb));
536
+ this ->blockLocals [bb] = std::move (bbLocals);
467
537
}
468
538
469
539
public:
470
540
void traverse (const cfg::CFG &cfg) {
471
541
auto &gs = this ->ctx .state ;
472
542
auto method = this ->ctx .owner ;
473
543
474
- auto print_map = [&cfg, &gs](const UnorderedMap<cfg::LocalRef, core::Loc> &map, cfg::BasicBlock *ptr) {
475
- print_dbg (" # blockLocals (id: {}, ptr: {}) = {}\n " , ptr->id , (void *)ptr,
476
- map_to_string (
477
- map, [&](const auto &localref, const auto &loc) -> auto {
478
- return fmt::format (" {} {}" , localref.data (cfg).toString (gs), loc.showRaw (gs));
479
- }));
480
- };
481
-
482
544
// I don't fully understand the doc comment for forwardsTopoSort; it seems backwards in practice.
483
545
for (auto it = cfg.forwardsTopoSort .rbegin (); it != cfg.forwardsTopoSort .rend (); ++it) {
484
546
cfg::BasicBlock *bb = *it;
485
547
print_dbg (" # Looking at block id: {} ptr: {}\n " , bb->id , (void *)bb);
486
- this ->addArgLocals (bb, cfg);
548
+ this ->copyLocalsFromParents (bb, cfg);
487
549
for (auto &binding : bb->exprs ) {
488
550
if (auto *aliasInstr = cfg::cast_instruction<cfg::Alias>(binding.value )) {
489
551
auto aliasName = aliasInstr->name ;
@@ -497,7 +559,7 @@ class CFGTraversal final {
497
559
method.toString (gs));
498
560
continue ;
499
561
}
500
- this ->addLocal (bb, binding.bind .variable , aliasedSym. loc (gs) );
562
+ this ->addLocal (bb, binding.bind .variable );
501
563
continue ;
502
564
}
503
565
if (!binding.loc .exists () || binding.loc .empty ()) { // TODO(varun): When can each case happen?
@@ -599,7 +661,6 @@ class CFGTraversal final {
599
661
}
600
662
}
601
663
}
602
- print_map (this ->blockLocals .at (bb), bb);
603
664
}
604
665
}
605
666
};
0 commit comments