Skip to content

Commit 6568945

Browse files
committed
Some functions do not really need the iterative data flow in DSE, i.e. functions
for which a single post-order traversal is enough for DSE. In this case, we do not need to compute the genset and killset (which is a costly operation). On the stdlib, I see that 93% of the functions are "OneIterationFunction"s. With this change, I see the compilation time of DSE drop from 2.0% to 1.7% of the entire compilation. This represents 4.3% of all the time spent in SILOptimizations (39.5%).
1 parent bee0d95 commit 6568945

File tree

1 file changed

+59
-26
lines changed

1 file changed

+59
-26
lines changed

lib/SILOptimizer/Transforms/DeadStoreElimination.cpp

Lines changed: 59 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,21 @@ static inline bool isPerformingDSE(DSEKind Kind) {
114114
return Kind == DSEKind::PerformDSE;
115115
}
116116

117+
/// Return true if all basic blocks have their successors processed if
118+
/// they are iterated in post order.
119+
static bool isOneIterationFunction(PostOrderFunctionInfo *PO) {
120+
bool OneIterationFunction = true;
121+
llvm::DenseSet<SILBasicBlock *> HandledBBs;
122+
123+
for (SILBasicBlock *B : PO->getPostOrder()) {
124+
for (auto &X : B->getSuccessors()) {
125+
OneIterationFunction &= (HandledBBs.find(X) != HandledBBs.end());
126+
}
127+
HandledBBs.insert(B);
128+
}
129+
return OneIterationFunction;
130+
}
131+
117132
/// Returns true if this is an instruction that may have side effects in a
118133
/// general sense but are inert from a load store perspective.
119134
static bool isDeadStoreInertInstruction(SILInstruction *Inst) {
@@ -227,7 +242,7 @@ class BlockState {
227242
void initReturnBlock(DSEContext &Ctx);
228243

229244
/// Initialize the bitvectors for the basic block.
230-
void init(DSEContext &Ctx);
245+
void init(DSEContext &Ctx, bool OneIterationFunction);
231246

232247
/// Check whether the BBWriteSetIn has changed. If it does, we need to rerun
233248
/// the data flow on this block's predecessors to reach fixed point.
@@ -387,6 +402,9 @@ class DSEContext {
387402
/// Entry point for dead store elimination.
388403
bool run();
389404

405+
/// Run the iterative DF to converge the BBWriteSetIn.
406+
void runIterativeDF();
407+
390408
/// Returns the escape analysis we use.
391409
EscapeAnalysis *getEA() { return EA; }
392410

@@ -430,7 +448,7 @@ void BlockState::initReturnBlock(DSEContext &Ctx) {
430448
}
431449
}
432450

433-
void BlockState::init(DSEContext &Ctx) {
451+
void BlockState::init(DSEContext &Ctx, bool OneIterationFunction) {
434452
std::vector<LSLocation> &LV = Ctx.getLocationVault();
435453
LocationNum = LV.size();
436454
// The initial state of BBWriteSetIn should be all 1's. Otherwise the
@@ -446,7 +464,7 @@ void BlockState::init(DSEContext &Ctx) {
446464
// However, by doing so, we can only eliminate the dead stores after the
447465
// data flow stabilizes.
448466
//
449-
BBWriteSetIn.resize(LocationNum, true);
467+
BBWriteSetIn.resize(LocationNum, !OneIterationFunction);
450468
BBWriteSetOut.resize(LocationNum, false);
451469
BBWriteSetMid.resize(LocationNum, false);
452470

@@ -536,6 +554,8 @@ void DSEContext::processBasicBlockForDSE(SILBasicBlock *BB) {
536554
for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) {
537555
processInstruction(&(*I), DSEKind::PerformDSE);
538556
}
557+
558+
S->BBWriteSetIn = S->BBWriteSetMid;
539559
}
540560

541561
void DSEContext::mergeSuccessorStates(SILBasicBlock *BB) {
@@ -948,28 +968,7 @@ void DSEContext::processInstruction(SILInstruction *I, DSEKind Kind) {
948968
invalidateLSLocationBase(I, Kind);
949969
}
950970

951-
bool DSEContext::run() {
952-
// Walk over the function and find all the locations accessed by
953-
// this function.
954-
LSLocation::enumerateLSLocations(*F, LocationVault, LocToBitIndex, TE);
955-
956-
// For all basic blocks in the function, initialize a BB state.
957-
//
958-
// DenseMap has a minimum size of 64, while many functions do not have more
959-
// than 64 basic blocks. Therefore, allocate the BlockState in a vector and
960-
// use pointer in BBToLocState to access them.
961-
for (auto &B : *F) {
962-
BlockStates.push_back(BlockState(&B));
963-
// Since we know all the locations accessed in this function, we can resize
964-
// the bit vector to the appropriate size.
965-
BlockStates.back().init(*this);
966-
}
967-
968-
// Initialize the BBToLocState mapping.
969-
for (auto &S : BlockStates) {
970-
BBToLocState[S.getBB()] = &S;
971-
}
972-
971+
void DSEContext::runIterativeDF() {
973972
// We perform dead store elimination in the following phases.
974973
//
975974
// Phase 1. we compute the max store set at the beginning of the basic block.
@@ -983,7 +982,6 @@ bool DSEContext::run() {
983982
//
984983
// Phase 5. we remove the dead stores.
985984

986-
987985
// Generate the genset and killset for each basic block. We can process the
988986
// basic blocks in any order.
989987
//
@@ -1018,6 +1016,40 @@ bool DSEContext::run() {
10181016
}
10191017
}
10201018
}
1019+
}
1020+
1021+
bool DSEContext::run() {
1022+
// Is this a one iteration function.
1023+
auto *PO = PM->getAnalysis<PostOrderAnalysis>()->get(F);
1024+
1025+
// Do we really need to run the iterative data flow on the function.
1026+
bool OneIterationFunction = isOneIterationFunction(PO);
1027+
1028+
// Walk over the function and find all the locations accessed by
1029+
// this function.
1030+
LSLocation::enumerateLSLocations(*F, LocationVault, LocToBitIndex, TE);
1031+
1032+
// For all basic blocks in the function, initialize a BB state.
1033+
//
1034+
// DenseMap has a minimum size of 64, while many functions do not have more
1035+
// than 64 basic blocks. Therefore, allocate the BlockState in a vector and
1036+
// use pointer in BBToLocState to access them.
1037+
for (auto &B : *F) {
1038+
BlockStates.push_back(BlockState(&B));
1039+
// Since we know all the locations accessed in this function, we can resize
1040+
// the bit vector to the appropriate size.
1041+
BlockStates.back().init(*this, OneIterationFunction);
1042+
}
1043+
1044+
// Initialize the BBToLocState mapping.
1045+
for (auto &S : BlockStates) {
1046+
BBToLocState[S.getBB()] = &S;
1047+
}
1048+
1049+
// We need to run the iterative data flow on the function.
1050+
if (!OneIterationFunction) {
1051+
runIterativeDF();
1052+
}
10211053

10221054
// The data flow has stabilized, run one last iteration over all the basic
10231055
// blocks and try to remove dead stores.
@@ -1043,6 +1075,7 @@ bool DSEContext::run() {
10431075
recursivelyDeleteTriviallyDeadInstructions(I, true);
10441076
}
10451077
}
1078+
10461079
return Changed;
10471080
}
10481081

0 commit comments

Comments
 (0)