diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2532edc5d8699..4204f35d1a20d 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1476,8 +1476,7 @@ class BoUpSLP { /// Clear the internal data structures that are created by 'buildTree'. void deleteTree() { VectorizableTree.clear(); - ScalarToTreeEntry.clear(); - MultiNodeScalars.clear(); + ScalarToTreeEntries.clear(); MustGather.clear(); NonScheduledFirst.clear(); EntryToLastInstruction.clear(); @@ -1760,7 +1759,7 @@ class BoUpSLP { auto AllUsersVectorized = [U1, U2, this](Value *V) { return llvm::all_of(V->users(), [U1, U2, this](Value *U) { - return U == U1 || U == U2 || R.getTreeEntry(U) != nullptr; + return U == U1 || U == U2 || R.isVectorized(U); }); }; return AllUsersVectorized(V1) && AllUsersVectorized(V2); @@ -1776,9 +1775,13 @@ class BoUpSLP { } auto CheckSameEntryOrFail = [&]() { - if (const TreeEntry *TE1 = R.getTreeEntry(V1); - TE1 && TE1 == R.getTreeEntry(V2)) - return LookAheadHeuristics::ScoreSplatLoads; + if (ArrayRef TEs1 = R.getTreeEntries(V1); !TEs1.empty()) { + SmallPtrSet Set(TEs1.begin(), TEs1.end()); + if (ArrayRef TEs2 = R.getTreeEntries(V2); + !TEs2.empty() && + any_of(TEs2, [&](TreeEntry *E) { return Set.contains(E); })) + return LookAheadHeuristics::ScoreSplatLoads; + } return LookAheadHeuristics::ScoreFail; }; @@ -2851,13 +2854,7 @@ class BoUpSLP { continue; auto *I = cast(V); salvageDebugInfo(*I); - SmallVector Entries; - if (const TreeEntry *Entry = getTreeEntry(I)) { - Entries.push_back(Entry); - auto It = MultiNodeScalars.find(I); - if (It != MultiNodeScalars.end()) - Entries.append(It->second.begin(), It->second.end()); - } + ArrayRef Entries = getTreeEntries(I); for (Use &U : I->operands()) { if (auto *OpI = dyn_cast_if_present(U.get()); OpI && !DeletedInstructions.contains(OpI) && OpI->hasOneUser() && @@ -2961,7 +2958,10 @@ class BoUpSLP { } /// Check if the value is vectorized in the tree. - bool isVectorized(Value *V) const { return getTreeEntry(V); } + bool isVectorized(Value *V) const { + assert(V && "V cannot be nullptr."); + return ScalarToTreeEntries.contains(V); + } ~BoUpSLP(); @@ -2999,16 +2999,10 @@ class BoUpSLP { ArrayRef VL = UserTE->getOperand(OpIdx); TreeEntry *TE = nullptr; const auto *It = find_if(VL, [&](Value *V) { - TE = getTreeEntry(V); - if (TE && is_contained(TE->UserTreeIndices, EdgeInfo(UserTE, OpIdx))) - return true; - auto It = MultiNodeScalars.find(V); - if (It != MultiNodeScalars.end()) { - for (TreeEntry *E : It->second) { - if (is_contained(E->UserTreeIndices, EdgeInfo(UserTE, OpIdx))) { - TE = E; - return true; - } + for (TreeEntry *E : getTreeEntries(V)) { + if (is_contained(E->UserTreeIndices, EdgeInfo(UserTE, OpIdx))) { + TE = E; + return true; } } return false; @@ -3659,18 +3653,24 @@ class BoUpSLP { Last->ReorderIndices.append(ReorderIndices.begin(), ReorderIndices.end()); } if (!Last->isGather()) { + SmallPtrSet Processed; for (Value *V : VL) { if (isa(V)) continue; - const TreeEntry *TE = getTreeEntry(V); - assert((!TE || TE == Last || doesNotNeedToBeScheduled(V)) && - "Scalar already in tree!"); - if (TE) { - if (TE != Last) - MultiNodeScalars.try_emplace(V).first->getSecond().push_back(Last); - continue; + auto It = ScalarToTreeEntries.find(V); + assert( + (It == ScalarToTreeEntries.end() || + (It->getSecond().size() == 1 && It->getSecond().front() == Last) || + doesNotNeedToBeScheduled(V)) && + "Scalar already in tree!"); + if (It == ScalarToTreeEntries.end()) { + ScalarToTreeEntries.try_emplace(V).first->getSecond().push_back(Last); + (void)Processed.insert(V); + } else if (Processed.insert(V).second) { + assert(!is_contained(It->getSecond(), Last) && + "Value already associated with the node."); + It->getSecond().push_back(Last); } - ScalarToTreeEntry[V] = Last; } // Update the scheduler bundle to point to this TreeEntry. ScheduleData *BundleMember = *Bundle; @@ -3725,14 +3725,23 @@ class BoUpSLP { } #endif - TreeEntry *getTreeEntry(Value *V) { + /// Get list of vector entries, associated with the value \p V. + ArrayRef getTreeEntries(Value *V) const { assert(V && "V cannot be nullptr."); - return ScalarToTreeEntry.lookup(V); + auto It = ScalarToTreeEntries.find(V); + if (It == ScalarToTreeEntries.end()) + return {}; + return It->getSecond(); } - const TreeEntry *getTreeEntry(Value *V) const { + /// Returns first vector node for value \p V, matching values \p VL. + TreeEntry *getSameValuesTreeEntry(Value *V, ArrayRef VL, + bool SameVF = false) const { assert(V && "V cannot be nullptr."); - return ScalarToTreeEntry.lookup(V); + for (TreeEntry *TE : ScalarToTreeEntries.lookup(V)) + if ((!SameVF || TE->getVectorFactor() == VL.size()) && TE->isSame(VL)) + return TE; + return nullptr; } /// Check that the operand node of alternate node does not generate @@ -3752,12 +3761,8 @@ class BoUpSLP { OrdersType &CurrentOrder, SmallVectorImpl &PointerOps); - /// Maps a specific scalar to its tree entry. - SmallDenseMap ScalarToTreeEntry; - - /// List of scalars, used in several vectorize nodes, and the list of the - /// nodes. - SmallDenseMap> MultiNodeScalars; + /// Maps a specific scalar to its tree entry(ies). + SmallDenseMap> ScalarToTreeEntries; /// Maps a value to the proposed vectorizable size. SmallDenseMap InstrElementSize; @@ -3798,16 +3803,19 @@ class BoUpSLP { /// This POD struct describes one external user in the vectorized tree. struct ExternalUser { - ExternalUser(Value *S, llvm::User *U, int L) - : Scalar(S), User(U), Lane(L) {} + ExternalUser(Value *S, llvm::User *U, const TreeEntry &E, int L) + : Scalar(S), User(U), E(E), Lane(L) {} + + /// Which scalar in our function. + Value *Scalar = nullptr; - // Which scalar in our function. - Value *Scalar; + /// Which user that uses the scalar. + llvm::User *User = nullptr; - // Which user that uses the scalar. - llvm::User *User; + /// Vector node, the value is part of. + const TreeEntry &E; - // Which lane does the scalar belong to. + /// Which lane does the scalar belong to. int Lane; }; using UserList = SmallVector; @@ -5113,7 +5121,7 @@ BoUpSLP::canVectorizeLoads(ArrayRef VL, const Value *VL0, auto IsAnyPointerUsedOutGraph = IsPossibleStrided && any_of(PointerOps, [&](Value *V) { return isa(V) && any_of(V->users(), [&](User *U) { - return !getTreeEntry(U) && !MustGather.contains(U); + return !isVectorized(U) && !MustGather.contains(U); }); }); const unsigned AbsoluteDiff = std::abs(*Diff); @@ -6572,7 +6580,7 @@ void BoUpSLP::buildExternalUses( LLVM_DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane " << FoundLane << " from " << *Scalar << ".\n"); ScalarToExtUses.try_emplace(Scalar, ExternalUses.size()); - ExternalUses.emplace_back(Scalar, nullptr, FoundLane); + ExternalUses.emplace_back(Scalar, nullptr, *Entry, FoundLane); continue; } for (User *U : Scalar->users()) { @@ -6587,16 +6595,24 @@ void BoUpSLP::buildExternalUses( continue; // Skip in-tree scalars that become vectors - if (TreeEntry *UseEntry = getTreeEntry(U)) { + if (ArrayRef UseEntries = getTreeEntries(U); + !UseEntries.empty()) { // Some in-tree scalars will remain as scalar in vectorized // instructions. If that is the case, the one in FoundLane will // be used. - if (UseEntry->State == TreeEntry::ScatterVectorize || - !doesInTreeUserNeedToExtract( - Scalar, getRootEntryInstruction(*UseEntry), TLI, TTI)) { + if (any_of(UseEntries, [&](TreeEntry *UseEntry) { + return UseEntry->State == TreeEntry::ScatterVectorize || + !doesInTreeUserNeedToExtract( + Scalar, getRootEntryInstruction(*UseEntry), TLI, + TTI); + })) { LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U << ".\n"); - assert(!UseEntry->isGather() && "Bad state"); + assert(none_of(UseEntries, + [](TreeEntry *UseEntry) { + return UseEntry->isGather(); + }) && + "Bad state"); continue; } U = nullptr; @@ -6613,7 +6629,7 @@ void BoUpSLP::buildExternalUses( << " from lane " << FoundLane << " from " << *Scalar << ".\n"); It = ScalarToExtUses.try_emplace(Scalar, ExternalUses.size()).first; - ExternalUses.emplace_back(Scalar, U, FoundLane); + ExternalUses.emplace_back(Scalar, U, *Entry, FoundLane); if (!U) break; } @@ -6644,7 +6660,7 @@ BoUpSLP::collectUserStores(const BoUpSLP::TreeEntry *TE) const { !isValidElementType(SI->getValueOperand()->getType())) continue; // Skip entry if already - if (getTreeEntry(U)) + if (isVectorized(U)) continue; Value *Ptr = @@ -7027,10 +7043,11 @@ void BoUpSLP::tryToVectorizeGatheredLoads( for (User *U : LI->users()) { if (auto *UI = dyn_cast(U); UI && isDeleted(UI)) continue; - if (const TreeEntry *UTE = getTreeEntry(U)) { + for (const TreeEntry *UTE : getTreeEntries(U)) { for (int I : seq(UTE->getNumOperands())) { - if (all_of(UTE->getOperand(I), - [LI](Value *V) { return V == LI; })) + if (all_of(UTE->getOperand(I), [LI](Value *V) { + return V == LI || isa(V); + })) // Found legal broadcast - do not vectorize. return false; } @@ -7135,7 +7152,7 @@ void BoUpSLP::tryToVectorizeGatheredLoads( int LastDist = LocalLoadsDists.front().second; bool AllowMaskedGather = IsMaskedGatherSupported(OriginalLoads); for (const std::pair &L : LocalLoadsDists) { - if (getTreeEntry(L.first)) + if (isVectorized(L.first)) continue; assert(LastDist >= L.second && "Expected first distance always not less than second"); @@ -7187,9 +7204,9 @@ void BoUpSLP::tryToVectorizeGatheredLoads( for (auto [Slice, _] : Results) { LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize gathered loads (" << Slice.size() << ")\n"); - if (any_of(Slice, [&](Value *V) { return getTreeEntry(V); })) { + if (any_of(Slice, [&](Value *V) { return isVectorized(V); })) { for (Value *L : Slice) - if (!getTreeEntry(L)) + if (!isVectorized(L)) SortedNonVectorized.push_back(cast(L)); continue; } @@ -7228,7 +7245,7 @@ void BoUpSLP::tryToVectorizeGatheredLoads( any_of(E->Scalars, [&, Slice = Slice](Value *V) { if (isa(V)) return false; - if (getTreeEntry(V)) + if (isVectorized(V)) return true; const auto &Nodes = ValueToGatherNodes.at(V); return (Nodes.size() != 1 || !Nodes.contains(E)) && @@ -7315,7 +7332,7 @@ void BoUpSLP::tryToVectorizeGatheredLoads( for (unsigned I = 0, E = Slice.size(); I < E; I += VF) { ArrayRef SubSlice = Slice.slice(I, std::min(VF, E - I)); - if (getTreeEntry(SubSlice.front())) + if (isVectorized(SubSlice.front())) continue; // Check if the subslice is to be-vectorized entry, which is not // equal to entry. @@ -7585,7 +7602,7 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S, DenseMap Uniques; for (Value *V : Op) { if (isa(V) || - getTreeEntry(V) || (L && L->isLoopInvariant(V))) { + isVectorized(V) || (L && L->isLoopInvariant(V))) { if (isa(V)) ++UndefCnt; continue; @@ -7603,7 +7620,7 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S, return none_of(Uniques, [&](const auto &P) { return P.first->hasNUsesOrMore(P.second + 1) && none_of(P.first->users(), [&](User *U) { - return getTreeEntry(U) || Uniques.contains(U); + return isVectorized(U) || Uniques.contains(U); }); }); }) || @@ -8167,59 +8184,25 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // Check if this is a duplicate of another entry. if (S) { - if (TreeEntry *E = getTreeEntry(S.getMainOp())) { - LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp() - << ".\n"); - if (GatheredLoadsEntriesFirst.has_value() || !E->isSame(VL)) { - auto It = MultiNodeScalars.find(S.getMainOp()); - if (It != MultiNodeScalars.end()) { - auto *TEIt = find_if(It->getSecond(), - [&](TreeEntry *ME) { return ME->isSame(VL); }); - if (TEIt != It->getSecond().end()) - E = *TEIt; - else - E = nullptr; - } else { - E = nullptr; - } - } - if (!E) { - if (!doesNotNeedToBeScheduled(S.getMainOp())) { - LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n"); - if (TryToFindDuplicates(S)) - newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, - ReuseShuffleIndices); - return; - } - SmallPtrSet Nodes; - Nodes.insert(getTreeEntry(S.getMainOp())); - for (const TreeEntry *E : MultiNodeScalars.lookup(S.getMainOp())) - Nodes.insert(E); - SmallPtrSet Values(VL.begin(), VL.end()); - if (any_of(Nodes, [&](const TreeEntry *E) { - if (all_of(E->Scalars, - [&](Value *V) { return Values.contains(V); })) - return true; - SmallPtrSet EValues(E->Scalars.begin(), - E->Scalars.end()); - return ( - all_of(VL, [&](Value *V) { return EValues.contains(V); })); - })) { - LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n"); - if (TryToFindDuplicates(S)) - newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, - ReuseShuffleIndices); - return; - } - } else { - // Record the reuse of the tree node. FIXME, currently this is only - // used to properly draw the graph rather than for the actual - // vectorization. + LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp() << ".\n"); + for (TreeEntry *E : getTreeEntries(S.getMainOp())) { + if (E->isSame(VL)) { + // Record the reuse of the tree node. E->UserTreeIndices.push_back(UserTreeIdx); LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp() << ".\n"); return; } + SmallPtrSet Values(E->Scalars.begin(), E->Scalars.end()); + if (all_of(VL, [&](Value *V) { + return isa(V) || Values.contains(V); + })) { + LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n"); + if (TryToFindDuplicates(S)) + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndices); + return; + } } } @@ -8371,7 +8354,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if ((!IsScatterVectorizeUserTE && !isa(V)) || doesNotNeedToBeScheduled(V)) continue; - if (getTreeEntry(V)) { + if (isVectorized(V)) { LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V << ") is already in tree.\n"); if (TryToFindDuplicates(S)) @@ -9029,8 +9012,7 @@ bool BoUpSLP::areAllUsersVectorized( Instruction *I, const SmallDenseSet *VectorizedVals) const { return (I->hasOneUse() && (!VectorizedVals || VectorizedVals->contains(I))) || all_of(I->users(), [this](User *U) { - return ScalarToTreeEntry.contains(U) || - isVectorLikeInstWithConstOps(U) || + return isVectorized(U) || isVectorLikeInstWithConstOps(U) || (isa(U) && MustGather.contains(U)); }); } @@ -9844,13 +9826,9 @@ void BoUpSLP::transformNodes() { ArrayRef Slice = VL.slice(Cnt, VF); // If any instruction is vectorized already - do not try again. // Reuse the existing node, if it fully matches the slice. - if (const TreeEntry *SE = getTreeEntry(Slice.front()); - SE || getTreeEntry(Slice.back())) { - if (!SE) - continue; - if (VF != SE->getVectorFactor() || !SE->isSame(Slice)) - continue; - } + if (isVectorized(Slice.front()) && + !getSameValuesTreeEntry(Slice.front(), Slice, /*SameVF=*/true)) + continue; // Constant already handled effectively - skip. if (allConstant(Slice)) continue; @@ -9933,12 +9911,8 @@ void BoUpSLP::transformNodes() { for (auto [Cnt, Sz] : Slices) { ArrayRef Slice = VL.slice(Cnt, Sz); // If any instruction is vectorized already - do not try again. - if (TreeEntry *SE = getTreeEntry(Slice.front()); - SE || getTreeEntry(Slice.back())) { - if (!SE) - continue; - if (VF != SE->getVectorFactor() || !SE->isSame(Slice)) - continue; + if (TreeEntry *SE = getSameValuesTreeEntry(Slice.front(), Slice, + /*SameVF=*/true)) { SE->UserTreeIndices.emplace_back(&E, UINT_MAX); AddCombinedNode(SE->Idx, Cnt, Sz); continue; @@ -10724,7 +10698,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { auto *EE = cast(V); VecBase = EE->getVectorOperand(); UniqueBases.insert(VecBase); - const TreeEntry *VE = R.getTreeEntry(V); + ArrayRef VEs = R.getTreeEntries(V); if (!CheckedExtracts.insert(V).second || !R.areAllUsersVectorized(cast(V), &VectorizedVals) || any_of(EE->users(), @@ -10733,7 +10707,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { !R.areAllUsersVectorized(cast(U), &VectorizedVals); }) || - (VE && VE != E)) + (!VEs.empty() && !is_contained(VEs, E))) continue; std::optional EEIdx = getExtractIndex(EE); if (!EEIdx) @@ -11166,13 +11140,14 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, const unsigned Sz = UniqueValues.size(); SmallBitVector UsedScalars(Sz, false); for (unsigned I = 0; I < Sz; ++I) { - if (isa(UniqueValues[I]) && getTreeEntry(UniqueValues[I]) == E) + if (isa(UniqueValues[I]) && + is_contained(getTreeEntries(UniqueValues[I]), E)) continue; UsedScalars.set(I); } auto GetCastContextHint = [&](Value *V) { - if (const TreeEntry *OpTE = getTreeEntry(V)) - return getCastContextHint(*OpTE); + if (ArrayRef OpTEs = getTreeEntries(V); OpTEs.size() == 1) + return getCastContextHint(*OpTEs.front()); InstructionsState SrcState = getSameOpcode(E->getOperand(0), *TLI); if (SrcState && SrcState.getOpcode() == Instruction::Load && !SrcState.isAltShuffle()) @@ -11294,11 +11269,12 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, Value *Op = PHI->getIncomingValue(I); Operands[I] = Op; } - if (const TreeEntry *OpTE = getTreeEntry(Operands.front())) - if (OpTE->isSame(Operands) && CountedOps.insert(OpTE).second) - if (!OpTE->ReuseShuffleIndices.empty()) - ScalarCost += TTI::TCC_Basic * (OpTE->ReuseShuffleIndices.size() - - OpTE->Scalars.size()); + if (const TreeEntry *OpTE = + getSameValuesTreeEntry(Operands.front(), Operands)) + if (CountedOps.insert(OpTE).second && + !OpTE->ReuseShuffleIndices.empty()) + ScalarCost += TTI::TCC_Basic * (OpTE->ReuseShuffleIndices.size() - + OpTE->Scalars.size()); } return CommonCost - ScalarCost; @@ -12231,7 +12207,7 @@ InstructionCost BoUpSLP::getSpillCost() const { // Update LiveValues. LiveValues.erase(PrevInst); for (auto &J : PrevInst->operands()) { - if (isa(&*J) && getTreeEntry(&*J)) + if (isa(&*J) && isVectorized(&*J)) LiveValues.insert(cast(&*J)); } @@ -12478,9 +12454,9 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { continue; } if (TE.isGather() && TE.hasState()) { - if (const TreeEntry *E = getTreeEntry(TE.getMainOp()); - E && E->getVectorFactor() == TE.getVectorFactor() && - E->isSame(TE.Scalars)) { + if (const TreeEntry *E = + getSameValuesTreeEntry(TE.getMainOp(), TE.Scalars); + E && E->getVectorFactor() == TE.getVectorFactor()) { // Some gather nodes might be absolutely the same as some vectorizable // nodes after reordering, need to handle it. LLVM_DEBUG(dbgs() << "SLP: Adding cost 0 for bundle " @@ -12552,7 +12528,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { continue; std::optional InsertIdx = getElementIndex(VU); if (InsertIdx) { - const TreeEntry *ScalarTE = getTreeEntry(EU.Scalar); + const TreeEntry *ScalarTE = &EU.E; auto *It = find_if( ShuffledInserts, [this, VU](const ShuffledInsertData &Data) { @@ -12561,7 +12537,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { return areTwoInsertFromSameBuildVector( VU, VecInsert, [this](InsertElementInst *II) -> Value * { Value *Op0 = II->getOperand(0); - if (getTreeEntry(II) && !getTreeEntry(Op0)) + if (isVectorized(II) && !isVectorized(Op0)) return nullptr; return Op0; }); @@ -12619,7 +12595,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { // for the extract and the added cost of the sign extend if needed. InstructionCost ExtraCost = TTI::TCC_Free; auto *VecTy = getWidenedType(EU.Scalar->getType(), BundleWidth); - const TreeEntry *Entry = getTreeEntry(EU.Scalar); + const TreeEntry *Entry = &EU.E; auto It = MinBWs.find(Entry); if (It != MinBWs.end()) { auto *MinTy = IntegerType::get(F->getContext(), It->second.first); @@ -12662,7 +12638,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { auto *Inst = cast(EU.Scalar); InstructionCost ScalarCost = TTI->getInstructionCost(Inst, CostKind); auto OperandIsScalar = [&](Value *V) { - if (!getTreeEntry(V)) { + if (!isVectorized(V)) { // Some extractelements might be not vectorized, but // transformed into shuffle and removed from the function, // consider it here. @@ -12678,7 +12654,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { if (auto *Op = dyn_cast(CI->getOperand(0)); Op && all_of(Op->operands(), OperandIsScalar)) { InstructionCost OpCost = - (getTreeEntry(Op) && !ValueToExtUses->contains(Op)) + (isVectorized(Op) && !ValueToExtUses->contains(Op)) ? TTI->getInstructionCost(Op, CostKind) : 0; if (ScalarCost + OpCost <= ExtraCost) { @@ -12705,7 +12681,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { cast( VectorizableTree.front()->getMainOp()) ->getParent()) && - !getTreeEntry(U); + !isVectorized(U); }) && count_if(Entry->Scalars, [&](Value *V) { return ValueToExtUses->contains(V); @@ -12767,8 +12743,9 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { // instead of extractelement. for (Value *V : ScalarOpsFromCasts) { ExternalUsesAsOriginalScalar.insert(V); - if (const TreeEntry *E = getTreeEntry(V)) { - ExternalUses.emplace_back(V, nullptr, E->findLaneForValue(V)); + if (ArrayRef TEs = getTreeEntries(V); !TEs.empty()) { + ExternalUses.emplace_back(V, nullptr, *TEs.front(), + TEs.front()->findLaneForValue(V)); } } // Add reduced value cost, if resized. @@ -13188,21 +13165,18 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( continue; VToTEs.insert(TEPtr); } - if (const TreeEntry *VTE = getTreeEntry(V)) { - if (ForOrder && VTE->Idx < GatheredLoadsEntriesFirst.value_or(0)) { - if (VTE->State != TreeEntry::Vectorize) { - auto It = MultiNodeScalars.find(V); - if (It == MultiNodeScalars.end()) - continue; - VTE = *It->getSecond().begin(); - // Iterate through all vectorized nodes. - auto *MIt = find_if(It->getSecond(), [](const TreeEntry *MTE) { - return MTE->State == TreeEntry::Vectorize; - }); - if (MIt == It->getSecond().end()) - continue; - VTE = *MIt; - } + if (ArrayRef VTEs = getTreeEntries(V); !VTEs.empty()) { + const TreeEntry *VTE = VTEs.front(); + if (ForOrder && VTE->Idx < GatheredLoadsEntriesFirst.value_or(0) && + VTEs.size() > 1 && VTE->State != TreeEntry::Vectorize) { + VTEs = VTEs.drop_front(); + // Iterate through all vectorized nodes. + const auto *MIt = find_if(VTEs, [](const TreeEntry *MTE) { + return MTE->State == TreeEntry::Vectorize; + }); + if (MIt == VTEs.end()) + continue; + VTE = *MIt; } if (none_of(TE->CombinedEntriesWithIndices, [&](const auto &P) { return P.first == VTE->Idx; })) { @@ -13366,7 +13340,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( // by extractelements processing) or may form vector node in future. auto MightBeIgnored = [=](Value *V) { auto *I = dyn_cast(V); - return I && !IsSplatOrUndefs && !ScalarToTreeEntry.count(I) && + return I && !IsSplatOrUndefs && !isVectorized(I) && !isVectorLikeInstWithConstOps(I) && !areAllUsersVectorized(I, UserIgnoreList) && isSimple(I); }; @@ -13952,7 +13926,7 @@ Value *BoUpSLP::gather( for (int I = 0, E = VL.size(); I < E; ++I) { if (auto *Inst = dyn_cast(VL[I])) if ((CheckPredecessor(Inst->getParent(), Builder.GetInsertBlock()) || - getTreeEntry(Inst) || + isVectorized(Inst) || (L && (!Root || L->isLoopInvariant(Root)) && L->contains(Inst))) && PostponedIndices.insert(I).second) PostponedInsts.emplace_back(Inst, I); @@ -13969,7 +13943,7 @@ Value *BoUpSLP::gather( isa_and_nonnull(CI)) { Value *Op = CI->getOperand(0); if (auto *IOp = dyn_cast(Op); - !IOp || !(isDeleted(IOp) || getTreeEntry(IOp))) + !IOp || !(isDeleted(IOp) || isVectorized(IOp))) V = Op; } Scalar = Builder.CreateIntCast( @@ -13995,7 +13969,7 @@ Value *BoUpSLP::gather( CSEBlocks.insert(InsElt->getParent()); // Add to our 'need-to-extract' list. if (isa(V)) { - if (TreeEntry *Entry = getTreeEntry(V)) { + if (ArrayRef Entries = getTreeEntries(V); !Entries.empty()) { // Find which lane we need to extract. User *UserOp = nullptr; if (Scalar != V) { @@ -14005,8 +13979,8 @@ Value *BoUpSLP::gather( UserOp = InsElt; } if (UserOp) { - unsigned FoundLane = Entry->findLaneForValue(V); - ExternalUses.emplace_back(V, UserOp, FoundLane); + unsigned FoundLane = Entries.front()->findLaneForValue(V); + ExternalUses.emplace_back(V, UserOp, *Entries.front(), FoundLane); } } } @@ -14241,8 +14215,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { continue; auto *EI = cast(VL[I]); VecBase = EI->getVectorOperand(); - if (const TreeEntry *TE = R.getTreeEntry(VecBase)) - VecBase = TE->VectorizedValue; + if (ArrayRef TEs = R.getTreeEntries(VecBase); !TEs.empty()) + VecBase = TEs.front()->VectorizedValue; assert(VecBase && "Expected vectorized value."); UniqueBases.insert(VecBase); // If the only one use is vectorized - can delete the extractelement @@ -14250,18 +14224,20 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { if (!EI->hasOneUse() || R.ExternalUsesAsOriginalScalar.contains(EI) || (NumParts != 1 && count(VL, EI) > 1) || any_of(EI->users(), [&](User *U) { - const TreeEntry *UTE = R.getTreeEntry(U); - return !UTE || R.MultiNodeScalars.contains(U) || + ArrayRef UTEs = R.getTreeEntries(U); + return UTEs.empty() || UTEs.size() > 1 || (isa(U) && !R.areAllUsersVectorized(cast(U))) || - count_if(R.VectorizableTree, - [&](const std::unique_ptr &TE) { - return any_of(TE->UserTreeIndices, - [&](const EdgeInfo &Edge) { - return Edge.UserTE == UTE; - }) && - is_contained(VL, EI); - }) != 1; + (!UTEs.empty() && + count_if(R.VectorizableTree, + [&](const std::unique_ptr &TE) { + return any_of(TE->UserTreeIndices, + [&](const EdgeInfo &Edge) { + return Edge.UserTE == + UTEs.front(); + }) && + is_contained(VL, EI); + }) != 1); })) continue; R.eraseInstruction(EI); @@ -14296,8 +14272,9 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { return S; Value *VecOp = cast(std::get<0>(D))->getVectorOperand(); - if (const TreeEntry *TE = R.getTreeEntry(VecOp)) - VecOp = TE->VectorizedValue; + if (ArrayRef TEs = R.getTreeEntries(VecOp); + !TEs.empty()) + VecOp = TEs.front()->VectorizedValue; assert(VecOp && "Expected vectorized value."); const unsigned Size = cast(VecOp->getType())->getNumElements(); @@ -14307,8 +14284,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { if (I == PoisonMaskElem) continue; Value *VecOp = cast(V)->getVectorOperand(); - if (const TreeEntry *TE = R.getTreeEntry(VecOp)) - VecOp = TE->VectorizedValue; + if (ArrayRef TEs = R.getTreeEntries(VecOp); !TEs.empty()) + VecOp = TEs.front()->VectorizedValue; assert(VecOp && "Expected vectorized value."); VecOp = castToScalarTyElem(VecOp); Bases[I / VF] = VecOp; @@ -14634,29 +14611,20 @@ BoUpSLP::TreeEntry *BoUpSLP::getMatchedVectorizedOperand(const TreeEntry *E, if (!S) return nullptr; auto CheckSameVE = [&](const TreeEntry *VE) { - return VE->isSame(VL) && - (any_of(VE->UserTreeIndices, - [E, NodeIdx](const EdgeInfo &EI) { - return EI.UserTE == E && EI.EdgeIdx == NodeIdx; - }) || - any_of(VectorizableTree, - [E, NodeIdx, VE](const std::unique_ptr &TE) { - return TE->isOperandGatherNode( - {const_cast(E), NodeIdx}) && - VE->isSame(TE->Scalars); - })); + return any_of(VE->UserTreeIndices, + [E, NodeIdx](const EdgeInfo &EI) { + return EI.UserTE == E && EI.EdgeIdx == NodeIdx; + }) || + any_of(VectorizableTree, + [E, NodeIdx, VE](const std::unique_ptr &TE) { + return TE->isOperandGatherNode( + {const_cast(E), NodeIdx}) && + VE->isSame(TE->Scalars); + }); }; - TreeEntry *VE = getTreeEntry(S.getMainOp()); + TreeEntry *VE = getSameValuesTreeEntry(S.getMainOp(), VL); if (VE && CheckSameVE(VE)) return VE; - auto It = MultiNodeScalars.find(S.getMainOp()); - if (It != MultiNodeScalars.end()) { - auto *I = find_if(It->getSecond(), [&](const TreeEntry *TE) { - return TE != VE && CheckSameVE(TE); - }); - if (I != It->getSecond().end()) - return *I; - } return nullptr; } @@ -14874,9 +14842,10 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy, for (auto [Idx, I] : enumerate(ExtractMask)) { if (I == PoisonMaskElem) continue; - if (const auto *TE = getTreeEntry( - cast(StoredGS[Idx])->getVectorOperand())) - ExtractEntries.push_back(TE); + if (ArrayRef TEs = getTreeEntries( + cast(StoredGS[Idx])->getVectorOperand()); + !TEs.empty()) + ExtractEntries.append(TEs.begin(), TEs.end()); } if (std::optional Delayed = ShuffleBuilder.needToDelay(E, ExtractEntries)) { @@ -14907,10 +14876,10 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy, any_of(E->Scalars, IsaPred)) && any_of(E->Scalars, [this](Value *V) { - return isa(V) && getTreeEntry(V); + return isa(V) && isVectorized(V); })) || (E->hasState() && E->isAltShuffle()) || - all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) || + all_of(E->Scalars, [this](Value *V) { return isVectorized(V); }) || isSplat(E->Scalars) || (E->Scalars != GatheredScalars && GatheredScalars.size() <= 2)) { GatherShuffles = @@ -15025,7 +14994,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy, // non-poisonous, or by freezing the incoming scalar value first. auto *It = find_if(Scalars, [this, E](Value *V) { return !isa(V) && - (getTreeEntry(V) || isGuaranteedNotToBePoison(V, AC) || + (isVectorized(V) || isGuaranteedNotToBePoison(V, AC) || (E->UserTreeIndices.size() == 1 && any_of(V->uses(), [E](const Use &U) { // Check if the value already used in the same operation in @@ -15083,9 +15052,9 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy, continue; auto *EI = cast(StoredGS[I]); Value *VecOp = EI->getVectorOperand(); - if (const auto *TE = getTreeEntry(VecOp)) - if (TE->VectorizedValue) - VecOp = TE->VectorizedValue; + if (ArrayRef TEs = getTreeEntries(VecOp); + !TEs.empty() && TEs.front()->VectorizedValue) + VecOp = TEs.front()->VectorizedValue; if (!Vec1) { Vec1 = VecOp; } else if (Vec1 != VecOp) { @@ -15413,8 +15382,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { case Instruction::ExtractElement: { Value *V = E->getSingleOperand(0); - if (const TreeEntry *TE = getTreeEntry(V)) - V = TE->VectorizedValue; + if (ArrayRef TEs = getTreeEntries(V); !TEs.empty()) + V = TEs.front()->VectorizedValue; setInsertPointAfterBundle(E); V = FinalShuffle(V, E); E->VectorizedValue = V; @@ -16344,13 +16313,13 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues, DenseMap> PostponedValues; for (const TreeEntry *E : PostponedNodes) { auto *TE = const_cast(E); - if (auto *VecTE = getTreeEntry(TE->Scalars.front())) - if (VecTE->isSame(TE->UserTreeIndices.front().UserTE->getOperand( - TE->UserTreeIndices.front().EdgeIdx)) && - VecTE->isSame(TE->Scalars)) - // Found gather node which is absolutely the same as one of the - // vectorized nodes. It may happen after reordering. - continue; + if (auto *VecTE = getSameValuesTreeEntry( + TE->Scalars.front(), TE->UserTreeIndices.front().UserTE->getOperand( + TE->UserTreeIndices.front().EdgeIdx)); + VecTE && VecTE->isSame(TE->Scalars)) + // Found gather node which is absolutely the same as one of the + // vectorized nodes. It may happen after reordering. + continue; auto *PrevVec = cast(TE->VectorizedValue); TE->VectorizedValue = nullptr; auto *UserI = @@ -16392,14 +16361,8 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues, "Expected integer vector types only."); std::optional IsSigned; for (Value *V : TE->Scalars) { - if (const TreeEntry *BaseTE = getTreeEntry(V)) { - auto It = MinBWs.find(BaseTE); - if (It != MinBWs.end()) { - IsSigned = IsSigned.value_or(false) || It->second.second; - if (*IsSigned) - break; - } - for (const TreeEntry *MNTE : MultiNodeScalars.lookup(V)) { + if (isVectorized(V)) { + for (const TreeEntry *MNTE : getTreeEntries(V)) { auto It = MinBWs.find(MNTE); if (It != MinBWs.end()) { IsSigned = IsSigned.value_or(false) || It->second.second; @@ -16475,7 +16438,7 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues, // has multiple uses of the same value. if (User && !is_contained(Scalar->users(), User)) continue; - TreeEntry *E = getTreeEntry(Scalar); + const TreeEntry *E = &ExternalUse.E; assert(E && "Invalid scalar"); assert(!E->isGather() && "Extracting from a gather list"); // Non-instruction pointers are not deleted, just skip them. @@ -16533,8 +16496,8 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues, ES && isa(Vec)) { Value *V = ES->getVectorOperand(); auto *IVec = cast(Vec); - if (const TreeEntry *ETE = getTreeEntry(V)) - V = ETE->VectorizedValue; + if (ArrayRef ETEs = getTreeEntries(V); !ETEs.empty()) + V = ETEs.front()->VectorizedValue; if (auto *IV = dyn_cast(V); !IV || IV == Vec || IV->getParent() != IVec->getParent() || IV->comesBefore(IVec)) @@ -16587,27 +16550,31 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues, if (!User) { if (!ScalarsWithNullptrUser.insert(Scalar).second) continue; - assert((ExternallyUsedValues.count(Scalar) || - Scalar->hasNUsesOrMore(UsesLimit) || - ExternalUsesAsOriginalScalar.contains(Scalar) || - any_of(Scalar->users(), - [&](llvm::User *U) { - if (ExternalUsesAsOriginalScalar.contains(U)) - return true; - TreeEntry *UseEntry = getTreeEntry(U); - return UseEntry && - (UseEntry->State == TreeEntry::Vectorize || - UseEntry->State == - TreeEntry::StridedVectorize) && - (E->State == TreeEntry::Vectorize || - E->State == TreeEntry::StridedVectorize) && - doesInTreeUserNeedToExtract( - Scalar, getRootEntryInstruction(*UseEntry), - TLI, TTI); - })) && - "Scalar with nullptr User must be registered in " - "ExternallyUsedValues map or remain as scalar in vectorized " - "instructions"); + assert( + (ExternallyUsedValues.count(Scalar) || + Scalar->hasNUsesOrMore(UsesLimit) || + ExternalUsesAsOriginalScalar.contains(Scalar) || + any_of( + Scalar->users(), + [&, TTI = TTI](llvm::User *U) { + if (ExternalUsesAsOriginalScalar.contains(U)) + return true; + ArrayRef UseEntries = getTreeEntries(U); + return !UseEntries.empty() && + (E->State == TreeEntry::Vectorize || + E->State == TreeEntry::StridedVectorize) && + any_of(UseEntries, [&, TTI = TTI](TreeEntry *UseEntry) { + return (UseEntry->State == TreeEntry::Vectorize || + UseEntry->State == + TreeEntry::StridedVectorize) && + doesInTreeUserNeedToExtract( + Scalar, getRootEntryInstruction(*UseEntry), + TLI, TTI); + }); + })) && + "Scalar with nullptr User must be registered in " + "ExternallyUsedValues map or remain as scalar in vectorized " + "instructions"); if (auto *VecI = dyn_cast(Vec)) { if (auto *PHI = dyn_cast(VecI)) { if (PHI->getParent()->isLandingPad()) @@ -16870,7 +16837,7 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues, LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n"); // It is legal to delete users in the ignorelist. - assert((getTreeEntry(U) || + assert((isVectorized(U) || (UserIgnoreList && UserIgnoreList->contains(U)) || (isa_and_nonnull(U) && isDeleted(cast(U)))) && @@ -16892,7 +16859,7 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues, // Clear up reduction references, if any. if (UserIgnoreList) { for (Instruction *I : RemovedInsts) { - const TreeEntry *IE = getTreeEntry(I); + const TreeEntry *IE = getTreeEntries(I).front(); if (IE->Idx != 0 && !(VectorizableTree.front()->isGather() && !IE->UserTreeIndices.empty() && @@ -17607,10 +17574,11 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) { for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd; I = I->getNextNode()) { if (ScheduleData *SD = BS->getScheduleData(I)) { - [[maybe_unused]] TreeEntry *SDTE = getTreeEntry(SD->Inst); + [[maybe_unused]] ArrayRef SDTEs = getTreeEntries(SD->Inst); assert((isVectorLikeInstWithConstOps(SD->Inst) || SD->isPartOfBundle() == - (SDTE && !doesNotNeedToSchedule(SDTE->Scalars))) && + (!SDTEs.empty() && + !doesNotNeedToSchedule(SDTEs.front()->Scalars))) && "scheduler and vectorizer bundle mismatch"); SD->FirstInBundle->SchedulingPriority = Idx++; @@ -17772,7 +17740,7 @@ bool BoUpSLP::collectValuesToDemote( auto IsPotentiallyTruncated = [&](Value *V, unsigned &BitWidth) -> bool { if (isa(V)) return true; - if (MultiNodeScalars.contains(V)) + if (getTreeEntries(V).size() > 1) return false; // For lat shuffle of sext/zext with many uses need to check the extra bit // for unsigned values, otherwise may have incorrect casting for reused @@ -17834,14 +17802,14 @@ bool BoUpSLP::collectValuesToDemote( if (E.isGather() || !Visited.insert(&E).second || any_of(E.Scalars, [&](Value *V) { return !isa(V) && all_of(V->users(), [&](User *U) { - return isa(U) && !getTreeEntry(U); + return isa(U) && !isVectorized(U); }); })) return FinalAnalysis(); if (any_of(E.Scalars, [&](Value *V) { return !all_of(V->users(), [=](User *U) { - return getTreeEntry(U) || + return isVectorized(U) || (E.Idx == 0 && UserIgnoreList && UserIgnoreList->contains(U)) || (!isa(U) && U->getType()->isSized() && @@ -18192,9 +18160,9 @@ void BoUpSLP::computeMinimumValueSizes() { return V->hasOneUse() || isa(V) || (!V->hasNUsesOrMore(UsesLimit) && none_of(V->users(), [&](User *U) { - const TreeEntry *TE = getTreeEntry(U); + ArrayRef TEs = getTreeEntries(U); const TreeEntry *UserTE = E.UserTreeIndices.back().UserTE; - if (TE == UserTE || !TE) + if (TEs.empty() || is_contained(TEs, UserTE)) return false; if (!isa(U) || @@ -18203,8 +18171,11 @@ void BoUpSLP::computeMinimumValueSizes() { return true; unsigned UserTESz = DL->getTypeSizeInBits( UserTE->Scalars.front()->getType()); - auto It = MinBWs.find(TE); - if (It != MinBWs.end() && It->second.first > UserTESz) + if (all_of(TEs, [&](const TreeEntry *TE) { + auto It = MinBWs.find(TE); + return It != MinBWs.end() && + It->second.first > UserTESz; + })) return true; return DL->getTypeSizeInBits(U->getType()) > UserTESz; }));