Skip to content

[VPlan] Split out VPBlendRecipe simplifications from simplifyRecipes. NFC #134073

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 7, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 80 additions & 68 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -926,74 +926,6 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
using namespace llvm::VPlanPatternMatch;

if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
// Try to remove redundant blend recipes.
SmallPtrSet<VPValue *, 4> UniqueValues;
if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
UniqueValues.insert(Blend->getIncomingValue(0));
for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
if (!match(Blend->getMask(I), m_False()))
UniqueValues.insert(Blend->getIncomingValue(I));

if (UniqueValues.size() == 1) {
Blend->replaceAllUsesWith(*UniqueValues.begin());
Blend->eraseFromParent();
return;
}

if (Blend->isNormalized())
return;

// Normalize the blend so its first incoming value is used as the initial
// value with the others blended into it.

unsigned StartIndex = 0;
for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
// If a value's mask is used only by the blend then is can be deadcoded.
// TODO: Find the most expensive mask that can be deadcoded, or a mask
// that's used by multiple blends where it can be removed from them all.
VPValue *Mask = Blend->getMask(I);
if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
StartIndex = I;
break;
}
}

SmallVector<VPValue *, 4> OperandsWithMask;
OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));

for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
if (I == StartIndex)
continue;
OperandsWithMask.push_back(Blend->getIncomingValue(I));
OperandsWithMask.push_back(Blend->getMask(I));
}

auto *NewBlend = new VPBlendRecipe(
cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
NewBlend->insertBefore(&R);

VPValue *DeadMask = Blend->getMask(StartIndex);
Blend->replaceAllUsesWith(NewBlend);
Blend->eraseFromParent();
recursivelyDeleteDeadRecipes(DeadMask);

/// Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
VPValue *NewMask;
if (NewBlend->getNumOperands() == 3 &&
match(NewBlend->getMask(1), m_Not(m_VPValue(NewMask)))) {
VPValue *Inc0 = NewBlend->getOperand(0);
VPValue *Inc1 = NewBlend->getOperand(1);
VPValue *OldMask = NewBlend->getOperand(2);
NewBlend->setOperand(0, Inc1);
NewBlend->setOperand(1, Inc0);
NewBlend->setOperand(2, NewMask);
if (OldMask->getNumUsers() == 0)
cast<VPInstruction>(OldMask)->eraseFromParent();
}
return;
}

// VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for
// part 0 can be replaced by their start value, if only the first lane is
// demanded.
Expand Down Expand Up @@ -1092,6 +1024,85 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
}
}

/// Clean up the VPBlendRecipes in the vector loop region of \p Plan: a blend
/// with exactly one incoming value that can still be selected is replaced by
/// that value, and the remaining non-normalized blends are rebuilt in
/// normalized form. Meant to run after simplifyRecipes, so that the masks
/// have already been simplified.
static void simplifyBlends(VPlan &Plan) {
  using namespace llvm::VPlanPatternMatch;

  // An incoming value guarded by a known-false mask is never selected.
  auto IsFalseMask = [](VPValue *M) { return match(M, m_False()); };

  for (VPBasicBlock *Block : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
    // The visited recipe may be erased below, hence the early-inc range.
    for (VPRecipeBase &Recipe : make_early_inc_range(*Block)) {
      auto *OldBlend = dyn_cast<VPBlendRecipe>(&Recipe);
      if (!OldBlend)
        continue;

      const unsigned NumIncoming = OldBlend->getNumIncomingValues();

      // Collect the distinct incoming values that may still be selected. The
      // mask of the first incoming value is only inspected when the blend is
      // not normalized.
      SmallPtrSet<VPValue *, 4> LiveValues;
      const bool KeepFirst =
          OldBlend->isNormalized() || !IsFalseMask(OldBlend->getMask(0));
      if (KeepFirst)
        LiveValues.insert(OldBlend->getIncomingValue(0));
      for (unsigned Idx = 1; Idx != NumIncoming; ++Idx) {
        if (IsFalseMask(OldBlend->getMask(Idx)))
          continue;
        LiveValues.insert(OldBlend->getIncomingValue(Idx));
      }

      // A single remaining candidate makes the blend redundant: forward that
      // value to all users and drop the blend.
      if (LiveValues.size() == 1) {
        VPValue *OnlyValue = *LiveValues.begin();
        OldBlend->replaceAllUsesWith(OnlyValue);
        OldBlend->eraseFromParent();
        continue;
      }

      // Nothing more to do for blends that are already normalized.
      if (OldBlend->isNormalized())
        continue;

      // Normalize the blend: one incoming value becomes the initial value and
      // the others are blended into it. Prefer as initial value the first one
      // whose mask is used only by this blend (and is not known false), since
      // that mask can then be dead-coded. Fall back to index 0 otherwise.
      // TODO: Find the most expensive mask that can be deadcoded, or a mask
      // that's used by multiple blends where it can be removed from them all.
      unsigned FirstIdx = 0;
      for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
        VPValue *Candidate = OldBlend->getMask(Idx);
        if (Candidate->getNumUsers() != 1 || IsFalseMask(Candidate))
          continue;
        FirstIdx = Idx;
        break;
      }

      // Operand layout of the new blend: the initial value, followed by
      // (value, mask) pairs for every other incoming value in original order.
      SmallVector<VPValue *, 4> NewOperands;
      NewOperands.push_back(OldBlend->getIncomingValue(FirstIdx));
      for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
        if (Idx != FirstIdx) {
          NewOperands.push_back(OldBlend->getIncomingValue(Idx));
          NewOperands.push_back(OldBlend->getMask(Idx));
        }
      }

      auto *Phi = cast<PHINode>(OldBlend->getUnderlyingValue());
      auto *NewBlend = new VPBlendRecipe(Phi, NewOperands);
      NewBlend->insertBefore(&Recipe);

      // Remember the mask that is no longer referenced by the new blend
      // before the old blend goes away, then clean it up if it became dead.
      VPValue *DroppedMask = OldBlend->getMask(FirstIdx);
      OldBlend->replaceAllUsesWith(NewBlend);
      OldBlend->eraseFromParent();
      recursivelyDeleteDeadRecipes(DroppedMask);

      // Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
      if (NewBlend->getNumOperands() != 3)
        continue;
      VPValue *PlainMask;
      if (!match(NewBlend->getMask(1), m_Not(m_VPValue(PlainMask))))
        continue;

      VPValue *FirstValue = NewBlend->getOperand(0);
      VPValue *SecondValue = NewBlend->getOperand(1);
      VPValue *NegatedMask = NewBlend->getOperand(2);
      NewBlend->setOperand(0, SecondValue);
      NewBlend->setOperand(1, FirstValue);
      NewBlend->setOperand(2, PlainMask);
      // The negation is erased once the blend was its last user.
      if (NegatedMask->getNumUsers() == 0)
        cast<VPInstruction>(NegatedMask)->eraseFromParent();
    }
  }
}

/// Optimize the width of vector induction variables in \p Plan based on a known
/// constant Trip Count, \p BestVF and \p BestUF.
static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
Expand Down Expand Up @@ -1687,6 +1698,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
runPass(removeRedundantInductionCasts, Plan);

runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
runPass(simplifyBlends, Plan);
runPass(removeDeadRecipes, Plan);
runPass(legalizeAndOptimizeInductions, Plan);
runPass(removeRedundantExpandSCEVRecipes, Plan);
Expand Down
Loading