Skip to content

[InstCombine] Replace an integer comparison of a phi node with multiple ucmp/scmp operands and a constant with phi of individual comparisons of original intrinsic's arguments #107769

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 41 additions & 30 deletions llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1809,8 +1809,8 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
// Check to see whether the instruction can be folded into each phi operand.
// If there is one operand that does not fold, remember the BB it is in.
SmallVector<Value *> NewPhiValues;
BasicBlock *NonSimplifiedBB = nullptr;
Value *NonSimplifiedInVal = nullptr;
SmallVector<unsigned int> OpsToMoveUseTo;
bool SeenNonSimplifiedInVal = false;
for (unsigned i = 0; i != NumPHIValues; ++i) {
Value *InVal = PN->getIncomingValue(i);
BasicBlock *InBB = PN->getIncomingBlock(i);
Expand All @@ -1820,22 +1820,34 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
continue;
}

if (NonSimplifiedBB) return nullptr; // More than one non-simplified value.
// If the only use of phi is comparing it with a constant then we can
// put this comparison in the incoming BB directly after a ucmp/scmp call
// because we know that it will simplify to a single icmp.
const APInt *Ignored;
if (isa<CmpIntrinsic>(InVal) && InVal->hasOneUse() &&
match(&I, m_c_ICmp(m_Specific(PN), m_APInt(Ignored)))) {
OpsToMoveUseTo.push_back(i);
NewPhiValues.push_back(nullptr);
continue;
}

if (SeenNonSimplifiedInVal)
return nullptr; // More than one non-simplified value.
SeenNonSimplifiedInVal = true;

NonSimplifiedBB = InBB;
NonSimplifiedInVal = InVal;
NewPhiValues.push_back(nullptr);
OpsToMoveUseTo.push_back(i);

// If the InVal is an invoke at the end of the pred block, then we can't
// insert a computation after it without breaking the edge.
if (isa<InvokeInst>(InVal))
if (cast<Instruction>(InVal)->getParent() == NonSimplifiedBB)
if (cast<Instruction>(InVal)->getParent() == InBB)
return nullptr;

// Do not push the operation across a loop backedge. This could result in
// an infinite combine loop, and is generally non-profitable (especially
// if the operation was originally outside the loop).
if (isBackEdge(NonSimplifiedBB, PN->getParent()))
if (isBackEdge(InBB, PN->getParent()))
return nullptr;
}

Expand All @@ -1844,11 +1856,26 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
// inserting the computation on some other paths (e.g. inside a loop). Only
// do this if the pred block is unconditionally branching into the phi block.
// Also, make sure that the pred block is not dead code.
if (NonSimplifiedBB != nullptr) {
BranchInst *BI = dyn_cast<BranchInst>(NonSimplifiedBB->getTerminator());
if (!BI || !BI->isUnconditional() ||
!DT.isReachableFromEntry(NonSimplifiedBB))
// After checking for all of the above, clone the instruction that uses the
// phi node and move it into the incoming BB because we know that the next
// iteration of InstCombine will simplify it.
for (auto OpIndex : OpsToMoveUseTo) {
Value *Op = PN->getIncomingValue(OpIndex);
BasicBlock *OpBB = PN->getIncomingBlock(OpIndex);

BranchInst *BI = dyn_cast<BranchInst>(OpBB->getTerminator());
if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(OpBB))
return nullptr;

Instruction *Clone = I.clone();
for (Use &U : Clone->operands()) {
if (U == PN)
U = Op;
else
U = U->DoPHITranslation(PN->getParent(), OpBB);
}
Clone = InsertNewInstBefore(Clone, OpBB->getTerminator()->getIterator());
NewPhiValues[OpIndex] = Clone;
}

// Okay, we can do the transformation: create the new PHI node.
Expand All @@ -1857,30 +1884,14 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
NewPN->takeName(PN);
NewPN->setDebugLoc(PN->getDebugLoc());

// If we are going to have to insert a new computation, do so right before the
// predecessor's terminator.
Instruction *Clone = nullptr;
if (NonSimplifiedBB) {
Clone = I.clone();
for (Use &U : Clone->operands()) {
if (U == PN)
U = NonSimplifiedInVal;
else
U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB);
}
InsertNewInstBefore(Clone, NonSimplifiedBB->getTerminator()->getIterator());
}

for (unsigned i = 0; i != NumPHIValues; ++i) {
if (NewPhiValues[i])
NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
else
NewPN->addIncoming(Clone, PN->getIncomingBlock(i));
NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
}

for (User *U : make_early_inc_range(PN->users())) {
Instruction *User = cast<Instruction>(U);
if (User == &I) continue;
if (User == &I)
continue;
replaceInstUsesWith(*User, NewPN);
eraseInstFromFunction(*User);
}
Expand Down
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please also add negative tests where one scmp is not one-use or the icmp operand is not constant?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The case you're proposing would still actually enable this optimization since we can still move the icmp into every incoming BB, and only one of the scmps will not be removed due to it being used more than once (in this case the BB with scmp becomes what was previously NonSimplifiedBB). I think it's better to add a negative test where both scmps are not one-use, since in that case the optimization wouldn't be trivially profitable.

Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

define i1 @icmp_of_phi_of_scmp_with_constant(i1 %c, i16 %x, i16 %y)
; CHECK-LABEL: define i1 @icmp_of_phi_of_scmp_with_constant(
; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; CHECK: [[TRUE]]:
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i16 [[X]], [[Y]]
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[FALSE]]:
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i16 [[Y]], [[X]]
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[R:%.*]] = phi i1 [ [[TMP0]], %[[TRUE]] ], [ [[TMP1]], %[[FALSE]] ]
; CHECK-NEXT: ret i1 [[R]]
;
{
entry:
br i1 %c, label %true, label %false
true:
%cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y)
br label %exit
false:
%cmp2 = call i8 @llvm.scmp(i16 %y, i16 %x)
br label %exit
exit:
%phi = phi i8 [%cmp1, %true], [%cmp2, %false]
%r = icmp slt i8 %phi, 0
ret i1 %r
}
Loading