
[InstCombineCompares] Try to "strengthen" compares based on known bits. #79405

Closed · wants to merge 10 commits
103 changes: 103 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
@@ -6100,6 +6101,91 @@ bool InstCombinerImpl::replacedSelectWithOperand(SelectInst *SI,
  return false;
}

// Try to "strengthen" the RHS of a compare based on known bits.
// For example, replace `icmp ugt %x, 14` with `icmp ugt %x, 15` when
// it is known that the two least significant bits of `%x` are zero.
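// For instance, if `%x` is known to be a multiple of 4, `icmp ugt %x, 14`
// means `%x` >= 15, and the smallest multiple of 4 that is >= 15 is 16,
// so the compare is equivalent to `icmp ugt %x, 15` (x >= 16 <=> x > 15).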
static Instruction *strengthenICmpUsingKnownBits(ICmpInst &I,
                                                 KnownBits Op0Known,
                                                 KnownBits Op1Known,
                                                 unsigned BitWidth) {

[Member, on lines +6108 to +6109] Suggested change:
-                                                 KnownBits Op0Known,
-                                                 KnownBits Op1Known,
+                                                 const KnownBits &Op0Known,
+                                                 const KnownBits &Op1Known,

  if (!BitWidth)
    return nullptr;

[Member] BitWidth is always non-zero.

  if (!(Op1Known.isConstant() && Op0Known.Zero.isMask()))
    return nullptr;

  Value *Op0 = I.getOperand(0);
  ICmpInst::Predicate Pred = I.getPredicate();
  Type *Ty = Op0->getType();
  APInt RHSConst = Op1Known.getConstant();

[Member] You can add a check here to avoid breaking the SPF (select pattern flavor).
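
One possible shape for such a guard, sketched here for illustration rather than taken from the patch; it assumes the existing ValueTracking helpers `matchSelectPattern` and `SelectPatternResult::isMinOrMax`:

  // Sketch: bail out if this compare feeds a select that forms a recognized
  // min/max pattern (SPF), since changing the constant would break it.
  for (User *U : I.users()) {
    if (auto *SI = dyn_cast<SelectInst>(U)) {
      Value *LHS, *RHS;
      SelectPatternFlavor SPF = matchSelectPattern(SI, LHS, RHS).Flavor;
      if (SelectPatternResult::isMinOrMax(SPF))
        return nullptr;
    }
  }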

[Author] @dtcxzyw Added a check not to break any select patterns.

[Member]
> @dtcxzyw Added a check not to break any select patterns.

Tests?

[Member]
> @dtcxzyw Added a check not to break any select patterns.

Emm, it seems like some regressions are still there :(
dtcxzyw/llvm-opt-benchmark#148 (comment)

[Author] @dtcxzyw I just added tests for the select patterns and the sign-check pattern. Everything works as intended. I tried your specific example and the pattern wasn't broken. There must be something else happening in that code. Can you double-check that the degradation is still there? If yes, can you provide the reduced test case?

[Member]
> @dtcxzyw I just added tests for the select patterns and the sign-check pattern. Everything works as intended. I tried your specific example and the pattern wasn't broken. There must be something else happening in that code. Can you double-check that the degradation is still there? If yes, can you provide the reduced test case?

Reduced test case:

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE = constant i64 67108864

define ptr @_ZN6Assimp14StackAllocator8AllocateEm(ptr %this, i64 %0) {
entry:
  %mul = shl i64 %0, 1
  store i64 %mul, ptr %this, align 8
  %call = call ptr @_ZSt3minImERKT_S2_S2_(ptr %this, ptr @_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE)
  %1 = load i64, ptr %call, align 8
  store i64 %1, ptr %this, align 8
  ret ptr null
}

define ptr @_ZSt3minImERKT_S2_S2_(ptr %__a, ptr %__b) {
entry:
  %0 = load i64, ptr %__b, align 8
  %1 = load i64, ptr %__a, align 8
  %cmp = icmp ult i64 %0, %1
  %__b.__a = select i1 %cmp, ptr %__b, ptr %__a
  ret ptr %__b.__a
}

Baseline (-O3):

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE = local_unnamed_addr constant i64 67108864

; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: write)
define noalias ptr @_ZN6Assimp14StackAllocator8AllocateEm(ptr nocapture writeonly %this, i64 %0) local_unnamed_addr #0 {
entry:
  %mul = shl i64 %0, 1
  %1 = tail call i64 @llvm.umin.i64(i64 %mul, i64 67108864)
  store i64 %1, ptr %this, align 8
  ret ptr null
}

; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
define ptr @_ZSt3minImERKT_S2_S2_(ptr readonly %__a, ptr readonly %__b) local_unnamed_addr #1 {
entry:
  %0 = load i64, ptr %__b, align 8
  %1 = load i64, ptr %__a, align 8
  %cmp = icmp ult i64 %0, %1
  %__b.__a = select i1 %cmp, ptr %__b, ptr %__a
  ret ptr %__b.__a
}

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i64 @llvm.umin.i64(i64, i64) #2

attributes #0 = { mustprogress nofree nosync nounwind willreturn memory(argmem: write) }
attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

After this patch:

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE = local_unnamed_addr constant i64 67108864

; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
define noalias noundef ptr @_ZN6Assimp14StackAllocator8AllocateEm(ptr nocapture writeonly %this, i64 %0) local_unnamed_addr #0 {
entry:
  %mul = shl i64 %0, 1
  %cmp.i = icmp ugt i64 %mul, 67108865
  %1 = select i1 %cmp.i, i64 67108864, i64 %mul
  store i64 %1, ptr %this, align 8
  ret ptr null
}

; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
define ptr @_ZSt3minImERKT_S2_S2_(ptr readonly %__a, ptr readonly %__b) local_unnamed_addr #1 {
entry:
  %0 = load i64, ptr %__b, align 8
  %1 = load i64, ptr %__a, align 8
  %cmp = icmp ult i64 %0, %1
  %__b.__a = select i1 %cmp, ptr %__b, ptr %__a
  ret ptr %__b.__a
}

attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) }
attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) }

It looks like a phase ordering problem :(

[Author] After inlining the call `%call = call ptr @_ZSt3minImERKT_S2_S2_(ptr %this, ptr @_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE)`, the code looks like this:

; *** IR Dump After InlinerPass on (_ZN6Assimp14StackAllocator8AllocateEm) ***
define ptr @_ZN6Assimp14StackAllocator8AllocateEm(ptr %this, i64 %0) local_unnamed_addr {
entry:
  %mul = shl i64 %0, 1
  store i64 %mul, ptr %this, align 8
  %1 = load i64, ptr %this, align 8
  %cmp.i = icmp ult i64 67108864, %1
  %__b.__a.i = select i1 %cmp.i, ptr @_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE, ptr %this
  %2 = load i64, ptr %__b.__a.i, align 8
  store i64 %2, ptr %this, align 8
  ret ptr null
}

so the constant `_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE` was replaced with its value in the icmp instruction, but not in the select. Why?

[Author] I see: the select can only be simplified after inlining, once it is combined with the load.

  ConstantRange Op0PredRange =
      ConstantRange::makeExactICmpRegion(Pred, RHSConst);
  int KnownZeroMaskLength = BitWidth - Op0Known.Zero.countLeadingZeros();

[Member] Suggested change:
-  int KnownZeroMaskLength = BitWidth - Op0Known.Zero.countLeadingZeros();
+  unsigned KnownZeroMaskLength = BitWidth - Op0Known.Zero.countLeadingZeros();

  if (KnownZeroMaskLength == 0)
    return nullptr;

  APInt PowOf2(BitWidth, 1 << KnownZeroMaskLength);

[Member] Use APInt::getOneBitSet instead. Please also add an i128 test.
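
The suggestion matters because `1 << KnownZeroMaskLength` is evaluated in plain C++ `int` arithmetic before being widened, so it misbehaves for shift amounts of 31 or more and cannot express the constants an i128 test would exercise. A minimal sketch of the suggested form:

  // An APInt of width BitWidth with only bit KnownZeroMaskLength set,
  // i.e. 2^KnownZeroMaskLength; valid for any width, including i128.
  APInt PowOf2 = APInt::getOneBitSet(BitWidth, KnownZeroMaskLength);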

  APInt Op0MinAccordingToPred(BitWidth, 0);
  APInt Op0MaxAccordingToPred(BitWidth, 0);
  APInt Op0MinRefinedByKnownBits(BitWidth, 0);
  APInt Op0MaxRefinedByKnownBits(BitWidth, 0);
  APInt NewLower(BitWidth, 0);
  APInt NewUpper(BitWidth, 0);
  bool ImprovedLower = false;
  bool ImprovedUpper = false;
  if (I.isSigned()) {
    Op0MinAccordingToPred = Op0PredRange.getSignedMin();
    Op0MaxAccordingToPred = Op0PredRange.getSignedMax();
    // Compute the smallest number satisfying the known-bits constraints
    // which is greater than or equal to Op0MinAccordingToPred.
    Op0MinRefinedByKnownBits =
        PowOf2 * APIntOps::RoundingSDiv(Op0MinAccordingToPred, PowOf2,
                                        APInt::Rounding::UP);

[Member] What is the compile-time impact of this patch?

    // Compute the largest number satisfying the known-bits constrained
    // which is at less or equal Op0MaxAccordingToPred.

[Member, on lines +6144 to +6145] Suggested change:
-    // Compute the largest number satisfying the known-bits constrained
-    // which is at less or equal Op0MaxAccordingToPred.
+    // Compute the largest number satisfying the known-bits constraints
+    // which is less than or equal to Op0MaxAccordingToPred.

    Op0MaxRefinedByKnownBits =
        PowOf2 * APIntOps::RoundingSDiv(Op0MaxAccordingToPred, PowOf2,
                                        APInt::Rounding::DOWN);
    NewLower = APIntOps::smax(Op0MinRefinedByKnownBits, Op0MinAccordingToPred);
    NewUpper = APIntOps::smin(Op0MaxRefinedByKnownBits, Op0MaxAccordingToPred);
    ImprovedLower = NewLower.sgt(Op0MinAccordingToPred);
    ImprovedUpper = NewUpper.slt(Op0MaxAccordingToPred);
  } else {
    Op0MinAccordingToPred = Op0PredRange.getUnsignedMin();
    Op0MaxAccordingToPred = Op0PredRange.getUnsignedMax();
    Op0MinRefinedByKnownBits =
        PowOf2 * APIntOps::RoundingUDiv(Op0MinAccordingToPred, PowOf2,
                                        APInt::Rounding::UP);

[Member, on lines +6157 to +6158] (Suggested change: formatting only; the replacement text is otherwise identical.)

Can the multiplication overflow?
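
It can. A small constructed illustration (not code from the patch): when the rounded-up bound lands one stride above the top of the unsigned domain, the product wraps.

  // BitWidth = 8 with the two low bits known zero, so PowOf2 = 4.
  APInt PowOf2(8, 4);
  APInt Min(8, 253); // unsigned lower bound implied by the predicate
  APInt Rounded = APIntOps::RoundingUDiv(Min, PowOf2, APInt::Rounding::UP);
  // Rounded == 64, and PowOf2 * Rounded == 256, which wraps to 0 in 8 bits.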

    Op0MaxRefinedByKnownBits =
        PowOf2 * APIntOps::RoundingUDiv(Op0MaxAccordingToPred, PowOf2,
                                        APInt::Rounding::DOWN);
    NewLower = APIntOps::umax(Op0MinRefinedByKnownBits, Op0MinAccordingToPred);
    NewUpper = APIntOps::umin(Op0MaxRefinedByKnownBits, Op0MaxAccordingToPred);
    ImprovedLower = NewLower.ugt(Op0MinAccordingToPred);
    ImprovedUpper = NewUpper.ult(Op0MaxAccordingToPred);
  }

[Contributor] Instead of creating these custom bounds, why not use ConstantRange?
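
A sketch of the direction the reviewer may have in mind, using the existing `ConstantRange::fromKnownBits` and `intersectWith` utilities; note that a plain range intersection keeps the end points but not the multiple-of-PowOf2 stride, so it is not a drop-in replacement:

  // Sketch: derive both ranges and intersect them, then read the refined
  // bounds off the result instead of computing NewLower/NewUpper by hand.
  ConstantRange PredRange = ConstantRange::makeExactICmpRegion(Pred, RHSConst);
  ConstantRange KnownRange = ConstantRange::fromKnownBits(Op0Known, I.isSigned());
  ConstantRange Refined = PredRange.intersectWith(KnownRange);
  // Refined.getUnsignedMin()/getUnsignedMax() (or the signed variants) would
  // then play the role of NewLower/NewUpper.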


  // Non-strict inequalities should have been canonicalized to strict ones
  // by now.
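  // For example, `icmp ule %x, C` will already have been rewritten to
  // `icmp ult %x, C+1`, which is why only the strict predicates appear in
  // the cases below.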
  switch (Pred) {
  default:
    break;

[Member] We can early exit when the predicate is not signed.

  case ICmpInst::ICMP_ULT:
  case ICmpInst::ICMP_SLT: {
    if (ImprovedUpper)

[Member] Can we avoid refining lower when the predicate is ult/slt?

      return new ICmpInst(Pred, Op0, ConstantInt::get(Ty, NewUpper + 1));
    break;
  }
  case ICmpInst::ICMP_UGT:
  case ICmpInst::ICMP_SGT: {
    if (ImprovedLower)
      return new ICmpInst(Pred, Op0, ConstantInt::get(Ty, NewLower - 1));
    break;
  }
  }
  return nullptr;
}

/// Try to fold the comparison based on range information we can get by checking
/// whether bits are known to be zero or one in the inputs.
Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
@@ -6357,6 +6443,23 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
      (Op0Known.One.isNegative() && Op1Known.One.isNegative())))
    return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);

  // If the result of the compare is used only in conditional branches, try to
  // "strengthen" the compare. This may allow us to deduce stronger results
  // about the value involved in the comparison in the blocks dominated by
  // these branches.
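  // For example, if the two low bits of %x are known to be zero, rewriting
  // `br (icmp ugt %x, 14)` as `br (icmp ugt %x, 15)` lets dominated blocks
  // deduce %x u>= 16 rather than just %x u>= 15.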
  bool AllUsesAreInBranches = true;
  for (const Use &U : I.uses()) {
    const Instruction *UI = cast<Instruction>(U.getUser());
    if (!isa<BranchInst>(UI)) {
      AllUsesAreInBranches = false;
      break;
    }
  }
  if (AllUsesAreInBranches) {
    if (Instruction *Res =
            strengthenICmpUsingKnownBits(I, Op0Known, Op1Known, BitWidth))
      return Res;
  }

  return nullptr;
}

2 changes: 1 addition & 1 deletion llvm/test/Transforms/InstCombine/assume-loop-align.ll
@@ -28,7 +28,7 @@ define void @foo(ptr %a, ptr %b) #0 {
; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 1648
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 1633
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
; CHECK-NEXT: ret void
2 changes: 1 addition & 1 deletion llvm/test/Transforms/InstCombine/icmp-mul.ll
@@ -969,7 +969,7 @@ define i1 @mul_of_pow2_no_lz_other_op(i32 %x, i8 %y) {
; CHECK-NEXT: [[B:%.*]] = and i32 [[X:%.*]], 2
; CHECK-NEXT: [[S:%.*]] = sext i8 [[Y:%.*]] to i32
; CHECK-NEXT: [[M:%.*]] = mul nuw nsw i32 [[B]], [[S]]
-; CHECK-NEXT: [[R:%.*]] = icmp sgt i32 [[M]], 254
+; CHECK-NEXT: [[R:%.*]] = icmp sgt i32 [[M]], 255
; CHECK-NEXT: ret i1 [[R]]
;
%b = and i32 %x, 2
2 changes: 1 addition & 1 deletion llvm/test/Transforms/InstCombine/icmp-or.ll
@@ -308,7 +308,7 @@ define i1 @decrement_sgt_n1_commute_use1(i8 %px) {
; CHECK-NEXT: [[X:%.*]] = mul i8 [[PX:%.*]], 42
; CHECK-NEXT: [[DEC:%.*]] = add i8 [[X]], -1
; CHECK-NEXT: call void @use(i8 [[DEC]])
-; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[X]], 0
+; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[X]], 1
; CHECK-NEXT: ret i1 [[R]]
;
%x = mul i8 %px, 42 ; thwart complexity-based canonicalization
4 changes: 2 additions & 2 deletions llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll
@@ -136,7 +136,7 @@ define i1 @icmp_sgt6(i8 %x) {

define i1 @icmp_sgt7(i8 %x) {
; CHECK-LABEL: @icmp_sgt7(
-; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], 62
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 63
; CHECK-NEXT: ret i1 [[CMP]]
;
%shl = shl nsw i8 %x, 1
@@ -224,7 +224,7 @@ define i1 @icmp_sle1(i8 %x) {

define i1 @icmp_sle2(i8 %x) {
; CHECK-LABEL: @icmp_sle2(
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], -63
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], -64
; CHECK-NEXT: ret i1 [[CMP]]
;
%shl = shl nsw i8 %x, 1
150 changes: 134 additions & 16 deletions llvm/test/Transforms/InstCombine/icmp.ll
@@ -1490,8 +1490,8 @@ define <2 x i1> @test70vec(<2 x i32> %X) {

define i1 @icmp_sext16trunc(i32 %x) {
; CHECK-LABEL: @icmp_sext16trunc(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[TMP1]], 36
+; CHECK-NEXT: [[SEXT1:%.*]] = shl i32 [[X:%.*]], 16
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SEXT1]], 2293761
; CHECK-NEXT: ret i1 [[CMP]]
;
%trunc = trunc i32 %x to i16
@@ -1502,8 +1502,8 @@ define i1 @icmp_sext16trunc(i32 %x) {

define i1 @icmp_sext8trunc(i32 %x) {
; CHECK-LABEL: @icmp_sext8trunc(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 36
+; CHECK-NEXT: [[SEXT1:%.*]] = shl i32 [[X:%.*]], 24
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SEXT1]], 587202561
; CHECK-NEXT: ret i1 [[CMP]]
;
%trunc = trunc i32 %x to i8
@@ -1515,8 +1515,8 @@ define i1 @icmp_sext8trunc(i32 %x) {
; Vectors should fold the same way.
define <2 x i1> @icmp_sext8trunc_vec(<2 x i32> %x) {
; CHECK-LABEL: @icmp_sext8trunc_vec(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i8>
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], <i8 36, i8 36>
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 24, i32 24>
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[TMP1]], <i32 587202561, i32 587202561>
; CHECK-NEXT: ret <2 x i1> [[CMP]]
;
%trunc = trunc <2 x i32> %x to <2 x i8>
@@ -1527,8 +1527,8 @@ define <2 x i1> @icmp_sext8trunc_vec(<2 x i32> %x) {

define i1 @icmp_shl16(i32 %x) {
; CHECK-LABEL: @icmp_shl16(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[TMP1]], 36
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 16
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SHL]], 2293761
; CHECK-NEXT: ret i1 [[CMP]]
;
%shl = shl i32 %x, 16
@@ -1541,7 +1541,7 @@ define i1 @icmp_shl16(i32 %x) {
define i1 @icmp_shl17(i32 %x) {
; CHECK-LABEL: @icmp_shl17(
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 17
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SHL]], 2359296
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SHL]], 2228225
; CHECK-NEXT: ret i1 [[CMP]]
;
%shl = shl i32 %x, 17
@@ -1551,8 +1551,8 @@ define i1 @icmp_shl17(i32 %x) {

define <2 x i1> @icmp_shl16_vec(<2 x i32> %x) {
; CHECK-LABEL: @icmp_shl16_vec(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i16>
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i16> [[TMP1]], <i16 36, i16 36>
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[X:%.*]], <i32 16, i32 16>
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[SHL]], <i32 2293761, i32 2293761>
; CHECK-NEXT: ret <2 x i1> [[CMP]]
;
%shl = shl <2 x i32> %x, <i32 16, i32 16>
@@ -1562,8 +1562,8 @@ define <2 x i1> @icmp_shl16_vec(<2 x i32> %x) {

define i1 @icmp_shl24(i32 %x) {
; CHECK-LABEL: @icmp_shl24(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 36
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 24
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SHL]], 587202561
; CHECK-NEXT: ret i1 [[CMP]]
;
%shl = shl i32 %x, 24
@@ -2199,7 +2199,7 @@ define i1 @icmp_ashr_and_overshift(i8 %X) {
define i1 @icmp_and_ashr_neg_and_legal(i8 %x) {
; CHECK-LABEL: @icmp_and_ashr_neg_and_legal(
; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -32
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 16
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 1
; CHECK-NEXT: ret i1 [[CMP]]
;
%ashr = ashr i8 %x, 4
@@ -2225,7 +2225,7 @@ define i1 @icmp_and_ashr_mixed_and_shiftout(i8 %x) {
define i1 @icmp_and_ashr_neg_cmp_slt_legal(i8 %x) {
; CHECK-LABEL: @icmp_and_ashr_neg_cmp_slt_legal(
; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -32
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], -64
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], -95
; CHECK-NEXT: ret i1 [[CMP]]
;
%ashr = ashr i8 %x, 4
@@ -2239,7 +2239,7 @@ define i1 @icmp_and_ashr_neg_cmp_slt_shiftout(i8 %x) {
; CHECK-LABEL: @icmp_and_ashr_neg_cmp_slt_shiftout(
; CHECK-NEXT: [[ASHR:%.*]] = ashr i8 [[X:%.*]], 4
; CHECK-NEXT: [[AND:%.*]] = and i8 [[ASHR]], -2
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[AND]], -68
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[AND]], -69
; CHECK-NEXT: ret i1 [[CMP]]
;
%ashr = ashr i8 %x, 4
@@ -5183,3 +5183,121 @@ entry:
%cmp = icmp eq i8 %add2, %add1
ret i1 %cmp
}

define i1 @strengthen_icmp_using_known_bits_ugt(i16 %a) {
; CHECK-LABEL: @strengthen_icmp_using_known_bits_ugt(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i16 [[A:%.*]], 15
; CHECK-NEXT: ret i1 [[CMP]]
;
entry:
%and_ = and i16 %a, 65532
%cmp = icmp ugt i16 %and_, 14
ret i1 %cmp
}

define i1 @strengthen_icmp_using_known_bits_ult(i16 %a) {
; CHECK-LABEL: @strengthen_icmp_using_known_bits_ult(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[AND_:%.*]] = and i16 [[A:%.*]], -4
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[AND_]], 17
; CHECK-NEXT: ret i1 [[CMP]]
;
entry:
%and_ = and i16 %a, 65532
%cmp = icmp ult i16 %and_, 18
ret i1 %cmp
}

define i1 @strengthen_icmp_using_known_bits_sgt(i16 %a) {
; CHECK-LABEL: @strengthen_icmp_using_known_bits_sgt(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[A:%.*]], -1
; CHECK-NEXT: ret i1 [[CMP]]
;
entry:
%and_ = and i16 %a, 65520
%cmp = icmp sgt i16 %and_, -15
ret i1 %cmp
}

define i1 @strengthen_icmp_using_known_bits_slt(i16 %a) {
; CHECK-LABEL: @strengthen_icmp_using_known_bits_slt(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[AND_:%.*]] = and i16 [[A:%.*]], -4
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[AND_]], -15
; CHECK-NEXT: ret i1 [[CMP]]
;
entry:
%and_ = and i16 %a, 65532
%cmp = icmp slt i16 %and_, -14
ret i1 %cmp
}

define i1 @dont_strengthen_icmp_in_sign_bit_check(i8 %a) {
; CHECK-LABEL: @dont_strengthen_icmp_in_sign_bit_check(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ICMP_:%.*]] = icmp sgt i8 [[A:%.*]], -1
; CHECK-NEXT: ret i1 [[ICMP_]]
;
entry:
%shl_ = and i8 %a, 252
%icmp_ = icmp sgt i8 %shl_, -1
ret i1 %icmp_
}

define i8 @dont_strengthen_icmp_in_smin(i8 %a) {
; CHECK-LABEL: @dont_strengthen_icmp_in_smin(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHL_:%.*]] = shl i8 [[A:%.*]], 2
; CHECK-NEXT: [[SELECT_:%.*]] = call i8 @llvm.smin.i8(i8 [[SHL_]], i8 7)
; CHECK-NEXT: ret i8 [[SELECT_]]
;
entry:
%shl_ = shl i8 %a, 2
%icmp_ = icmp slt i8 %shl_, 7
%select_ = select i1 %icmp_, i8 %shl_, i8 7
ret i8 %select_
}

define i8 @dont_strengthen_icmp_in_umin(i8 %a) {
; CHECK-LABEL: @dont_strengthen_icmp_in_umin(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHL_:%.*]] = shl i8 [[A:%.*]], 2
; CHECK-NEXT: [[SELECT_:%.*]] = call i8 @llvm.umin.i8(i8 [[SHL_]], i8 7)
; CHECK-NEXT: ret i8 [[SELECT_]]
;
entry:
%shl_ = shl i8 %a, 2
%icmp_ = icmp ult i8 %shl_, 7
%select_ = select i1 %icmp_, i8 %shl_, i8 7
ret i8 %select_
}

define i8 @dont_strengthen_icmp_in_smax(i8 %a) {
; CHECK-LABEL: @dont_strengthen_icmp_in_smax(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHL_:%.*]] = shl i8 [[A:%.*]], 2
; CHECK-NEXT: [[SELECT_:%.*]] = call i8 @llvm.smax.i8(i8 [[SHL_]], i8 6)
; CHECK-NEXT: ret i8 [[SELECT_]]
;
entry:
%shl_ = shl i8 %a, 2
%icmp_ = icmp sgt i8 %shl_, 6
%select_ = select i1 %icmp_, i8 %shl_, i8 6
ret i8 %select_
}

define i8 @dont_strengthen_icmp_in_umax(i8 %a) {
; CHECK-LABEL: @dont_strengthen_icmp_in_umax(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHL_:%.*]] = shl i8 [[A:%.*]], 2
; CHECK-NEXT: [[SELECT_:%.*]] = call i8 @llvm.umax.i8(i8 [[SHL_]], i8 6)
; CHECK-NEXT: ret i8 [[SELECT_]]
;
entry:
%shl_ = shl i8 %a, 2
%icmp_ = icmp ugt i8 %shl_, 6
%select_ = select i1 %icmp_, i8 %shl_, i8 6
ret i8 %select_
}