
[InstCombineCompares] Try to "strengthen" compares based on known bits. #79405

Closed · wants to merge 10 commits
103 changes: 103 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
@@ -6100,6 +6101,91 @@ bool InstCombinerImpl::replacedSelectWithOperand(SelectInst *SI,
  return false;
}

// Try to "strengthen" the RHS of a compare based on known bits.
// For example, replace `icmp ugt %x, 14` with `icmp ugt %x, 15` when
// it is known that the two least significant bits of `%x` are zero.
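// For instance, if `%x` is known to be a multiple of 4, `icmp ugt %x, 14`
// means `%x` >= 15, and the smallest multiple of 4 that is >= 15 is 16,
// so the compare is equivalent to `icmp ugt %x, 15` (x >= 16 <=> x > 15).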
static Instruction *strengthenICmpUsingKnownBits(ICmpInst &I,
                                                 KnownBits Op0Known,
                                                 KnownBits Op1Known,
                                                 unsigned BitWidth) {

[Member, on lines +6108 to +6109] Suggested change:
-                                                 KnownBits Op0Known,
-                                                 KnownBits Op1Known,
+                                                 const KnownBits &Op0Known,
+                                                 const KnownBits &Op1Known,

  if (!BitWidth)
    return nullptr;

[Member] BitWidth is always non-zero.

  if (!(Op1Known.isConstant() && Op0Known.Zero.isMask()))
    return nullptr;

  Value *Op0 = I.getOperand(0);
  ICmpInst::Predicate Pred = I.getPredicate();
  Type *Ty = Op0->getType();
  APInt RHSConst = Op1Known.getConstant();

[Member] You can add a check here to avoid breaking the SPF (select pattern flavor).
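
One possible shape for such a guard, sketched here for illustration rather than taken from the patch; it assumes the existing ValueTracking helpers `matchSelectPattern` and `SelectPatternResult::isMinOrMax`:

  // Sketch: bail out if this compare feeds a select that forms a recognized
  // min/max pattern (SPF), since changing the constant would break it.
  for (User *U : I.users()) {
    if (auto *SI = dyn_cast<SelectInst>(U)) {
      Value *LHS, *RHS;
      SelectPatternFlavor SPF = matchSelectPattern(SI, LHS, RHS).Flavor;
      if (SelectPatternResult::isMinOrMax(SPF))
        return nullptr;
    }
  }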

[Author] @dtcxzyw Added a check not to break any select patterns.

[Member]
> @dtcxzyw Added a check not to break any select patterns.

Tests?

[Member]
> @dtcxzyw Added a check not to break any select patterns.

Emm, it seems like some regressions are still there :(
dtcxzyw/llvm-opt-benchmark#148 (comment)

[Author] @dtcxzyw I just added tests for the select patterns and the sign-check pattern. Everything works as intended. I tried your specific example and the pattern wasn't broken. There must be something else happening in that code. Can you double-check that the degradation is still there? If yes, can you provide the reduced test case?

[Member]
> @dtcxzyw I just added tests for the select patterns and the sign-check pattern. Everything works as intended. I tried your specific example and the pattern wasn't broken. There must be something else happening in that code. Can you double-check that the degradation is still there? If yes, can you provide the reduced test case?

Reduced test case:

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE = constant i64 67108864

define ptr @_ZN6Assimp14StackAllocator8AllocateEm(ptr %this, i64 %0) {
entry:
  %mul = shl i64 %0, 1
  store i64 %mul, ptr %this, align 8
  %call = call ptr @_ZSt3minImERKT_S2_S2_(ptr %this, ptr @_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE)
  %1 = load i64, ptr %call, align 8
  store i64 %1, ptr %this, align 8
  ret ptr null
}

define ptr @_ZSt3minImERKT_S2_S2_(ptr %__a, ptr %__b) {
entry:
  %0 = load i64, ptr %__b, align 8
  %1 = load i64, ptr %__a, align 8
  %cmp = icmp ult i64 %0, %1
  %__b.__a = select i1 %cmp, ptr %__b, ptr %__a
  ret ptr %__b.__a
}

Baseline (-O3):

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE = local_unnamed_addr constant i64 67108864

; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: write)
define noalias ptr @_ZN6Assimp14StackAllocator8AllocateEm(ptr nocapture writeonly %this, i64 %0) local_unnamed_addr #0 {
entry:
  %mul = shl i64 %0, 1
  %1 = tail call i64 @llvm.umin.i64(i64 %mul, i64 67108864)
  store i64 %1, ptr %this, align 8
  ret ptr null
}

; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
define ptr @_ZSt3minImERKT_S2_S2_(ptr readonly %__a, ptr readonly %__b) local_unnamed_addr #1 {
entry:
  %0 = load i64, ptr %__b, align 8
  %1 = load i64, ptr %__a, align 8
  %cmp = icmp ult i64 %0, %1
  %__b.__a = select i1 %cmp, ptr %__b, ptr %__a
  ret ptr %__b.__a
}

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i64 @llvm.umin.i64(i64, i64) #2

attributes #0 = { mustprogress nofree nosync nounwind willreturn memory(argmem: write) }
attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

After this patch:

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE = local_unnamed_addr constant i64 67108864

; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
define noalias noundef ptr @_ZN6Assimp14StackAllocator8AllocateEm(ptr nocapture writeonly %this, i64 %0) local_unnamed_addr #0 {
entry:
  %mul = shl i64 %0, 1
  %cmp.i = icmp ugt i64 %mul, 67108865
  %1 = select i1 %cmp.i, i64 67108864, i64 %mul
  store i64 %1, ptr %this, align 8
  ret ptr null
}

; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
define ptr @_ZSt3minImERKT_S2_S2_(ptr readonly %__a, ptr readonly %__b) local_unnamed_addr #1 {
entry:
  %0 = load i64, ptr %__b, align 8
  %1 = load i64, ptr %__a, align 8
  %cmp = icmp ult i64 %0, %1
  %__b.__a = select i1 %cmp, ptr %__b, ptr %__a
  ret ptr %__b.__a
}

attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) }
attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) }

It looks like a phase ordering problem :(

[Author] After inlining the call `%call = call ptr @_ZSt3minImERKT_S2_S2_(ptr %this, ptr @_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE)`, the code looks like this:

; *** IR Dump After InlinerPass on (_ZN6Assimp14StackAllocator8AllocateEm) ***
define ptr @_ZN6Assimp14StackAllocator8AllocateEm(ptr %this, i64 %0) local_unnamed_addr {
entry:
  %mul = shl i64 %0, 1
  store i64 %mul, ptr %this, align 8
  %1 = load i64, ptr %this, align 8
  %cmp.i = icmp ult i64 67108864, %1
  %__b.__a.i = select i1 %cmp.i, ptr @_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE, ptr %this
  %2 = load i64, ptr %__b.__a.i, align 8
  store i64 %2, ptr %this, align 8
  ret ptr null
}

so the constant `_ZN6Assimp14StackAllocator18g_maxBytesPerBlockE` was replaced with its value in the icmp instruction, but not in the select. Why?

[Author] I see: the select can only be simplified after inlining, once it is combined with the load.

  ConstantRange Op0PredRange =
      ConstantRange::makeExactICmpRegion(Pred, RHSConst);
  int KnownZeroMaskLength = BitWidth - Op0Known.Zero.countLeadingZeros();

[Member] Suggested change:
-  int KnownZeroMaskLength = BitWidth - Op0Known.Zero.countLeadingZeros();
+  unsigned KnownZeroMaskLength = BitWidth - Op0Known.Zero.countLeadingZeros();

  if (KnownZeroMaskLength == 0)
    return nullptr;

  APInt PowOf2(BitWidth, 1 << KnownZeroMaskLength);

[Member] Use APInt::getOneBitSet instead. Please also add an i128 test.
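
The suggestion matters because `1 << KnownZeroMaskLength` is evaluated in plain C++ `int` arithmetic before being widened, so it misbehaves for shift amounts of 31 or more and cannot express the constants an i128 test would exercise. A minimal sketch of the suggested form:

  // An APInt of width BitWidth with only bit KnownZeroMaskLength set,
  // i.e. 2^KnownZeroMaskLength; valid for any width, including i128.
  APInt PowOf2 = APInt::getOneBitSet(BitWidth, KnownZeroMaskLength);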

  APInt Op0MinAccordingToPred(BitWidth, 0);
  APInt Op0MaxAccordingToPred(BitWidth, 0);
  APInt Op0MinRefinedByKnownBits(BitWidth, 0);
  APInt Op0MaxRefinedByKnownBits(BitWidth, 0);
  APInt NewLower(BitWidth, 0);
  APInt NewUpper(BitWidth, 0);
  bool ImprovedLower = false;
  bool ImprovedUpper = false;
  if (I.isSigned()) {
    Op0MinAccordingToPred = Op0PredRange.getSignedMin();
    Op0MaxAccordingToPred = Op0PredRange.getSignedMax();
    // Compute the smallest number satisfying the known-bits constraints
    // which is greater than or equal to Op0MinAccordingToPred.
    Op0MinRefinedByKnownBits =
        PowOf2 * APIntOps::RoundingSDiv(Op0MinAccordingToPred, PowOf2,
                                        APInt::Rounding::UP);

[Member] What is the compile-time impact of this patch?

    // Compute the largest number satisfying the known-bits constrained
    // which is at less or equal Op0MaxAccordingToPred.

[Member, on lines +6144 to +6145] Suggested change:
-    // Compute the largest number satisfying the known-bits constrained
-    // which is at less or equal Op0MaxAccordingToPred.
+    // Compute the largest number satisfying the known-bits constraints
+    // which is less than or equal to Op0MaxAccordingToPred.

    Op0MaxRefinedByKnownBits =
        PowOf2 * APIntOps::RoundingSDiv(Op0MaxAccordingToPred, PowOf2,
                                        APInt::Rounding::DOWN);
    NewLower = APIntOps::smax(Op0MinRefinedByKnownBits, Op0MinAccordingToPred);
    NewUpper = APIntOps::smin(Op0MaxRefinedByKnownBits, Op0MaxAccordingToPred);
    ImprovedLower = NewLower.sgt(Op0MinAccordingToPred);
    ImprovedUpper = NewUpper.slt(Op0MaxAccordingToPred);
  } else {
    Op0MinAccordingToPred = Op0PredRange.getUnsignedMin();
    Op0MaxAccordingToPred = Op0PredRange.getUnsignedMax();
    Op0MinRefinedByKnownBits =
        PowOf2 * APIntOps::RoundingUDiv(Op0MinAccordingToPred, PowOf2,
                                        APInt::Rounding::UP);

[Member, on lines +6157 to +6158] (Suggested change: formatting only; the replacement text is otherwise identical.)

Can the multiplication overflow?
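
It can. A small constructed illustration (not code from the patch): when the rounded-up bound lands one stride above the top of the unsigned domain, the product wraps.

  // BitWidth = 8 with the two low bits known zero, so PowOf2 = 4.
  APInt PowOf2(8, 4);
  APInt Min(8, 253); // unsigned lower bound implied by the predicate
  APInt Rounded = APIntOps::RoundingUDiv(Min, PowOf2, APInt::Rounding::UP);
  // Rounded == 64, and PowOf2 * Rounded == 256, which wraps to 0 in 8 bits.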

    Op0MaxRefinedByKnownBits =
        PowOf2 * APIntOps::RoundingUDiv(Op0MaxAccordingToPred, PowOf2,
                                        APInt::Rounding::DOWN);
    NewLower = APIntOps::umax(Op0MinRefinedByKnownBits, Op0MinAccordingToPred);
    NewUpper = APIntOps::umin(Op0MaxRefinedByKnownBits, Op0MaxAccordingToPred);
    ImprovedLower = NewLower.ugt(Op0MinAccordingToPred);
    ImprovedUpper = NewUpper.ult(Op0MaxAccordingToPred);
  }

[Contributor] Instead of creating these custom bounds, why not use ConstantRange?
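
A sketch of the direction the reviewer may have in mind, using the existing `ConstantRange::fromKnownBits` and `intersectWith` utilities; note that a plain range intersection keeps the end points but not the multiple-of-PowOf2 stride, so it is not a drop-in replacement:

  // Sketch: derive both ranges and intersect them, then read the refined
  // bounds off the result instead of computing NewLower/NewUpper by hand.
  ConstantRange PredRange = ConstantRange::makeExactICmpRegion(Pred, RHSConst);
  ConstantRange KnownRange = ConstantRange::fromKnownBits(Op0Known, I.isSigned());
  ConstantRange Refined = PredRange.intersectWith(KnownRange);
  // Refined.getUnsignedMin()/getUnsignedMax() (or the signed variants) would
  // then play the role of NewLower/NewUpper.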


  // Non-strict inequalities should have been canonicalized to strict ones
  // by now.
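  // For example, `icmp ule %x, C` will already have been rewritten to
  // `icmp ult %x, C+1`, which is why only the strict predicates appear in
  // the cases below.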
  switch (Pred) {
  default:
    break;

[Member] We can early exit when the predicate is not signed.

  case ICmpInst::ICMP_ULT:
  case ICmpInst::ICMP_SLT: {
    if (ImprovedUpper)

[Member] Can we avoid refining lower when the predicate is ult/slt?

      return new ICmpInst(Pred, Op0, ConstantInt::get(Ty, NewUpper + 1));
    break;
  }
  case ICmpInst::ICMP_UGT:
  case ICmpInst::ICMP_SGT: {
    if (ImprovedLower)
      return new ICmpInst(Pred, Op0, ConstantInt::get(Ty, NewLower - 1));
    break;
  }
  }
  return nullptr;
}

/// Try to fold the comparison based on range information we can get by checking
/// whether bits are known to be zero or one in the inputs.
Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
@@ -6357,6 +6443,23 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
      (Op0Known.One.isNegative() && Op1Known.One.isNegative())))
    return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);

  // If the result of the compare is used only in conditional branches, try to
  // "strengthen" the compare. This may allow us to deduce stronger results
  // about the value involved in the comparison in the blocks dominated by
  // these branches.
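  // For example, if the two low bits of %x are known to be zero, rewriting
  // `br (icmp ugt %x, 14)` as `br (icmp ugt %x, 15)` lets dominated blocks
  // deduce %x u>= 16 rather than just %x u>= 15.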
  bool AllUsesAreInBranches = true;
  for (const Use &U : I.uses()) {
    const Instruction *UI = cast<Instruction>(U.getUser());
    if (!isa<BranchInst>(UI)) {
      AllUsesAreInBranches = false;
      break;
    }
  }
  if (AllUsesAreInBranches) {
    if (Instruction *Res =
            strengthenICmpUsingKnownBits(I, Op0Known, Op1Known, BitWidth))
      return Res;
  }

  return nullptr;
}

2 changes: 1 addition & 1 deletion llvm/test/Transforms/InstCombine/assume-loop-align.ll
@@ -28,7 +28,7 @@ define void @foo(ptr %a, ptr %b) #0 {
; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 1648
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 1633
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
; CHECK-NEXT: ret void
2 changes: 1 addition & 1 deletion llvm/test/Transforms/InstCombine/icmp-mul.ll
@@ -969,7 +969,7 @@ define i1 @mul_of_pow2_no_lz_other_op(i32 %x, i8 %y) {
; CHECK-NEXT: [[B:%.*]] = and i32 [[X:%.*]], 2
; CHECK-NEXT: [[S:%.*]] = sext i8 [[Y:%.*]] to i32
; CHECK-NEXT: [[M:%.*]] = mul nuw nsw i32 [[B]], [[S]]
-; CHECK-NEXT: [[R:%.*]] = icmp sgt i32 [[M]], 254
+; CHECK-NEXT: [[R:%.*]] = icmp sgt i32 [[M]], 255
; CHECK-NEXT: ret i1 [[R]]
;
%b = and i32 %x, 2
2 changes: 1 addition & 1 deletion llvm/test/Transforms/InstCombine/icmp-or.ll
@@ -308,7 +308,7 @@ define i1 @decrement_sgt_n1_commute_use1(i8 %px) {
; CHECK-NEXT: [[X:%.*]] = mul i8 [[PX:%.*]], 42
; CHECK-NEXT: [[DEC:%.*]] = add i8 [[X]], -1
; CHECK-NEXT: call void @use(i8 [[DEC]])
-; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[X]], 0
+; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[X]], 1
; CHECK-NEXT: ret i1 [[R]]
;
%x = mul i8 %px, 42 ; thwart complexity-based canonicalization
4 changes: 2 additions & 2 deletions llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll
@@ -136,7 +136,7 @@ define i1 @icmp_sgt6(i8 %x) {

define i1 @icmp_sgt7(i8 %x) {
; CHECK-LABEL: @icmp_sgt7(
-; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], 62
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], 63
; CHECK-NEXT: ret i1 [[CMP]]
;
%shl = shl nsw i8 %x, 1
@@ -224,7 +224,7 @@ define i1 @icmp_sle1(i8 %x) {

define i1 @icmp_sle2(i8 %x) {
; CHECK-LABEL: @icmp_sle2(
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X:%.*]], -63
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], -64
; CHECK-NEXT: ret i1 [[CMP]]
;
%shl = shl nsw i8 %x, 1
150 changes: 134 additions & 16 deletions llvm/test/Transforms/InstCombine/icmp.ll
@@ -1490,8 +1490,8 @@ define <2 x i1> @test70vec(<2 x i32> %X) {

define i1 @icmp_sext16trunc(i32 %x) {
; CHECK-LABEL: @icmp_sext16trunc(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[TMP1]], 36
+; CHECK-NEXT: [[SEXT1:%.*]] = shl i32 [[X:%.*]], 16
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SEXT1]], 2293761
; CHECK-NEXT: ret i1 [[CMP]]
;
%trunc = trunc i32 %x to i16
@@ -1502,8 +1502,8 @@ define i1 @icmp_sext16trunc(i32 %x) {

define i1 @icmp_sext8trunc(i32 %x) {
; CHECK-LABEL: @icmp_sext8trunc(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 36
+; CHECK-NEXT: [[SEXT1:%.*]] = shl i32 [[X:%.*]], 24
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SEXT1]], 587202561
; CHECK-NEXT: ret i1 [[CMP]]
;
%trunc = trunc i32 %x to i8
@@ -1515,8 +1515,8 @@ define i1 @icmp_sext8trunc(i32 %x) {
; Vectors should fold the same way.
define <2 x i1> @icmp_sext8trunc_vec(<2 x i32> %x) {
; CHECK-LABEL: @icmp_sext8trunc_vec(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i8>
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], <i8 36, i8 36>
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 24, i32 24>
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[TMP1]], <i32 587202561, i32 587202561>
; CHECK-NEXT: ret <2 x i1> [[CMP]]
;
%trunc = trunc <2 x i32> %x to <2 x i8>
@@ -1527,8 +1527,8 @@ define <2 x i1> @icmp_sext8trunc_vec(<2 x i32> %x) {

define i1 @icmp_shl16(i32 %x) {
; CHECK-LABEL: @icmp_shl16(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[TMP1]], 36
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 16
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SHL]], 2293761
; CHECK-NEXT: ret i1 [[CMP]]
;
%shl = shl i32 %x, 16
@@ -1541,7 +1541,7 @@ define i1 @icmp_shl16(i32 %x) {
define i1 @icmp_shl17(i32 %x) {
; CHECK-LABEL: @icmp_shl17(
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 17
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SHL]], 2359296
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SHL]], 2228225
; CHECK-NEXT: ret i1 [[CMP]]
;
%shl = shl i32 %x, 17
@@ -1551,8 +1551,8 @@ define i1 @icmp_shl17(i32 %x) {

define <2 x i1> @icmp_shl16_vec(<2 x i32> %x) {
; CHECK-LABEL: @icmp_shl16_vec(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i16>
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i16> [[TMP1]], <i16 36, i16 36>
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[X:%.*]], <i32 16, i32 16>
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i32> [[SHL]], <i32 2293761, i32 2293761>
; CHECK-NEXT: ret <2 x i1> [[CMP]]
;
%shl = shl <2 x i32> %x, <i32 16, i32 16>
@@ -1562,8 +1562,8 @@ define <2 x i1> @icmp_shl16_vec(<2 x i32> %x) {

define i1 @icmp_shl24(i32 %x) {
; CHECK-LABEL: @icmp_shl24(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 36
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], 24
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[SHL]], 587202561
; CHECK-NEXT: ret i1 [[CMP]]
;
%shl = shl i32 %x, 24
@@ -2199,7 +2199,7 @@ define i1 @icmp_ashr_and_overshift(i8 %X) {
define i1 @icmp_and_ashr_neg_and_legal(i8 %x) {
; CHECK-LABEL: @icmp_and_ashr_neg_and_legal(
; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -32
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 16
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 1
; CHECK-NEXT: ret i1 [[CMP]]
;
%ashr = ashr i8 %x, 4
@@ -2225,7 +2225,7 @@ define i1 @icmp_and_ashr_mixed_and_shiftout(i8 %x) {
define i1 @icmp_and_ashr_neg_cmp_slt_legal(i8 %x) {
; CHECK-LABEL: @icmp_and_ashr_neg_cmp_slt_legal(
; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -32
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], -64
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], -95
; CHECK-NEXT: ret i1 [[CMP]]
;
%ashr = ashr i8 %x, 4
@@ -2239,7 +2239,7 @@ define i1 @icmp_and_ashr_neg_cmp_slt_shiftout(i8 %x) {
; CHECK-LABEL: @icmp_and_ashr_neg_cmp_slt_shiftout(
; CHECK-NEXT: [[ASHR:%.*]] = ashr i8 [[X:%.*]], 4
; CHECK-NEXT: [[AND:%.*]] = and i8 [[ASHR]], -2
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[AND]], -68
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[AND]], -69
; CHECK-NEXT: ret i1 [[CMP]]
;
%ashr = ashr i8 %x, 4
@@ -5183,3 +5183,121 @@ entry:
%cmp = icmp eq i8 %add2, %add1
ret i1 %cmp
}

define i1 @strengthen_icmp_using_known_bits_ugt(i16 %a) {
; CHECK-LABEL: @strengthen_icmp_using_known_bits_ugt(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i16 [[A:%.*]], 15
; CHECK-NEXT: ret i1 [[CMP]]
;
entry:
%and_ = and i16 %a, 65532
%cmp = icmp ugt i16 %and_, 14
ret i1 %cmp
}

define i1 @strengthen_icmp_using_known_bits_ult(i16 %a) {
; CHECK-LABEL: @strengthen_icmp_using_known_bits_ult(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[AND_:%.*]] = and i16 [[A:%.*]], -4
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[AND_]], 17
; CHECK-NEXT: ret i1 [[CMP]]
;
entry:
%and_ = and i16 %a, 65532
%cmp = icmp ult i16 %and_, 18
ret i1 %cmp
}

define i1 @strengthen_icmp_using_known_bits_sgt(i16 %a) {
; CHECK-LABEL: @strengthen_icmp_using_known_bits_sgt(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[A:%.*]], -1
; CHECK-NEXT: ret i1 [[CMP]]
;
entry:
%and_ = and i16 %a, 65520
%cmp = icmp sgt i16 %and_, -15
ret i1 %cmp
}

define i1 @strengthen_icmp_using_known_bits_slt(i16 %a) {
; CHECK-LABEL: @strengthen_icmp_using_known_bits_slt(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[AND_:%.*]] = and i16 [[A:%.*]], -4
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[AND_]], -15
; CHECK-NEXT: ret i1 [[CMP]]
;
entry:
%and_ = and i16 %a, 65532
%cmp = icmp slt i16 %and_, -14
ret i1 %cmp
}

define i1 @dont_strengthen_icmp_in_sign_bit_check(i8 %a) {
; CHECK-LABEL: @dont_strengthen_icmp_in_sign_bit_check(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ICMP_:%.*]] = icmp sgt i8 [[A:%.*]], -1
; CHECK-NEXT: ret i1 [[ICMP_]]
;
entry:
%shl_ = and i8 %a, 252
%icmp_ = icmp sgt i8 %shl_, -1
ret i1 %icmp_
}

define i8 @dont_strengthen_icmp_in_smin(i8 %a) {
; CHECK-LABEL: @dont_strengthen_icmp_in_smin(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHL_:%.*]] = shl i8 [[A:%.*]], 2
; CHECK-NEXT: [[SELECT_:%.*]] = call i8 @llvm.smin.i8(i8 [[SHL_]], i8 7)
; CHECK-NEXT: ret i8 [[SELECT_]]
;
entry:
%shl_ = shl i8 %a, 2
%icmp_ = icmp slt i8 %shl_, 7
%select_ = select i1 %icmp_, i8 %shl_, i8 7
ret i8 %select_
}

define i8 @dont_strengthen_icmp_in_umin(i8 %a) {
; CHECK-LABEL: @dont_strengthen_icmp_in_umin(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHL_:%.*]] = shl i8 [[A:%.*]], 2
; CHECK-NEXT: [[SELECT_:%.*]] = call i8 @llvm.umin.i8(i8 [[SHL_]], i8 7)
; CHECK-NEXT: ret i8 [[SELECT_]]
;
entry:
%shl_ = shl i8 %a, 2
%icmp_ = icmp ult i8 %shl_, 7
%select_ = select i1 %icmp_, i8 %shl_, i8 7
ret i8 %select_
}

define i8 @dont_strengthen_icmp_in_smax(i8 %a) {
; CHECK-LABEL: @dont_strengthen_icmp_in_smax(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHL_:%.*]] = shl i8 [[A:%.*]], 2
; CHECK-NEXT: [[SELECT_:%.*]] = call i8 @llvm.smax.i8(i8 [[SHL_]], i8 6)
; CHECK-NEXT: ret i8 [[SELECT_]]
;
entry:
%shl_ = shl i8 %a, 2
%icmp_ = icmp sgt i8 %shl_, 6
%select_ = select i1 %icmp_, i8 %shl_, i8 6
ret i8 %select_
}

define i8 @dont_strengthen_icmp_in_umax(i8 %a) {
; CHECK-LABEL: @dont_strengthen_icmp_in_umax(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHL_:%.*]] = shl i8 [[A:%.*]], 2
; CHECK-NEXT: [[SELECT_:%.*]] = call i8 @llvm.umax.i8(i8 [[SHL_]], i8 6)
; CHECK-NEXT: ret i8 [[SELECT_]]
;
entry:
%shl_ = shl i8 %a, 2
%icmp_ = icmp ugt i8 %shl_, 6
%select_ = select i1 %icmp_, i8 %shl_, i8 6
ret i8 %select_
}