diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 0e3436d12702d..362d03f912b3b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -234,8 +234,26 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
     if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
       return I->getOperand(1);
 
+    // If the 'and' has a single use and that use is a return instruction,
+    // and the constant mask is a simple low-bit zero-extension mask (0xFF,
+    // 0xFFFF, etc.), skip shrinking the constant RHS. The backend can
+    // lower this pattern to a single zero-extending instruction during
+    // codegen anyway, e.g. by writing to a 32-bit sub-register such as
+    // eax on x86-64. Shrinking the constant here can block that lowering;
+    // this is especially harmful when the known bits come from assumes,
+    // which the backend is unable to take advantage of.
+    bool IsReturnOnlyUse = I->hasOneUse() && isa<ReturnInst>(I->user_back());
+
+    const APInt *C;
+    bool IsZextBitmask = match(I->getOperand(1), m_APInt(C)) &&
+                         ((C->getBitWidth() >= 8 && C->isMask(8)) ||
+                          (C->getBitWidth() >= 16 && C->isMask(16)) ||
+                          (C->getBitWidth() >= 32 && C->isMask(32)) ||
+                          (C->getBitWidth() >= 64 && C->isMask(64)));
+
     // If the RHS is a constant, see if we can simplify it.
-    if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero))
+    if (!(IsReturnOnlyUse && IsZextBitmask) &&
+        ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero))
       return I;
 
     break;
diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll
index f7268ec9df090..9a4144b834fe9 100644
--- a/llvm/test/Transforms/InstCombine/bswap-fold.ll
+++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll
@@ -838,7 +838,7 @@ define i32 @bs_active_high7(i32 %0) {
 define <2 x i64> @bs_active_high4(<2 x i64> %0) {
 ; CHECK-LABEL: @bs_active_high4(
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], splat (i64 4)
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP2]], splat (i64 240)
+; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP2]], splat (i64 255)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
   %2 = shl <2 x i64> %0, <i64 4, i64 4>
diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll
index 9a9fec694ff0e..1e653f3201f39 100644
--- a/llvm/test/Transforms/InstCombine/known-bits.ll
+++ b/llvm/test/Transforms/InstCombine/known-bits.ll
@@ -384,7 +384,7 @@ define i64 @test_icmp_trunc2(i64 %x) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[CONV]], 12
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[SEXT:%.*]] = and i64 [[X]], 2147483647
+; CHECK-NEXT:    [[SEXT:%.*]] = and i64 [[X]], 4294967295
 ; CHECK-NEXT:    ret i64 [[SEXT]]
 ; CHECK:       if.else:
 ; CHECK-NEXT:    ret i64 0
@@ -408,7 +408,7 @@ define i64 @test_icmp_trunc3(i64 %n) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[CONV]], 96
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[RET:%.*]] = and i64 [[N]], 127
+; CHECK-NEXT:    [[RET:%.*]] = and i64 [[N]], 4294967295
 ; CHECK-NEXT:    ret i64 [[RET]]
 ; CHECK:       if.else:
 ; CHECK-NEXT:    ret i64 0
diff --git a/llvm/test/Transforms/InstCombine/trunc-shl-zext.ll b/llvm/test/Transforms/InstCombine/trunc-shl-zext.ll
index 576125b86de8f..03d3ea7f1f5fa 100644
--- a/llvm/test/Transforms/InstCombine/trunc-shl-zext.ll
+++ b/llvm/test/Transforms/InstCombine/trunc-shl-zext.ll
@@ -7,7 +7,7 @@ define i32 @trunc_shl_zext_32(i32 %a) {
 ; CHECK-LABEL: define i32 @trunc_shl_zext_32
 ; CHECK-SAME: (i32 [[A:%.*]]) {
 ; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A]], 4
-; CHECK-NEXT:    [[EXT:%.*]] = and i32 [[SHL]], 65520
+; CHECK-NEXT:    [[EXT:%.*]] = and i32 [[SHL]], 65535
 ; CHECK-NEXT:    ret i32 [[EXT]]
 ;
   %trunc = trunc i32 %a to i16
@@ -20,7 +20,7 @@ define i64 @trunc_shl_zext_64(i64 %a) {
 ; CHECK-LABEL: define i64 @trunc_shl_zext_64
 ; CHECK-SAME: (i64 [[A:%.*]]) {
 ; CHECK-NEXT:    [[SHL:%.*]] = shl i64 [[A]], 7
-; CHECK-NEXT:    [[EXT:%.*]] = and i64 [[SHL]], 128
+; CHECK-NEXT:    [[EXT:%.*]] = and i64 [[SHL]], 255
 ; CHECK-NEXT:    ret i64 [[EXT]]
 ;
   %trunc = trunc i64 %a to i8
diff --git a/llvm/test/Transforms/InstCombine/vscale.ll b/llvm/test/Transforms/InstCombine/vscale.ll
index dbb5ca4bae9be..550af4c4d6ccd 100644
--- a/llvm/test/Transforms/InstCombine/vscale.ll
+++ b/llvm/test/Transforms/InstCombine/vscale.ll
@@ -17,7 +17,7 @@ define i64 @pomote_zext_shl_vscale_i32_to_i64() {
 ; CHECK-LABEL: @pomote_zext_shl_vscale_i32_to_i64(
 ; CHECK-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[SHL:%.*]] = shl i64 [[VSCALE]], 3
-; CHECK-NEXT:    [[EXT:%.*]] = and i64 [[SHL]], 4294967288
+; CHECK-NEXT:    [[EXT:%.*]] = and i64 [[SHL]], 4294967295
 ; CHECK-NEXT:    ret i64 [[EXT]]
 ;
   %vscale = call i32 @llvm.vscale.i32()
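
Motivating example (not part of the patch; function and value names are
illustrative): a minimal IR sketch of the assume case the new comment refers
to. Before this change, instcombine used the assume-derived known bits to
shrink the zext-style mask 4294967295 (0xFFFFFFFF) to 2147483647 (0x7FFFFFFF);
the shrunken constant is no longer a plain low-32-bit mask, so the backend has
to emit an explicit AND instead of a single zero-extending sub-register move
(e.g. mov eax, edi on x86-64).

  declare void @llvm.assume(i1)

  define i64 @ret_low32(i64 %x) {
    %bit31 = and i64 %x, 2147483648   ; isolate bit 31
    %clear = icmp eq i64 %bit31, 0
    call void @llvm.assume(i1 %clear) ; bit 31 of %x is now known zero
    %zext = and i64 %x, 4294967295    ; 0xFFFFFFFF: a low-32-bit zext mask
    ret i64 %zext                     ; the 'and' is only used by the return
  }

Run through opt -passes=instcombine: with this patch the 4294967295 mask is
kept; without it, the known-zero bit 31 shrinks the mask to 2147483647.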