diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8eadf079d4f2f..de26ce2853c5b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -46433,6 +46433,62 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
   return LockOp;
 }
 
+// Check whether we're just testing the signbit, and whether we can simplify
+// this by tracking where the signbit came from.
+static SDValue checkSignTestSetCCCombine(SDValue Cmp, X86::CondCode &CC,
+                                         SelectionDAG &DAG) {
+  if (CC != X86::COND_S && CC != X86::COND_NS)
+    return SDValue();
+
+  if (!Cmp.hasOneUse())
+    return SDValue();
+
+  SDValue Src;
+  if (Cmp.getOpcode() == X86ISD::CMP) {
+    // CMP(X,0) -> signbit test
+    if (!isNullConstant(Cmp.getOperand(1)))
+      return SDValue();
+    Src = Cmp.getOperand(0);
+    // Peek through a SRA node as we just need the signbit.
+    // TODO: Remove one use limit once sdiv-fix regressions are fixed.
+    // TODO: Use SimplifyDemandedBits instead of just SRA?
+    if (Src.getOpcode() != ISD::SRA || !Src.hasOneUse())
+      return SDValue();
+    Src = Src.getOperand(0);
+  } else if (Cmp.getOpcode() == X86ISD::OR) {
+    // OR(X,Y) -> see if only one operand contributes to the signbit.
+    // TODO: XOR(X,Y) -> see if only one operand contributes to the signbit.
+    if (DAG.SignBitIsZero(Cmp.getOperand(0)))
+      Src = Cmp.getOperand(1);
+    else if (DAG.SignBitIsZero(Cmp.getOperand(1)))
+      Src = Cmp.getOperand(0);
+    else
+      return SDValue();
+  } else {
+    return SDValue();
+  }
+
+  // Replace with a TEST on the MSB.
+  SDLoc DL(Cmp);
+  MVT SrcVT = Src.getSimpleValueType();
+  APInt BitMask = APInt::getSignMask(SrcVT.getScalarSizeInBits());
+
+  // If Src came from a SHL (probably from an expanded SIGN_EXTEND_INREG), then
+  // peek through and adjust the TEST bit.
+  if (Src.getOpcode() == ISD::SHL) {
+    if (std::optional<uint64_t> ShiftAmt = DAG.getValidShiftAmount(Src)) {
+      Src = Src.getOperand(0);
+      BitMask.lshrInPlace(*ShiftAmt);
+    }
+  }
+
+  SDValue Mask = DAG.getNode(ISD::AND, DL, SrcVT, Src,
+                             DAG.getConstant(BitMask, DL, SrcVT));
+  CC = CC == X86::COND_S ? X86::COND_NE : X86::COND_E;
+  return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Mask,
+                     DAG.getConstant(0, DL, SrcVT));
+}
+
 // Check whether a boolean test is testing a boolean value generated by
 // X86ISD::SETCC. If so, return the operand of that SETCC and proper condition
 // code.
@@ -47072,6 +47128,9 @@ static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
   if (SDValue Flags = combineCarryThroughADD(EFLAGS, DAG))
     return Flags;
 
+  if (SDValue R = checkSignTestSetCCCombine(EFLAGS, CC, DAG))
+    return R;
+
   if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
     return R;
 
diff --git a/llvm/test/CodeGen/X86/is_fpclass-fp80.ll b/llvm/test/CodeGen/X86/is_fpclass-fp80.ll
index ec2323ac2250c..7d0c5838c1554 100644
--- a/llvm/test/CodeGen/X86/is_fpclass-fp80.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass-fp80.ll
@@ -363,10 +363,9 @@ define i1 @is_posnormal_f80(x86_fp80 %x) {
 ; CHECK-32-NEXT:    pushl %esi
 ; CHECK-32-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-32-NEXT:    .cfi_offset %esi, -8
-; CHECK-32-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
-; CHECK-32-NEXT:    movswl %dx, %ecx
-; CHECK-32-NEXT:    sarl $15, %ecx
 ; CHECK-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; CHECK-32-NEXT:    movl %ecx, %edx
 ; CHECK-32-NEXT:    andl $32767, %edx # imm = 0x7FFF
 ; CHECK-32-NEXT:    decl %edx
 ; CHECK-32-NEXT:    movzwl %dx, %edx
@@ -374,8 +373,8 @@ define i1 @is_posnormal_f80(x86_fp80 %x) {
 ; CHECK-32-NEXT:    cmpl $32766, %edx # imm = 0x7FFE
 ; CHECK-32-NEXT:    sbbl %esi, %esi
 ; CHECK-32-NEXT:    setb %dl
-; CHECK-32-NEXT:    testl %ecx, %ecx
-; CHECK-32-NEXT:    setns %cl
+; CHECK-32-NEXT:    testl $32768, %ecx # imm = 0x8000
+; CHECK-32-NEXT:    sete %cl
 ; CHECK-32-NEXT:    shrl $31, %eax
 ; CHECK-32-NEXT:    andb %cl, %al
 ; CHECK-32-NEXT:    andb %dl, %al
@@ -411,10 +410,9 @@ define i1 @is_negnormal_f80(x86_fp80 %x) {
 ; CHECK-32-NEXT:    pushl %esi
 ; CHECK-32-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-32-NEXT:    .cfi_offset %esi, -8
-; CHECK-32-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
-; CHECK-32-NEXT:    movswl %dx, %ecx
-; CHECK-32-NEXT:    sarl $15, %ecx
 ; CHECK-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; CHECK-32-NEXT:    movl %ecx, %edx
 ; CHECK-32-NEXT:    andl $32767, %edx # imm = 0x7FFF
 ; CHECK-32-NEXT:    decl %edx
 ; CHECK-32-NEXT:    movzwl %dx, %edx
@@ -422,8 +420,8 @@ define i1 @is_negnormal_f80(x86_fp80 %x) {
 ; CHECK-32-NEXT:    cmpl $32766, %edx # imm = 0x7FFE
 ; CHECK-32-NEXT:    sbbl %esi, %esi
 ; CHECK-32-NEXT:    setb %dl
-; CHECK-32-NEXT:    testl %ecx, %ecx
-; CHECK-32-NEXT:    sets %cl
+; CHECK-32-NEXT:    testl $32768, %ecx # imm = 0x8000
+; CHECK-32-NEXT:    setne %cl
 ; CHECK-32-NEXT:    shrl $31, %eax
 ; CHECK-32-NEXT:    andb %cl, %al
 ; CHECK-32-NEXT:    andb %dl, %al
@@ -543,24 +541,23 @@ define i1 @is_negsubnormal_f80(x86_fp80 %x) {
 ; CHECK-32-NEXT:    .cfi_def_cfa_offset 12
 ; CHECK-32-NEXT:    .cfi_offset %esi, -12
 ; CHECK-32-NEXT:    .cfi_offset %edi, -8
-; CHECK-32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
-; CHECK-32-NEXT:    movswl %cx, %eax
-; CHECK-32-NEXT:    sarl $15, %eax
-; CHECK-32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; CHECK-32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT:    movl %eax, %ecx
 ; CHECK-32-NEXT:    andl $32767, %ecx # imm = 0x7FFF
-; CHECK-32-NEXT:    xorl %edx, %edx
-; CHECK-32-NEXT:    addl $-1, %esi
-; CHECK-32-NEXT:    adcl $-1, %edi
-; CHECK-32-NEXT:    adcl $-1, %ecx
+; CHECK-32-NEXT:    xorl %esi, %esi
+; CHECK-32-NEXT:    addl $-1, %edi
 ; CHECK-32-NEXT:    adcl $-1, %edx
-; CHECK-32-NEXT:    cmpl $-1, %esi
-; CHECK-32-NEXT:    sbbl $2147483647, %edi # imm = 0x7FFFFFFF
+; CHECK-32-NEXT:    adcl $-1, %ecx
+; CHECK-32-NEXT:    adcl $-1, %esi
+; CHECK-32-NEXT:    cmpl $-1, %edi
+; CHECK-32-NEXT:    sbbl $2147483647, %edx # imm = 0x7FFFFFFF
 ; CHECK-32-NEXT:    sbbl $0, %ecx
-; CHECK-32-NEXT:    sbbl $0, %edx
+; CHECK-32-NEXT:    sbbl $0, %esi
 ; CHECK-32-NEXT:    setb %cl
-; CHECK-32-NEXT:    testl %eax, %eax
-; CHECK-32-NEXT:    sets %al
+; CHECK-32-NEXT:    testl $32768, %eax # imm = 0x8000
+; CHECK-32-NEXT:    setne %al
 ; CHECK-32-NEXT:    andb %cl, %al
 ; CHECK-32-NEXT:    popl %esi
 ; CHECK-32-NEXT:    .cfi_def_cfa_offset 8
diff --git a/llvm/test/CodeGen/X86/movmsk-bittest.ll b/llvm/test/CodeGen/X86/movmsk-bittest.ll
index 7c8fe03ff4741..b67e70e71c3d5 100644
--- a/llvm/test/CodeGen/X86/movmsk-bittest.ll
+++ b/llvm/test/CodeGen/X86/movmsk-bittest.ll
@@ -37,18 +37,16 @@ define i32 @movmsk_slt_v2i64_1(<2 x i64> %v, i32 %a, i32 %b) {
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movl %edi, %eax
 ; SSE-NEXT:    movmskpd %xmm0, %ecx
-; SSE-NEXT:    shlb $6, %cl
-; SSE-NEXT:    sarb $6, %cl
-; SSE-NEXT:    cmovnsl %esi, %eax
+; SSE-NEXT:    testb $2, %cl
+; SSE-NEXT:    cmovel %esi, %eax
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: movmsk_slt_v2i64_1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movl %edi, %eax
 ; AVX-NEXT:    vmovmskpd %xmm0, %ecx
-; AVX-NEXT:    shlb $6, %cl
-; AVX-NEXT:    sarb $6, %cl
-; AVX-NEXT:    cmovnsl %esi, %eax
+; AVX-NEXT:    testb $2, %cl
+; AVX-NEXT:    cmovel %esi, %eax
 ; AVX-NEXT:    retq
   %cmp = icmp slt <2 x i64> %v, zeroinitializer
   %msk = bitcast <2 x i1> %cmp to i2
@@ -62,18 +60,16 @@ define i32 @movmsk_sgt_v2i64_1(<2 x i64> %v, i32 %a, i32 %b) {
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movl %edi, %eax
 ; SSE-NEXT:    movmskpd %xmm0, %ecx
-; SSE-NEXT:    shlb $6, %cl
-; SSE-NEXT:    sarb $6, %cl
-; SSE-NEXT:    cmovsl %esi, %eax
+; SSE-NEXT:    testb $2, %cl
+; SSE-NEXT:    cmovnel %esi, %eax
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: movmsk_sgt_v2i64_1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movl %edi, %eax
 ; AVX-NEXT:    vmovmskpd %xmm0, %ecx
-; AVX-NEXT:    shlb $6, %cl
-; AVX-NEXT:    sarb $6, %cl
-; AVX-NEXT:    cmovsl %esi, %eax
+; AVX-NEXT:    testb $2, %cl
+; AVX-NEXT:    cmovnel %esi, %eax
 ; AVX-NEXT:    retq
   %cmp = icmp slt <2 x i64> %v, zeroinitializer
   %msk = bitcast <2 x i1> %cmp to i2
@@ -111,18 +107,16 @@ define i32 @movmsk_slt_v4i32_3(<4 x i32> %v, i32 %a, i32 %b) {
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movl %edi, %eax
 ; SSE-NEXT:    movmskps %xmm0, %ecx
-; SSE-NEXT:    shlb $4, %cl
-; SSE-NEXT:    sarb $4, %cl
-; SSE-NEXT:    cmovnsl %esi, %eax
+; SSE-NEXT:    testb $8, %cl
+; SSE-NEXT:    cmovel %esi, %eax
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: movmsk_slt_v4i32_3:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movl %edi, %eax
 ; AVX-NEXT:    vmovmskps %xmm0, %ecx
-; AVX-NEXT:    shlb $4, %cl
-; AVX-NEXT:    sarb $4, %cl
-; AVX-NEXT:    cmovnsl %esi, %eax
+; AVX-NEXT:    testb $8, %cl
+; AVX-NEXT:    cmovel %esi, %eax
 ; AVX-NEXT:    retq
   %cmp = icmp slt <4 x i32> %v, zeroinitializer
   %msk = bitcast <4 x i1> %cmp to i4
@@ -136,18 +130,16 @@ define i32 @movmsk_sgt_v4i32_3(<4 x i32> %v, i32 %a, i32 %b) {
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movl %edi, %eax
 ; SSE-NEXT:    movmskps %xmm0, %ecx
-; SSE-NEXT:    shlb $4, %cl
-; SSE-NEXT:    sarb $4, %cl
-; SSE-NEXT:    cmovsl %esi, %eax
+; SSE-NEXT:    testb $8, %cl
+; SSE-NEXT:    cmovnel %esi, %eax
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: movmsk_sgt_v4i32_3:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movl %edi, %eax
 ; AVX-NEXT:    vmovmskps %xmm0, %ecx
-; AVX-NEXT:    shlb $4, %cl
-; AVX-NEXT:    sarb $4, %cl
-; AVX-NEXT:    cmovsl %esi, %eax
+; AVX-NEXT:    testb $8, %cl
+; AVX-NEXT:    cmovnel %esi, %eax
 ; AVX-NEXT:    retq
   %cmp = icmp slt <4 x i32> %v, zeroinitializer
   %msk = bitcast <4 x i1> %cmp to i4
@@ -256,20 +248,17 @@ define i32 @movmsk_slt_v4i64_3(<4 x i64> %v, i32 %a, i32 %b) {
 ; SSE-LABEL: movmsk_slt_v4i64_3:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movl %edi, %eax
-; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; SSE-NEXT:    movmskps %xmm0, %ecx
-; SSE-NEXT:    shlb $4, %cl
-; SSE-NEXT:    sarb $4, %cl
-; SSE-NEXT:    cmovnsl %esi, %eax
+; SSE-NEXT:    movmskps %xmm1, %ecx
+; SSE-NEXT:    testb $8, %cl
+; SSE-NEXT:    cmovel %esi, %eax
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: movmsk_slt_v4i64_3:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movl %edi, %eax
 ; AVX-NEXT:    vmovmskpd %ymm0, %ecx
-; AVX-NEXT:    shlb $4, %cl
-; AVX-NEXT:    sarb $4, %cl
-; AVX-NEXT:    cmovnsl %esi, %eax
+; AVX-NEXT:    testb $8, %cl
+; AVX-NEXT:    cmovel %esi, %eax
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
   %cmp = icmp slt <4 x i64> %v, zeroinitializer
@@ -283,20 +272,17 @@ define i32 @movmsk_sgt_v4i64_3(<4 x i64> %v, i32 %a, i32 %b) {
 ; SSE-LABEL: movmsk_sgt_v4i64_3:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movl %edi, %eax
-; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; SSE-NEXT:    movmskps %xmm0, %ecx
-; SSE-NEXT:    shlb $4, %cl
-; SSE-NEXT:    sarb $4, %cl
-; SSE-NEXT:    cmovsl %esi, %eax
+; SSE-NEXT:    movmskps %xmm1, %ecx
+; SSE-NEXT:    testb $8, %cl
+; SSE-NEXT:    cmovnel %esi, %eax
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: movmsk_sgt_v4i64_3:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movl %edi, %eax
 ; AVX-NEXT:    vmovmskpd %ymm0, %ecx
-; AVX-NEXT:    shlb $4, %cl
-; AVX-NEXT:    sarb $4, %cl
-; AVX-NEXT:    cmovsl %esi, %eax
+; AVX-NEXT:    testb $8, %cl
+; AVX-NEXT:    cmovnel %esi, %eax
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq
   %cmp = icmp slt <4 x i64> %v, zeroinitializer
@@ -487,22 +473,18 @@ define i32 @movmsk_slt_v32i8_31(<32 x i8> %v, i32 %a, i32 %b) {
 ; SSE-LABEL: movmsk_slt_v32i8_31:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movl %edi, %eax
-; SSE-NEXT:    pmovmskb %xmm0, %ecx
-; SSE-NEXT:    pmovmskb %xmm1, %edx
-; SSE-NEXT:    shll $16, %edx
-; SSE-NEXT:    orl %ecx, %edx
-; SSE-NEXT:    cmovnsl %esi, %eax
+; SSE-NEXT:    pmovmskb %xmm1, %ecx
+; SSE-NEXT:    testl $32768, %ecx # imm = 0x8000
+; SSE-NEXT:    cmovel %esi, %eax
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: movmsk_slt_v32i8_31:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    movl %edi, %eax
-; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %edx
-; AVX1-NEXT:    shll $16, %edx
-; AVX1-NEXT:    orl %ecx, %edx
-; AVX1-NEXT:    cmovnsl %esi, %eax
+; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX1-NEXT:    testl $32768, %ecx # imm = 0x8000
+; AVX1-NEXT:    cmovel %esi, %eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
@@ -534,22 +516,18 @@ define i32 @movmsk_sgt_v32i8_31(<32 x i8> %v, i32 %a, i32 %b) {
 ; SSE-LABEL: movmsk_sgt_v32i8_31:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movl %edi, %eax
-; SSE-NEXT:    pmovmskb %xmm0, %ecx
-; SSE-NEXT:    pmovmskb %xmm1, %edx
-; SSE-NEXT:    shll $16, %edx
-; SSE-NEXT:    orl %ecx, %edx
-; SSE-NEXT:    cmovsl %esi, %eax
+; SSE-NEXT:    pmovmskb %xmm1, %ecx
+; SSE-NEXT:    testl $32768, %ecx # imm = 0x8000
+; SSE-NEXT:    cmovnel %esi, %eax
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: movmsk_sgt_v32i8_31:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    movl %edi, %eax
-; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %edx
-; AVX1-NEXT:    shll $16, %edx
-; AVX1-NEXT:    orl %ecx, %edx
-; AVX1-NEXT:    cmovsl %esi, %eax
+; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
+; AVX1-NEXT:    testl $32768, %ecx # imm = 0x8000
+; AVX1-NEXT:    cmovnel %esi, %eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
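The combine rests on two bit-level facts: an arithmetic shift right never changes the sign bit, so CMP(SRA(X, s), 0) under COND_S/COND_NS can test X directly, and OR(X, Y) takes its sign bit from X whenever Y's sign bit is known zero. The SHL peek-through then moves the tested bit down by the shift amount, which is what BitMask.lshrInPlace(*ShiftAmt) implements. Below is a minimal standalone sketch in plain C++, not the SelectionDAG APIs; the helper name signTestMaskAfterShl is invented for illustration, and C++20 semantics are assumed for shifts and narrowing of signed values. It checks both facts exhaustively over i8 and reproduces the testb $2 and testl $32768 masks seen in the updated tests:

#include <cassert>
#include <cstdint>

// Testing the sign bit of (X << Sh) is the same as testing bit
// (BitWidth - 1 - Sh) of X; this mirrors BitMask.lshrInPlace(ShiftAmt).
static uint32_t signTestMaskAfterShl(unsigned BitWidth, unsigned Sh) {
  return (1u << (BitWidth - 1)) >> Sh;
}

int main() {
  for (int x = -128; x <= 127; ++x) {
    int8_t X = (int8_t)x;
    // SRA peek-through: the sign of X >> s equals the sign of X.
    for (unsigned s = 0; s < 8; ++s)
      assert(((int8_t)(X >> s) < 0) == (X < 0));
    // OR case: if the sign bit of Y is known zero, sign(X | Y) == sign(X).
    for (int y = 0; y <= 127; ++y)
      assert(((int8_t)(X | y) < 0) == (X < 0));
    // SHL peek-through: the sign of (X << 6) is bit 1 of X, so an i8
    // signbit test becomes "testb $2" as in movmsk_slt_v2i64_1 above.
    uint8_t Shifted = (uint8_t)((uint8_t)X << 6);
    assert(((Shifted & 0x80u) != 0) == ((X & signTestMaskAfterShl(8, 6)) != 0));
  }
  assert(signTestMaskAfterShl(8, 6) == 0x02u);
  // An i32 shifted left by 16 (an expanded SIGN_EXTEND_INREG from i16)
  // keeps its sign in bit 15, i.e. the "testl $32768" mask seen above.
  assert(signTestMaskAfterShl(32, 16) == 0x8000u);
  return 0;
}

The COND_S -> COND_NE / COND_NS -> COND_E flip at the end of the combine follows directly: the sign bit is set exactly when the masked TEST result is nonzero.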