From be559fe2224c2dd48c55f39e7e925b9ec262b62c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 4 Jun 2024 17:51:33 +0100 Subject: [PATCH 1/2] [X86] combine-abs.ll - add ABS test coverage for #94344 --- llvm/test/CodeGen/X86/combine-abs.ll | 75 ++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/llvm/test/CodeGen/X86/combine-abs.ll b/llvm/test/CodeGen/X86/combine-abs.ll index 202c88109eaeb..e639f6645b2cd 100644 --- a/llvm/test/CodeGen/X86/combine-abs.ll +++ b/llvm/test/CodeGen/X86/combine-abs.ll @@ -201,6 +201,81 @@ define <8 x i32> @combine_v8i32_abs_pos(<8 x i32> %a) { ret <8 x i32> %2 } +; TODO: (abs x) upper bits are known zero if x has extra sign bits +define i32 @combine_i32_abs_zerosign(i32 %a) { +; CHECK-LABEL: combine_i32_abs_zerosign: +; CHECK: # %bb.0: +; CHECK-NEXT: sarl $15, %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: negl %eax +; CHECK-NEXT: cmovsl %edi, %eax +; CHECK-NEXT: andl $-524288, %eax # imm = 0xFFF80000 +; CHECK-NEXT: retq + %1 = ashr i32 %a, 15 + %2 = call i32 @llvm.abs.i32(i32 %1, i1 false) + %3 = and i32 %2, -524288 ; 0xFFF80000 + ret i32 %3 +} + +define <8 x i16> @combine_v8i16_abs_zerosign(<8 x i16> %a) { +; SSE2-LABEL: combine_v8i16_abs_zerosign: +; SSE2: # %bb.0: +; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: psubw %xmm0, %xmm1 +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: retq +; +; SSE42-LABEL: combine_v8i16_abs_zerosign: +; SSE42: # %bb.0: +; SSE42-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE42-NEXT: pabsw %xmm0, %xmm0 +; SSE42-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE42-NEXT: retq +; +; AVX2-LABEL: combine_v8i16_abs_zerosign: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpabsw %xmm0, %xmm0 +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: combine_v8i16_abs_zerosign: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpabsw %xmm0, %xmm0 +; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: combine_v8i16_abs_zerosign: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpabsw %xmm0, %xmm0 +; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512VL-NEXT: retq + %1 = ashr <8 x i16> %a, + %2 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %1, i1 false) + %3 = and <8 x i16> %2, + ret <8 x i16> %3 +} + +; negative test - mask extends beyond known zero bits +define i32 @combine_i32_abs_zerosign_negative(i32 %a) { +; CHECK-LABEL: combine_i32_abs_zerosign_negative: +; CHECK: # %bb.0: +; CHECK-NEXT: sarl $3, %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: negl %eax +; CHECK-NEXT: cmovsl %edi, %eax +; CHECK-NEXT: andl $-524288, %eax # imm = 0xFFF80000 +; CHECK-NEXT: retq + %1 = ashr i32 %a, 3 + %2 = call i32 @llvm.abs.i32(i32 %1, i1 false) + %3 = and i32 %2, -524288 ; 0xFFF80000 + ret i32 %3 +} + declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1) nounwind readnone declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1) nounwind readnone declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1) nounwind readnone From 4c7834edcebc6df8f850e12817d7f8c5def860e5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 4 Jun 2024 17:57:05 +0100 Subject: [PATCH 2/2] [DAG] computeKnownBits - abs(x) will be zero in the upper bits if x is sign-extended As reported on #94344 - if x has more than one signbit, then the upper bits of its absolute value are guaranteed to be zero Alive2: https://alive2.llvm.org/ce/z/a87fHU --- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 + llvm/test/CodeGen/X86/combine-abs.ll | 53 ++++--------------- 2 files changed, 13 insertions(+), 42 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 414c724b94f7b..6c9b64810c33b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4051,6 +4051,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, case ISD::ABS: { Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known = Known2.abs(); + Known.Zero.setHighBits( + ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1) - 1); break; } case ISD::USUBSAT: { diff --git a/llvm/test/CodeGen/X86/combine-abs.ll b/llvm/test/CodeGen/X86/combine-abs.ll index e639f6645b2cd..76ee02e798707 100644 --- a/llvm/test/CodeGen/X86/combine-abs.ll +++ b/llvm/test/CodeGen/X86/combine-abs.ll @@ -201,15 +201,11 @@ define <8 x i32> @combine_v8i32_abs_pos(<8 x i32> %a) { ret <8 x i32> %2 } -; TODO: (abs x) upper bits are known zero if x has extra sign bits +; (abs x) upper bits are known zero if x has extra sign bits define i32 @combine_i32_abs_zerosign(i32 %a) { ; CHECK-LABEL: combine_i32_abs_zerosign: ; CHECK: # %bb.0: -; CHECK-NEXT: sarl $15, %edi -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: negl %eax -; CHECK-NEXT: cmovsl %edi, %eax -; CHECK-NEXT: andl $-524288, %eax # imm = 0xFFF80000 +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: retq %1 = ashr i32 %a, 15 %2 = call i32 @llvm.abs.i32(i32 %1, i1 false) @@ -218,42 +214,15 @@ define i32 @combine_i32_abs_zerosign(i32 %a) { } define <8 x i16> @combine_v8i16_abs_zerosign(<8 x i16> %a) { -; SSE2-LABEL: combine_v8i16_abs_zerosign: -; SSE2: # %bb.0: -; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: psubw %xmm0, %xmm1 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: retq -; -; SSE42-LABEL: combine_v8i16_abs_zerosign: -; SSE42: # %bb.0: -; SSE42-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE42-NEXT: pabsw %xmm0, %xmm0 -; SSE42-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE42-NEXT: retq -; -; AVX2-LABEL: combine_v8i16_abs_zerosign: -; AVX2: # %bb.0: -; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vpabsw %xmm0, %xmm0 -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512F-LABEL: combine_v8i16_abs_zerosign: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vpabsw %xmm0, %xmm0 -; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: retq +; SSE-LABEL: combine_v8i16_abs_zerosign: +; SSE: # %bb.0: +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: retq ; -; AVX512VL-LABEL: combine_v8i16_abs_zerosign: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512VL-NEXT: vpabsw %xmm0, %xmm0 -; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512VL-NEXT: retq +; AVX-LABEL: combine_v8i16_abs_zerosign: +; AVX: # %bb.0: +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq %1 = ashr <8 x i16> %a, %2 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %1, i1 false) %3 = and <8 x i16> %2, @@ -268,7 +237,7 @@ define i32 @combine_i32_abs_zerosign_negative(i32 %a) { ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: negl %eax ; CHECK-NEXT: cmovsl %edi, %eax -; CHECK-NEXT: andl $-524288, %eax # imm = 0xFFF80000 +; CHECK-NEXT: andl $536346624, %eax # imm = 0x1FF80000 ; CHECK-NEXT: retq %1 = ashr i32 %a, 3 %2 = call i32 @llvm.abs.i32(i32 %1, i1 false)