From e69db9de4744cba33f9a58427c0c83b4ee3a4fe8 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 5 Jun 2024 10:16:19 +0100 Subject: [PATCH 1/2] [AArch64] neon-abd.ll - add ABDS test coverage for #94442 --- llvm/test/CodeGen/AArch64/neon-abd.ll | 42 +++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll index 901cb8adc23f0..4862c8dde7d10 100644 --- a/llvm/test/CodeGen/AArch64/neon-abd.ll +++ b/llvm/test/CodeGen/AArch64/neon-abd.ll @@ -554,6 +554,48 @@ define <16 x i8> @umaxmin_v16i8_com1(<16 x i8> %0, <16 x i8> %1) { ret <16 x i8> %sub } +; TODO: (abds x, y) upper bits are known zero if x and y have extra sign bits +define <4 x i16> @combine_sabd_4h_zerosign(<4 x i16> %a, <4 x i16> %b) #0 { +; CHECK-LABEL: combine_sabd_4h_zerosign: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI41_0 +; CHECK-NEXT: adrp x9, .LCPI41_1 +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI41_0] +; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI41_1] +; CHECK-NEXT: sshl v0.4h, v0.4h, v2.4h +; CHECK-NEXT: sshl v1.4h, v1.4h, v3.4h +; CHECK-NEXT: movi v2.4h, #128, lsl #8 +; CHECK-NEXT: sabd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: and v0.8b, v0.8b, v2.8b +; CHECK-NEXT: ret + %a.ext = ashr <4 x i16> %a, + %b.ext = ashr <4 x i16> %b, + %max = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %a.ext, <4 x i16> %b.ext) + %min = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %a.ext, <4 x i16> %b.ext) + %sub = sub <4 x i16> %max, %min + %mask = and <4 x i16> %sub, + ret <4 x i16> %mask +} + +; negative test - mask extends beyond known zero bits +define <2 x i32> @combine_sabd_2s_zerosign_negative(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: combine_sabd_2s_zerosign_negative: +; CHECK: // %bb.0: +; CHECK-NEXT: sshr v0.2s, v0.2s, #3 +; CHECK-NEXT: sshr v1.2s, v1.2s, #15 +; CHECK-NEXT: mvni v2.2s, #7, msl #16 +; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s +; CHECK-NEXT: and v0.8b, v0.8b, v2.8b +; CHECK-NEXT: ret + %a.ext = ashr <2 x 
i32> %a, <i32 3, i32 3> + %b.ext = ashr <2 x i32> %b, <i32 15, i32 15> + %max = tail call <2 x i32> @llvm.smax.v2i32(<2 x i32> %a.ext, <2 x i32> %b.ext) + %min = tail call <2 x i32> @llvm.smin.v2i32(<2 x i32> %a.ext, <2 x i32> %b.ext) + %sub = sub <2 x i32> %max, %min + %mask = and <2 x i32> %sub, <i32 -524288, i32 -524288> ; 0xFFF80000 + ret <2 x i32> %mask +} + declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1) declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1) From f6add386ad56dac324dcb79a43e8336e47278a9f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 5 Jun 2024 10:19:42 +0100 Subject: [PATCH 2/2] [DAG] computeKnownBits - abds(x, y) will be zero in the upper bits if x and y are sign-extended As reported on #94442 - if x and y each have more than one sign bit, then the upper bits of their absolute difference are guaranteed to be zero Alive2: https://alive2.llvm.org/ce/z/7_z2Vc --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 7 +++++++ llvm/test/CodeGen/AArch64/neon-abd.ll | 12 ++---------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 414c724b94f7b..7d87b4fb6b17a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3477,6 +3477,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known = KnownBits::abds(Known, Known2); + unsigned SignBits1 = + ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); + if (SignBits1 == 1) + break; + unsigned SignBits0 = + ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known.Zero.setHighBits(std::min(SignBits0, SignBits1) - 1); break; } case ISD::UMUL_LOHI: { diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll index 4862c8dde7d10..f743bae84053d 100644 ---
a/llvm/test/CodeGen/AArch64/neon-abd.ll +++ b/llvm/test/CodeGen/AArch64/neon-abd.ll @@ -554,19 +554,11 @@ define <16 x i8> @umaxmin_v16i8_com1(<16 x i8> %0, <16 x i8> %1) { ret <16 x i8> %sub } -; TODO: (abds x, y) upper bits are known zero if x and y have extra sign bits +; (abds x, y) upper bits are known zero if x and y have extra sign bits define <4 x i16> @combine_sabd_4h_zerosign(<4 x i16> %a, <4 x i16> %b) #0 { ; CHECK-LABEL: combine_sabd_4h_zerosign: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI41_0 -; CHECK-NEXT: adrp x9, .LCPI41_1 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI41_0] -; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI41_1] -; CHECK-NEXT: sshl v0.4h, v0.4h, v2.4h -; CHECK-NEXT: sshl v1.4h, v1.4h, v3.4h -; CHECK-NEXT: movi v2.4h, #128, lsl #8 -; CHECK-NEXT: sabd v0.4h, v0.4h, v1.4h -; CHECK-NEXT: and v0.8b, v0.8b, v2.8b +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: ret %a.ext = ashr <4 x i16> %a, %b.ext = ashr <4 x i16> %b,