diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4fcbe08e4b2b9..36fd8c136a3c5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5236,6 +5236,23 @@ SDValue DAGCombiner::visitAVG(SDNode *N) {
     return DAG.getNode(ISD::SRL, DL, VT, X,
                        DAG.getShiftAmountConstant(1, VT, DL));
 
+  // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
+  SDValue A;
+  SDValue B;
+  if (sd_match(
+          N, m_BinOp(ISD::AVGFLOORU, m_ZExt(m_Value(A)), m_ZExt(m_Value(B)))) &&
+      A.getValueType() == B.getValueType() &&
+      hasOperation(ISD::AVGFLOORU, A.getValueType())) {
+    SDValue AvgFloorU = DAG.getNode(ISD::AVGFLOORU, DL, A.getValueType(), A, B);
+    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgFloorU);
+  }
+  if (sd_match(
+          N, m_BinOp(ISD::AVGCEILU, m_ZExt(m_Value(A)), m_ZExt(m_Value(B)))) &&
+      A.getValueType() == B.getValueType() &&
+      hasOperation(ISD::AVGCEILU, A.getValueType())) {
+    SDValue AvgCeilU = DAG.getNode(ISD::AVGCEILU, DL, A.getValueType(), A, B);
+    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgCeilU);
+  }
 
   return SDValue();
 }
diff --git a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
index f36b8440fe4bf..b2cf089d8145f 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
@@ -9,9 +9,8 @@ declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
 define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
 ; CHECK-LABEL: haddu_zext:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    uhadd v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
-; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    ret
   %x0 = zext <8 x i8> %a0 to <8 x i16>
   %x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -23,9 +22,8 @@ define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
 define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
 ; CHECK-LABEL: rhaddu_zext:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    urhadd v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
-; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    ret
   %x0 = zext <8 x i8> %a0 to <8 x i16>
   %x1 = zext <8 x i8> %a1 to <8 x i16>
diff --git a/llvm/test/CodeGen/AArch64/avg.ll b/llvm/test/CodeGen/AArch64/avg.ll
new file mode 100644
index 0000000000000..70cc360f4ae57
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/avg.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+define <16 x i16> @zext_avgflooru(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: zext_avgflooru:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v2.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    ext v3.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    uhadd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    uhadd v1.8b, v2.8b, v3.8b
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    ret
+  %x0 = zext <16 x i8> %a0 to <16 x i16>
+  %x1 = zext <16 x i8> %a1 to <16 x i16>
+  %and = and <16 x i16> %x0, %x1
+  %xor = xor <16 x i16> %x0, %x1
+  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %avg = add <16 x i16> %and, %shift
+  ret <16 x i16> %avg
+}
+
+define <16 x i16> @zext_avgflooru_negative(<16 x i8> %a0, <16 x i4> %a1) {
+; CHECK-LABEL: zext_avgflooru_negative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.16b, #15
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    uhadd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    uhadd v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    ret
+  %x0 = zext <16 x i8> %a0 to <16 x i16>
+  %x1 = zext <16 x i4> %a1 to <16 x i16>
+  %and = and <16 x i16> %x0, %x1
+  %xor = xor <16 x i16> %x0, %x1
+  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %avg = add <16 x i16> %and, %shift
+  ret <16 x i16> %avg
+}
+
+define <16 x i16> @zext_avgceilu(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: zext_avgceilu:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v2.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    ext v3.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    urhadd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    urhadd v1.8b, v2.8b, v3.8b
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    ret
+  %x0 = zext <16 x i8> %a0 to <16 x i16>
+  %x1 = zext <16 x i8> %a1 to <16 x i16>
+  %or = or <16 x i16> %x0, %x1
+  %xor = xor <16 x i16> %x0, %x1
+  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %avg = sub <16 x i16> %or, %shift
+  ret <16 x i16> %avg
+}
+
+define <16 x i16> @zext_avgceilu_negative(<16 x i4> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: zext_avgceilu_negative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.16b, #15
+; CHECK-NEXT:    ext v3.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    ext v2.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    urhadd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    urhadd v1.8b, v2.8b, v3.8b
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    ret
+  %x0 = zext <16 x i4> %a0 to <16 x i16>
+  %x1 = zext <16 x i8> %a1 to <16 x i16>
+  %or = or <16 x i16> %x0, %x1
+  %xor = xor <16 x i16> %x0, %x1
+  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %avg = sub <16 x i16> %or, %shift
+  ret <16 x i16> %avg
+}
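
A note on why the fold is sound (not part of the patch): AVGFLOORU and AVGCEILU compute the unsigned average without an intermediate overflow, and when both operands are zero-extended from an n-bit type the average itself still fits in n bits, so it can be computed in the narrow type and zero-extended afterwards. Below is a minimal standalone C++ sketch that exhaustively checks this for the i8 -> i16 case, using the same overflow-free and/xor and or/xor forms that the avg.ll tests use; it is an illustration of the reasoning, not code from the patch.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 256; ++X) {
    for (uint32_t Y = 0; Y < 256; ++Y) {
      // Wide form: the average of the zero-extended values, computed in
      // 16 bits where X + Y (at most 510) cannot overflow.
      uint16_t WideFloor = static_cast<uint16_t>((X + Y) / 2);
      uint16_t WideCeil = static_cast<uint16_t>((X + Y + 1) / 2);
      // Narrow form: the same averages computed entirely in 8 bits via the
      // overflow-free identities, then implicitly zero-extended.
      uint8_t NarrowFloor = static_cast<uint8_t>((X & Y) + ((X ^ Y) >> 1));
      uint8_t NarrowCeil = static_cast<uint8_t>((X | Y) - ((X ^ Y) >> 1));
      assert(WideFloor == NarrowFloor); // avgu(zext x, zext y) == zext(avgu(x, y))
      assert(WideCeil == NarrowCeil);   // same for the rounding-up variant
    }
  }
  return 0;
}

The _negative test functions zero-extend from mismatched source types (i8 and i4), exercising the A.getValueType() == B.getValueType() guard: the fold must not fire there, even though later combines still narrow the operations to .8b in the generated code.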