From 9705cee0fa583597a88a98fcdbed17ecb5b5abce Mon Sep 17 00:00:00 2001 From: Dinar Temirbulatov Date: Tue, 23 Apr 2024 10:09:43 +0000 Subject: [PATCH 1/3] [AArch64][SVE2] SVE2 NBSL instruction lowering. Allow folding BSL/CNOT instructions into an NBSL instruction for scalable vectors. --- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 17 +++++++++++++++++ llvm/test/CodeGen/AArch64/sve2-bsl.ll | 15 +++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 525ae79da9962..00a1e5f3efadd 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -3767,6 +3767,23 @@ let Predicates = [HasSVE2orSME] in { // SVE2 extract vector (immediate offset, constructive) def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">; + + // zext(cmpeq(bsl(x, y, z), splat(0))) -> nbsl(x, y, z) + def : Pat<(nxv16i8 (zext (nxv16i1 (AArch64setcc_z (nxv16i1 (SVEAllActive)), + (nxv16i8 (AArch64bsp nxv16i8:$Op1, nxv16i8:$Op2, nxv16i8:$Op3)), (SVEDup0), SETEQ)))), + (NBSL_ZZZZ nxv16i8:$Op1, nxv16i8:$Op2, nxv16i8:$Op3)>; + + def : Pat<(nxv8i16 (zext (nxv8i1 (AArch64setcc_z (nxv8i1 (SVEAllActive)), + (nxv8i16 (AArch64bsp nxv8i16:$Op1, nxv8i16:$Op2, nxv8i16:$Op3)), (SVEDup0), SETEQ)))), + (NBSL_ZZZZ nxv8i16:$Op1, nxv8i16:$Op2, nxv8i16:$Op3)>; + + def : Pat<(nxv4i32 (zext (nxv4i1 (AArch64setcc_z (nxv4i1 (SVEAllActive)), + (nxv4i32 (AArch64bsp nxv4i32:$Op1, nxv4i32:$Op2, nxv4i32:$Op3)), (SVEDup0), SETEQ)))), + (NBSL_ZZZZ nxv4i32:$Op1, nxv4i32:$Op2, nxv4i32:$Op3)>; + + def : Pat<(nxv2i64 (zext (nxv2i1 (AArch64setcc_z (nxv2i1 (SVEAllActive)), + (nxv2i64 (AArch64bsp nxv2i64:$Op1, nxv2i64:$Op2, nxv2i64:$Op3)), (SVEDup0), SETEQ)))), + (NBSL_ZZZZ nxv2i64:$Op1, nxv2i64:$Op2, nxv2i64:$Op3)>; } // End HasSVE2orSME let Predicates = [HasSVE2] in { diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll index 
23b2622f5f586..a7edd944e399f 100644 --- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll +++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll @@ -41,3 +41,18 @@ define @no_bsl_fold( %a, %c = or %1, %2 ret %c } + +define @nbsl( %a, %b) { +; CHECK-LABEL: nbsl: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z2.s, #0x7fffffff +; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z1.d +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %1 = and %a, shufflevector ( insertelement ( poison, i32 2147483647, i64 0), poison, zeroinitializer) + %2 = and %b, shufflevector ( insertelement ( poison, i32 -2147483648, i64 0), poison, zeroinitializer) + %3 = or %1, %2 + %4 = icmp eq %3, zeroinitializer + %5 = zext %4 to + ret %5 +} From 1889d8dca6d73ce5409a5e998db23cde6d33d69b Mon Sep 17 00:00:00 2001 From: Dinar Temirbulatov Date: Thu, 25 Apr 2024 00:53:04 +0000 Subject: [PATCH 2/3] Resolved remarks. --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 + .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 19 +------ llvm/test/CodeGen/AArch64/sve2-bsl.ll | 55 ++++++++++++++++--- 3 files changed, 49 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index a7abb58064a53..9c585e15cef27 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -740,6 +740,8 @@ def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>; def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>; def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>; +def AArch64nbsl: PatFrag<(ops node:$Op1, node:$Op2, node:$Op3), + (vnot (AArch64bsp node:$Op1, node:$Op2, node:$Op3))>; def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>; def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 00a1e5f3efadd..06b943cf95146 100644 --- 
a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -3760,30 +3760,13 @@ let Predicates = [HasSVE2orSME] in { defm BSL_ZZZZ : sve2_int_bitwise_ternary_op<0b001, "bsl", int_aarch64_sve_bsl, AArch64bsp>; defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", int_aarch64_sve_bsl1n>; defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", int_aarch64_sve_bsl2n>; - defm NBSL_ZZZZ : sve2_int_bitwise_ternary_op<0b111, "nbsl", int_aarch64_sve_nbsl>; + defm NBSL_ZZZZ : sve2_int_bitwise_ternary_op<0b111, "nbsl", int_aarch64_sve_nbsl, AArch64nbsl>; // SVE2 bitwise xor and rotate right by immediate defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar", int_aarch64_sve_xar>; // SVE2 extract vector (immediate offset, constructive) def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">; - - // zext(cmpeq(bsl(x, y, z), splat(0))) -> nbsl(x, y, z) - def : Pat<(nxv16i8 (zext (nxv16i1 (AArch64setcc_z (nxv16i1 (SVEAllActive)), - (nxv16i8 (AArch64bsp nxv16i8:$Op1, nxv16i8:$Op2, nxv16i8:$Op3)), (SVEDup0), SETEQ)))), - (NBSL_ZZZZ nxv16i8:$Op1, nxv16i8:$Op2, nxv16i8:$Op3)>; - - def : Pat<(nxv8i16 (zext (nxv8i1 (AArch64setcc_z (nxv8i1 (SVEAllActive)), - (nxv8i16 (AArch64bsp nxv8i16:$Op1, nxv8i16:$Op2, nxv8i16:$Op3)), (SVEDup0), SETEQ)))), - (NBSL_ZZZZ nxv8i16:$Op1, nxv8i16:$Op2, nxv8i16:$Op3)>; - - def : Pat<(nxv4i32 (zext (nxv4i1 (AArch64setcc_z (nxv4i1 (SVEAllActive)), - (nxv4i32 (AArch64bsp nxv4i32:$Op1, nxv4i32:$Op2, nxv4i32:$Op3)), (SVEDup0), SETEQ)))), - (NBSL_ZZZZ nxv4i32:$Op1, nxv4i32:$Op2, nxv4i32:$Op3)>; - - def : Pat<(nxv2i64 (zext (nxv2i1 (AArch64setcc_z (nxv2i1 (SVEAllActive)), - (nxv2i64 (AArch64bsp nxv2i64:$Op1, nxv2i64:$Op2, nxv2i64:$Op3)), (SVEDup0), SETEQ)))), - (NBSL_ZZZZ nxv2i64:$Op1, nxv2i64:$Op2, nxv2i64:$Op3)>; } // End HasSVE2orSME let Predicates = [HasSVE2] in { diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll index a7edd944e399f..ef7d4abe5c5f4 100644 --- 
a/llvm/test/CodeGen/AArch64/sve2-bsl.ll +++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll @@ -42,17 +42,54 @@ define @no_bsl_fold( %a, ret %c } -define @nbsl( %a, %b) { -; CHECK-LABEL: nbsl: +define @nbsl_i8( %a, %b) { +; CHECK-LABEL: nbsl_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z2.b, #127 // =0x7f +; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; CHECK-NEXT: ret + %1 = and %a, splat(i8 127) + %2 = and %b, splat(i8 -128) + %3 = or %1, %2 + %4 = xor %3, splat(i8 -1) + ret %4 +} + +define @nbsl_i16( %a, %b) { +; CHECK-LABEL: nbsl_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z2.h, #32767 // =0x7fff +; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; CHECK-NEXT: ret + %1 = and %a, splat(i16 32767) + %2 = and %b, splat(i16 -32768) + %3 = or %1, %2 + %4 = xor %3, splat(i16 -1) + ret %4 +} + +define @nbsl_i32( %a, %b) { +; CHECK-LABEL: nbsl_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z2.s, #0x7fffffff -; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z1.d -; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d ; CHECK-NEXT: ret - %1 = and %a, shufflevector ( insertelement ( poison, i32 2147483647, i64 0), poison, zeroinitializer) - %2 = and %b, shufflevector ( insertelement ( poison, i32 -2147483648, i64 0), poison, zeroinitializer) + %1 = and %a, splat(i32 2147483647) + %2 = and %b, splat(i32 -2147483648) %3 = or %1, %2 - %4 = icmp eq %3, zeroinitializer - %5 = zext %4 to - ret %5 + %4 = xor %3, splat(i32 -1) + ret %4 +} + +define @nbsl_i64( %a, %b) { +; CHECK-LABEL: nbsl_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff +; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; CHECK-NEXT: ret + %1 = and %a, splat(i64 9223372036854775807) + %2 = and %b, splat(i64 -9223372036854775808) + %3 = or %1, %2 + %4 = xor %3, splat(i64 -1) + ret %4 } From 45ad4b195a7fda04b7a7b081ac01421b5939d808 Mon Sep 17 00:00:00 2001 From: Dinar Temirbulatov Date: Fri, 26 Apr 2024 09:03:16 +0000 Subject: [PATCH 3/3] Formatting. 
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 9c585e15cef27..59cdd7f071c09 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -741,7 +741,7 @@ def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>; def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>; def AArch64nbsl: PatFrag<(ops node:$Op1, node:$Op2, node:$Op3), - (vnot (AArch64bsp node:$Op1, node:$Op2, node:$Op3))>; + (vnot (AArch64bsp node:$Op1, node:$Op2, node:$Op3))>; def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>; def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;