From 01a162540133cce3bbf99d1298197f87fe810d0a Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 22 Apr 2024 09:05:34 +0100 Subject: [PATCH] [AArch64] Match ZIP and UZP starting from undef elements. When the first element of a zip/uzp mask is undef, the isZIPMask and isUZPMask functions have a 50% chance of picking the wrong "WhichResult", meaning they fail to match a zip/uzp where they could. This patch alters the matching code to first look for the first non-undef element, so that WhichResult is determined correctly. --- .../Target/AArch64/AArch64PerfectShuffle.h | 43 ++++++++++++++++--- llvm/test/CodeGen/AArch64/arm64-uzp.ll | 30 ++++--------- llvm/test/CodeGen/AArch64/arm64-zip.ll | 18 ++------ 3 files changed, 49 insertions(+), 42 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h index 7abaead694d11..a143243a8d3bb 100644 --- a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h +++ b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h @@ -6620,11 +6620,28 @@ static unsigned getPerfectShuffleCost(llvm::ArrayRef M) { return (PFEntry >> 30) + 1; } -inline bool isZIPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { +/// Return true for zip1 or zip2 masks of the form: +/// <0, 8, 1, 9, 2, 10, 3, 11> or +/// <4, 12, 5, 13, 6, 14, 7, 15> +inline bool isZIPMask(ArrayRef M, EVT VT, unsigned &WhichResultOut) { unsigned NumElts = VT.getVectorNumElements(); if (NumElts % 2 != 0) return false; - WhichResult = (M[0] == 0 ? 0 : 1); + // Check the first non-undef element for which half to use. + unsigned WhichResult = 2; + for (unsigned i = 0; i != NumElts / 2; i++) { + if (M[i * 2] >= 0) { + WhichResult = ((unsigned)M[i * 2] == i ? 0 : 1); + break; + } else if (M[i * 2 + 1] >= 0) { + WhichResult = ((unsigned)M[i * 2 + 1] == NumElts + i ? 0 : 1); + break; + } + } + if (WhichResult == 2) + return false; + + // Check all elements match.
unsigned Idx = WhichResult * NumElts / 2; for (unsigned i = 0; i != NumElts; i += 2) { if ((M[i] >= 0 && (unsigned)M[i] != Idx) || @@ -6632,20 +6649,34 @@ inline bool isZIPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { return false; Idx += 1; } - + WhichResultOut = WhichResult; return true; } -inline bool isUZPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { +/// Return true for uzp1 or uzp2 masks of the form: +/// <0, 2, 4, 6, 8, 10, 12, 14> or +/// <1, 3, 5, 7, 9, 11, 13, 15> +inline bool isUZPMask(ArrayRef M, EVT VT, unsigned &WhichResultOut) { unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ? 0 : 1); + // Check the first non-undef element for which half to use. + unsigned WhichResult = 2; + for (unsigned i = 0; i != NumElts; i++) { + if (M[i] >= 0) { + WhichResult = ((unsigned)M[i] == i * 2 ? 0 : 1); + break; + } + } + if (WhichResult == 2) + return false; + + // Check all elements match. for (unsigned i = 0; i != NumElts; ++i) { if (M[i] < 0) continue; // ignore UNDEF indices if ((unsigned)M[i] != 2 * i + WhichResult) return false; } - + WhichResultOut = WhichResult; return true; } diff --git a/llvm/test/CodeGen/AArch64/arm64-uzp.ll b/llvm/test/CodeGen/AArch64/arm64-uzp.ll index 6e01ebc95a1cb..49a51d96fbc84 100644 --- a/llvm/test/CodeGen/AArch64/arm64-uzp.ll +++ b/llvm/test/CodeGen/AArch64/arm64-uzp.ll @@ -110,13 +110,9 @@ define <8 x i16> @vuzpQi16_undef1(<8 x i16> %A, <8 x i16> %B) nounwind { define <8 x i16> @vuzpQi16_undef0(<8 x i16> %A, <8 x i16> %B) nounwind { ; CHECK-LABEL: vuzpQi16_undef0: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI8_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: uzp2.8h v3, v0, v1 -; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 -; CHECK-NEXT: add.8h v0, v0, v3 +; CHECK-NEXT: uzp1.8h v2, v0, v1 +; CHECK-NEXT: uzp2.8h v0, v0, v1 +; CHECK-NEXT: add.8h v0, v2, v0 
; CHECK-NEXT: ret %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> @@ -127,13 +123,9 @@ define <8 x i16> @vuzpQi16_undef0(<8 x i16> %A, <8 x i16> %B) nounwind { define <8 x i16> @vuzpQi16_undef01(<8 x i16> %A, <8 x i16> %B) nounwind { ; CHECK-LABEL: vuzpQi16_undef01: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI9_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: uzp2.8h v3, v0, v1 -; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 -; CHECK-NEXT: add.8h v0, v0, v3 +; CHECK-NEXT: uzp1.8h v2, v0, v1 +; CHECK-NEXT: uzp2.8h v0, v0, v1 +; CHECK-NEXT: add.8h v0, v2, v0 ; CHECK-NEXT: ret %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> @@ -144,13 +136,9 @@ define <8 x i16> @vuzpQi16_undef01(<8 x i16> %A, <8 x i16> %B) nounwind { define <8 x i16> @vuzpQi16_undef012(<8 x i16> %A, <8 x i16> %B) nounwind { ; CHECK-LABEL: vuzpQi16_undef012: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI10_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI10_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: uzp2.8h v3, v0, v1 -; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 -; CHECK-NEXT: add.8h v0, v0, v3 +; CHECK-NEXT: uzp1.8h v2, v0, v1 +; CHECK-NEXT: uzp2.8h v0, v0, v1 +; CHECK-NEXT: add.8h v0, v2, v0 ; CHECK-NEXT: ret %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> diff --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll index 349751dda461f..4c771cbd2966c 100644 --- a/llvm/test/CodeGen/AArch64/arm64-zip.ll +++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll @@ -142,11 +142,7 @@ define <16 x i8> @vzipQi8_undef(ptr %A, ptr %B) 
nounwind { define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind { ; CHECK-LABEL: vzip1_undef_01: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI8_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-NEXT: zip1.8h v0, v0, v1 ; CHECK-NEXT: ret %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> ret <8 x i16> %s @@ -155,11 +151,7 @@ define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind { define <8 x i16> @vzip1_undef_0(<8 x i16> %A, <8 x i16> %B) nounwind { ; CHECK-LABEL: vzip1_undef_0: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI9_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-NEXT: zip1.8h v0, v0, v1 ; CHECK-NEXT: ret %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> ret <8 x i16> %s @@ -177,11 +169,7 @@ define <8 x i16> @vzip1_undef_1(<8 x i16> %A, <8 x i16> %B) nounwind { define <8 x i16> @vzip1_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind { ; CHECK-LABEL: vzip1_undef_012: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI11_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-NEXT: zip1.8h v0, v0, v1 ; CHECK-NEXT: ret %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> ret <8 x i16> %s