From cb3682e226ae1ad72121e58f7eccb594c9cd5632 Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Fri, 8 Sep 2023 14:33:57 +0800 Subject: [PATCH 1/2] [X86] Do not directly fold for VINSERTPS We have already customized folding for VINSERTPS by 7e6606f4f1, which does the folding when alignment >= 4 bytes. We cannot arbitrarily fold it like others because we need to calculate the source offset. --- llvm/test/CodeGen/X86/avx.ll | 6 ++++-- llvm/test/TableGen/x86-fold-tables.inc | 2 -- llvm/utils/TableGen/X86ManualFoldTables.def | 2 ++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/llvm/test/CodeGen/X86/avx.ll b/llvm/test/CodeGen/X86/avx.ll index dc59186d568cc..6a10d855ea95c 100644 --- a/llvm/test/CodeGen/X86/avx.ll +++ b/llvm/test/CodeGen/X86/avx.ll @@ -184,12 +184,14 @@ define <4 x float> @nofold_insertps(ptr %a, <4 x float> %b) { ; X86-LABEL: nofold_insertps: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: vinsertps $176, (%eax), %xmm0, %xmm0 ## xmm0 = xmm0[0,1,2],mem[2] +; X86-NEXT: vmovups (%eax), %xmm1 +; X86-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[2] ; X86-NEXT: retl ; ; X64-LABEL: nofold_insertps: ; X64: ## %bb.0: -; X64-NEXT: vinsertps $176, (%rdi), %xmm0, %xmm0 ## xmm0 = xmm0[0,1,2],mem[2] +; X64-NEXT: vmovups (%rdi), %xmm1 +; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[2] ; X64-NEXT: retq %1 = load <4 x float>, ptr %a, align 1 %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index 6168fdfd323c3..0e6b8b04f3ca7 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -2105,8 +2105,6 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = { {X86::VINSERTI64x2Z256rr, X86::VINSERTI64x2Z256rm, 0}, {X86::VINSERTI64x2Zrr, X86::VINSERTI64x2Zrm, 0}, {X86::VINSERTI64x4Zrr, X86::VINSERTI64x4Zrm, 0}, - {X86::VINSERTPSZrr, X86::VINSERTPSZrm, TB_NO_REVERSE}, - 
{X86::VINSERTPSrr, X86::VINSERTPSrm, TB_NO_REVERSE}, {X86::VMAXCPDYrr, X86::VMAXCPDYrm, 0}, {X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rm, 0}, {X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rm, 0}, diff --git a/llvm/utils/TableGen/X86ManualFoldTables.def b/llvm/utils/TableGen/X86ManualFoldTables.def index d949830b09883..68f1c1177c821 100644 --- a/llvm/utils/TableGen/X86ManualFoldTables.def +++ b/llvm/utils/TableGen/X86ManualFoldTables.def @@ -43,6 +43,8 @@ NOFOLD(VEXTRACTI32x8Zrrk) NOFOLD(VEXTRACTI64x2Z256rrk) NOFOLD(VEXTRACTI64x2Zrrk) NOFOLD(VEXTRACTI64x4Zrrk) +NOFOLD(VINSERTPSZrr) +NOFOLD(VINSERTPSrr) NOFOLD(VMOVAPDZ128mrk) NOFOLD(VMOVAPDZ256mrk) NOFOLD(VMOVAPDZmrk) From 4d5bf917ee0b888fb6355c677110a7a3e23d00ac Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Fri, 8 Sep 2023 15:08:09 +0800 Subject: [PATCH 2/2] Move to INSERTPSrr together --- llvm/utils/TableGen/X86ManualFoldTables.def | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/utils/TableGen/X86ManualFoldTables.def b/llvm/utils/TableGen/X86ManualFoldTables.def index 68f1c1177c821..8e6cb4a7bd879 100644 --- a/llvm/utils/TableGen/X86ManualFoldTables.def +++ b/llvm/utils/TableGen/X86ManualFoldTables.def @@ -43,8 +43,6 @@ NOFOLD(VEXTRACTI32x8Zrrk) NOFOLD(VEXTRACTI64x2Z256rrk) NOFOLD(VEXTRACTI64x2Zrrk) NOFOLD(VEXTRACTI64x4Zrrk) -NOFOLD(VINSERTPSZrr) -NOFOLD(VINSERTPSrr) NOFOLD(VMOVAPDZ128mrk) NOFOLD(VMOVAPDZ256mrk) NOFOLD(VMOVAPDZmrk) @@ -227,6 +225,8 @@ NOFOLD(MMX_MOVQ64rr_REV) // => // insertpsrm xmm1, m32, imm NOFOLD(INSERTPSrr) +NOFOLD(VINSERTPSZrr) +NOFOLD(VINSERTPSrr) #undef NOFOLD #ifndef ENTRY