From 8a2f1fa8ea8398c42ee8c9f19b8323eb6caaca7a Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Fri, 27 Oct 2023 14:26:47 +0800 Subject: [PATCH 1/2] [X86][EVEX512] Do not allow 512-bit memcpy without EVEX512 Solves crash mentioned in #65920. --- llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 4 +-- llvm/test/CodeGen/X86/evex512-mem.ll | 29 +++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/X86/evex512-mem.ll diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp index c47ddae072b4f..2fe145f9267de 100644 --- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp @@ -281,7 +281,7 @@ EVT X86TargetLowering::getOptimalMemOpType( if (Op.size() >= 16 && (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) { // FIXME: Check if unaligned 64-byte accesses are slow. - if (Op.size() >= 64 && Subtarget.hasAVX512() && + if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() && (Subtarget.getPreferVectorWidth() >= 512)) { return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32; } @@ -395,7 +395,7 @@ bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context, return true; return false; case 512: - if (Subtarget.hasAVX512()) + if (Subtarget.hasAVX512() && Subtarget.hasEVEX512()) return true; return false; default: diff --git a/llvm/test/CodeGen/X86/evex512-mem.ll b/llvm/test/CodeGen/X86/evex512-mem.ll new file mode 100644 index 0000000000000..a900d0837c1ba --- /dev/null +++ b/llvm/test/CodeGen/X86/evex512-mem.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw,avx512vl < %s | FileCheck %s --check-prefix=AVX512 +; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw,avx512vl,-evex512 < %s | FileCheck %s --check-prefix=AVX256 + +define void @test1() { +; AVX512-LABEL: test1: +; AVX512: ## %bb.0: +; AVX512-NEXT: movq 64, %rax +; AVX512-NEXT: movq %rax, (%rax) +; AVX512-NEXT: vmovups 0, %zmm0 +; AVX512-NEXT: vmovups %zmm0, (%rax) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +; +; AVX256-LABEL: test1: +; AVX256: ## %bb.0: +; AVX256-NEXT: movq 64, %rax +; AVX256-NEXT: movq %rax, (%rax) +; AVX256-NEXT: vmovups 0, %ymm0 +; AVX256-NEXT: vmovups 32, %ymm1 +; AVX256-NEXT: vmovups %ymm1, (%rax) +; AVX256-NEXT: vmovups %ymm0, (%rax) +; AVX256-NEXT: vzeroupper +; AVX256-NEXT: retq + call void @llvm.memcpy.p0.p0.i64(ptr align 8 poison, ptr align 8 null, i64 72, i1 false) + ret void +} + +declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) From 902b35652b731f82ee47cb48ceb93e20571a3428 Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Fri, 27 Oct 2023 14:51:56 +0800 Subject: [PATCH 2/2] Address review comments --- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 5 +++-- llvm/test/CodeGen/X86/evex512-mem.ll | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 884f22b006bcb..8a04987e768a1 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -180,7 +180,7 @@ X86TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { case TargetTransformInfo::RGK_Scalar: return TypeSize::getFixed(ST->is64Bit() ? 64 : 32); case TargetTransformInfo::RGK_FixedWidthVector: - if (ST->hasAVX512() && PreferVectorWidth >= 512) + if (ST->hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512) return TypeSize::getFixed(512); if (ST->hasAVX() && PreferVectorWidth >= 256) return TypeSize::getFixed(256); @@ -6131,7 +6131,8 @@ X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { // Only enable vector loads for equality comparison. Right now the vector // version is not as fast for three way compare (see #33329). const unsigned PreferredWidth = ST->getPreferVectorWidth(); - if (PreferredWidth >= 512 && ST->hasAVX512()) Options.LoadSizes.push_back(64); + if (PreferredWidth >= 512 && ST->hasAVX512() && ST->hasEVEX512()) + Options.LoadSizes.push_back(64); if (PreferredWidth >= 256 && ST->hasAVX()) Options.LoadSizes.push_back(32); if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16); } diff --git a/llvm/test/CodeGen/X86/evex512-mem.ll b/llvm/test/CodeGen/X86/evex512-mem.ll index a900d0837c1ba..85bb3b3a5487f 100644 --- a/llvm/test/CodeGen/X86/evex512-mem.ll +++ b/llvm/test/CodeGen/X86/evex512-mem.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw,avx512vl < %s | FileCheck %s --check-prefix=AVX512 -; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw,avx512vl,-evex512 < %s | FileCheck %s --check-prefix=AVX256 +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512bw,avx512vl < %s | FileCheck %s --check-prefix=AVX512 +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512bw,avx512vl,-evex512 < %s | FileCheck %s --check-prefix=AVX256 define void @test1() { ; AVX512-LABEL: test1: -; AVX512: ## %bb.0: +; AVX512: # %bb.0: ; AVX512-NEXT: movq 64, %rax ; AVX512-NEXT: movq %rax, (%rax) ; AVX512-NEXT: vmovups 0, %zmm0 @@ -13,7 +13,7 @@ define void @test1() { ; AVX512-NEXT: retq ; ; AVX256-LABEL: test1: -; AVX256: ## %bb.0: +; AVX256: # %bb.0: ; AVX256-NEXT: movq 64, %rax ; AVX256-NEXT: movq %rax, (%rax) ; AVX256-NEXT: vmovups 0, %ymm0