diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index c47ddae072b4f..2fe145f9267de 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -281,7 +281,7 @@ EVT X86TargetLowering::getOptimalMemOpType(
   if (Op.size() >= 16 && (!Subtarget.isUnalignedMem16Slow() ||
                           Op.isAligned(Align(16)))) {
     // FIXME: Check if unaligned 64-byte accesses are slow.
-    if (Op.size() >= 64 && Subtarget.hasAVX512() &&
+    if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
         (Subtarget.getPreferVectorWidth() >= 512)) {
       return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
     }
@@ -395,7 +395,7 @@ bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
       return true;
     return false;
   case 512:
-    if (Subtarget.hasAVX512())
+    if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
      return true;
    return false;
  default:
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 884f22b006bcb..8a04987e768a1 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -180,7 +180,7 @@ X86TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
   case TargetTransformInfo::RGK_Scalar:
     return TypeSize::getFixed(ST->is64Bit() ? 64 : 32);
   case TargetTransformInfo::RGK_FixedWidthVector:
-    if (ST->hasAVX512() && PreferVectorWidth >= 512)
+    if (ST->hasAVX512() && ST->hasEVEX512() && PreferVectorWidth >= 512)
       return TypeSize::getFixed(512);
     if (ST->hasAVX() && PreferVectorWidth >= 256)
       return TypeSize::getFixed(256);
@@ -6131,7 +6131,8 @@ X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
     // Only enable vector loads for equality comparison. Right now the vector
     // version is not as fast for three way compare (see #33329).
     const unsigned PreferredWidth = ST->getPreferVectorWidth();
-    if (PreferredWidth >= 512 && ST->hasAVX512()) Options.LoadSizes.push_back(64);
+    if (PreferredWidth >= 512 && ST->hasAVX512() && ST->hasEVEX512())
+      Options.LoadSizes.push_back(64);
     if (PreferredWidth >= 256 && ST->hasAVX()) Options.LoadSizes.push_back(32);
     if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16);
   }
diff --git a/llvm/test/CodeGen/X86/evex512-mem.ll b/llvm/test/CodeGen/X86/evex512-mem.ll
new file mode 100644
index 0000000000000..85bb3b3a5487f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/evex512-mem.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512bw,avx512vl < %s | FileCheck %s --check-prefix=AVX512
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512bw,avx512vl,-evex512 < %s | FileCheck %s --check-prefix=AVX256
+
+define void @test1() {
+; AVX512-LABEL: test1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    movq 64, %rax
+; AVX512-NEXT:    movq %rax, (%rax)
+; AVX512-NEXT:    vmovups 0, %zmm0
+; AVX512-NEXT:    vmovups %zmm0, (%rax)
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+;
+; AVX256-LABEL: test1:
+; AVX256:       # %bb.0:
+; AVX256-NEXT:    movq 64, %rax
+; AVX256-NEXT:    movq %rax, (%rax)
+; AVX256-NEXT:    vmovups 0, %ymm0
+; AVX256-NEXT:    vmovups 32, %ymm1
+; AVX256-NEXT:    vmovups %ymm1, (%rax)
+; AVX256-NEXT:    vmovups %ymm0, (%rax)
+; AVX256-NEXT:    vzeroupper
+; AVX256-NEXT:    retq
+  call void @llvm.memcpy.p0.p0.i64(ptr align 8 poison, ptr align 8 null, i64 72, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)