From c0c6ab1f2a91fa96a119eac8e1bb3bf0deb8dfdc Mon Sep 17 00:00:00 2001 From: "Wang, Xin10" Date: Tue, 2 Jan 2024 23:10:13 -0800 Subject: [PATCH 1/7] Support Lowering for APX Promoted SHA/MOVDIR/CRC32/INVPCID instructions --- llvm/lib/Target/X86/X86FastISel.cpp | 8 +- llvm/lib/Target/X86/X86InstrSystem.td | 13 +- llvm/lib/Target/X86/X86InstrVMX.td | 8 +- llvm/test/CodeGen/X86/apx/cet.ll | 50 +++++ llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll | 61 ++++++ llvm/test/CodeGen/X86/apx/crc32.ll | 58 ++++++ llvm/test/CodeGen/X86/apx/invpcid.ll | 27 +++ llvm/test/CodeGen/X86/apx/movdir.ll | 38 ++++ llvm/test/CodeGen/X86/apx/sha.ll | 186 +++++++++++++++++++ 9 files changed, 438 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/X86/apx/cet.ll create mode 100644 llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll create mode 100644 llvm/test/CodeGen/X86/apx/crc32.ll create mode 100644 llvm/test/CodeGen/X86/apx/invpcid.ll create mode 100644 llvm/test/CodeGen/X86/apx/movdir.ll create mode 100644 llvm/test/CodeGen/X86/apx/sha.ll diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 0ba31e173a1a7..3658af785c24e 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -3047,19 +3047,19 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { default: llvm_unreachable("Unexpected intrinsic."); case Intrinsic::x86_sse42_crc32_32_8: - Opc = X86::CRC32r32r8; + Opc = Subtarget->hasCRC32() ? X86::CRC32r32r8_EVEX : X86::CRC32r32r8; RC = &X86::GR32RegClass; break; case Intrinsic::x86_sse42_crc32_32_16: - Opc = X86::CRC32r32r16; + Opc = Subtarget->hasCRC32() ? X86::CRC32r32r16_EVEX : X86::CRC32r32r16; RC = &X86::GR32RegClass; break; case Intrinsic::x86_sse42_crc32_32_32: - Opc = X86::CRC32r32r32; + Opc = Subtarget->hasCRC32() ? 
X86::CRC32r32r32_EVEX : X86::CRC32r32r32; RC = &X86::GR32RegClass; break; case Intrinsic::x86_sse42_crc32_64_64: - Opc = X86::CRC32r64r64; + Opc = Subtarget->hasCRC32() ? X86::CRC32r64r64_EVEX : X86::CRC32r64r64; RC = &X86::GR64RegClass; break; } diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td index 699e5847e63fb..30530a00809f3 100644 --- a/llvm/lib/Target/X86/X86InstrSystem.td +++ b/llvm/lib/Target/X86/X86InstrSystem.td @@ -695,14 +695,14 @@ def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), Requires<[Not64BitMode, HasINVPCID]>; def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8, PD, - Requires<[In64BitMode, HasINVPCID]>; + Requires<[In64BitMode, HasINVPCID, NoEGPR]>; def INVPCID64_EVEX : I<0xF2, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invpcid\t{$src2, $src1|$src1, $src2}", []>, - EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID]>; + EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID, HasEGPR]>; } // SchedRW -let Predicates = [In64BitMode, HasINVPCID] in { +let Predicates = [In64BitMode, HasINVPCID, NoEGPR] in { // The instruction can only use a 64 bit register as the register argument // in 64 bit mode, while the intrinsic only accepts a 32 bit argument // corresponding to it. 
@@ -714,6 +714,13 @@ let Predicates = [In64BitMode, HasINVPCID] in { addr:$src2)>; } +let Predicates = [In64BitMode, HasINVPCID, HasEGPR] in { + def : Pat<(int_x86_invpcid GR32:$src1, addr:$src2), + (INVPCID64_EVEX + (SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src1), sub_32bit), + addr:$src2)>; +} + //===----------------------------------------------------------------------===// // SMAP Instruction diff --git a/llvm/lib/Target/X86/X86InstrVMX.td b/llvm/lib/Target/X86/X86InstrVMX.td index 7cc468fe15ad4..e672246789721 100644 --- a/llvm/lib/Target/X86/X86InstrVMX.td +++ b/llvm/lib/Target/X86/X86InstrVMX.td @@ -21,10 +21,10 @@ def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), Requires<[Not64BitMode]>; def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invept\t{$src2, $src1|$src1, $src2}", []>, T8, PD, - Requires<[In64BitMode]>; + Requires<[In64BitMode, NoEGPR]>; def INVEPT64_EVEX : I<0xF0, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invept\t{$src2, $src1|$src1, $src2}", []>, - EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>; + EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasEGPR]>; // 66 0F 38 81 def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), @@ -32,10 +32,10 @@ def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), Requires<[Not64BitMode]>; def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8, PD, - Requires<[In64BitMode]>; + Requires<[In64BitMode, NoEGPR]>; def INVVPID64_EVEX : I<0xF1, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invvpid\t{$src2, $src1|$src1, $src2}", []>, - EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>; + EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasEGPR]>; // 0F 01 C1 def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; diff --git a/llvm/test/CodeGen/X86/apx/cet.ll b/llvm/test/CodeGen/X86/apx/cet.ll new file mode 100644 index 
0000000000000..98f3844d1ccd1 --- /dev/null +++ b/llvm/test/CodeGen/X86/apx/cet.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+shstk,+egpr --show-mc-encoding | FileCheck %s + +define void @test_wrssd(i32 %a, ptr %__p) { +; CHECK-LABEL: test_wrssd: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: wrssd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7c,0x08,0x66,0x3e] +; CHECK-NEXT: retq ## encoding: [0xc3] +entry: + tail call void @llvm.x86.wrssd(i32 %a, ptr %__p) + ret void +} + +declare void @llvm.x86.wrssd(i32, ptr) + +define void @test_wrssq(i64 %a, ptr %__p) { +; CHECK-LABEL: test_wrssq: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: wrssq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfc,0x08,0x66,0x3e] +; CHECK-NEXT: retq ## encoding: [0xc3] +entry: + tail call void @llvm.x86.wrssq(i64 %a, ptr %__p) + ret void +} + +declare void @llvm.x86.wrssq(i64, ptr) + +define void @test_wrussd(i32 %a, ptr %__p) { +; CHECK-LABEL: test_wrussd: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: wrussd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7d,0x08,0x65,0x3e] +; CHECK-NEXT: retq ## encoding: [0xc3] +entry: + tail call void @llvm.x86.wrussd(i32 %a, ptr %__p) + ret void +} + +declare void @llvm.x86.wrussd(i32, ptr) + +define void @test_wrussq(i64 %a, ptr %__p) { +; CHECK-LABEL: test_wrussq: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: wrussq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfd,0x08,0x65,0x3e] +; CHECK-NEXT: retq ## encoding: [0xc3] +entry: + tail call void @llvm.x86.wrussq(i64 %a, ptr %__p) + ret void +} + +declare void @llvm.x86.wrussq(i64, ptr) diff --git a/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll b/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll new file mode 100644 index 0000000000000..0b51679ccd7fb --- /dev/null +++ b/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -fast-isel 
-fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32,+egpr --show-mc-encoding | FileCheck %s +; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s + +define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind { +; CHECK-LABEL: test_mm_crc32_u8: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; CHECK-NEXT: crc32b %sil, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6] +; CHECK-NEXT: retq # encoding: [0xc3] + %trunc = trunc i32 %a1 to i8 + %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc) + ret i32 %res +} +declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone + +define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind { +; CHECK-LABEL: test_mm_crc32_u16: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; CHECK-NEXT: crc32w %si, %eax # encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6] +; CHECK-NEXT: retq # encoding: [0xc3] + %trunc = trunc i32 %a1 to i16 + %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc) + ret i32 %res +} +declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind readnone + +define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind { +; CHECK-LABEL: test_mm_crc32_u32: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; CHECK-NEXT: crc32l %esi, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1) + ret i32 %res +} +declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind readnone + +define i64 @test_mm_crc64_u8(i64 %a0, i32 %a1) nounwind{ +; CHECK-LABEL: test_mm_crc64_u8: +; CHECK: # %bb.0: +; CHECK-NEXT: crc32b %sil, %edi # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xfe] +; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; CHECK-NEXT: retq # encoding: [0xc3] + %trunc = trunc i32 %a1 to i8 + %res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 
%trunc) + ret i64 %res +} +declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind readnone + +define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{ +; CHECK-LABEL: test_mm_crc64_u64: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: crc32q %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6] +; CHECK-NEXT: retq # encoding: [0xc3] + %res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1) + ret i64 %res +} +declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind readnone diff --git a/llvm/test/CodeGen/X86/apx/crc32.ll b/llvm/test/CodeGen/X86/apx/crc32.ll new file mode 100644 index 0000000000000..4bcc4d15cc6b5 --- /dev/null +++ b/llvm/test/CodeGen/X86/apx/crc32.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s + +define i32 @crc32_32_8(i32 %a, i8 %b) nounwind { +; CHECK-LABEL: crc32_32_8: +; CHECK: ## %bb.0: +; CHECK-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; CHECK-NEXT: crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6] +; CHECK-NEXT: retq ## encoding: [0xc3] + %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) + ret i32 %tmp +} + +define i32 @crc32_32_16(i32 %a, i16 %b) nounwind { +; CHECK-LABEL: crc32_32_16: +; CHECK: ## %bb.0: +; CHECK-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; CHECK-NEXT: crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6] +; CHECK-NEXT: retq ## encoding: [0xc3] + %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b) + ret i32 %tmp +} + +define i32 @crc32_32_32(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: crc32_32_32: +; CHECK: ## %bb.0: +; CHECK-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; CHECK-NEXT: crc32l %esi, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6] +; CHECK-NEXT: retq ## encoding: [0xc3] + %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b) + ret i32 %tmp +} + 
+define i64 @crc32_64_8(i64 %a, i8 %b) nounwind { +; CHECK-LABEL: crc32_64_8: +; CHECK: ## %bb.0: +; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6] +; CHECK-NEXT: retq ## encoding: [0xc3] + %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b) + ret i64 %tmp +} + +define i64 @crc32_64_64(i64 %a, i64 %b) nounwind { +; CHECK-LABEL: crc32_64_64: +; CHECK: ## %bb.0: +; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: crc32q %rsi, %rax ## encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6] +; CHECK-NEXT: retq ## encoding: [0xc3] + %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b) + ret i64 %tmp +} + +declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind +declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind +declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind +declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind +declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind diff --git a/llvm/test/CodeGen/X86/apx/invpcid.ll b/llvm/test/CodeGen/X86/apx/invpcid.ll new file mode 100644 index 0000000000000..389895f492130 --- /dev/null +++ b/llvm/test/CodeGen/X86/apx/invpcid.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+invpcid,+egpr --show-mc-encoding | FileCheck %s + +define void @test_invpcid(i32 %type, ptr %descriptor) { +; CHECK-LABEL: test_invpcid: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; CHECK-NEXT: invpcid (%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + call void @llvm.x86.invpcid(i32 %type, ptr %descriptor) + ret void +} + +define void @test_invpcid2(ptr readonly %type, ptr %descriptor) { +; CHECK-LABEL: test_invpcid2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07] +; CHECK-NEXT: invpcid 
(%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = load i32, ptr %type, align 4 + tail call void @llvm.x86.invpcid(i32 %0, ptr %descriptor) #1 + ret void +} + +declare void @llvm.x86.invpcid(i32, ptr) diff --git a/llvm/test/CodeGen/X86/apx/movdir.ll b/llvm/test/CodeGen/X86/apx/movdir.ll new file mode 100644 index 0000000000000..06fd7511bc143 --- /dev/null +++ b/llvm/test/CodeGen/X86/apx/movdir.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri,+movdir64b,+egpr --show-mc-encoding | FileCheck %s + +define void @test_movdiri(ptr %p, i32 %v) { +; CHECK-LABEL: test_movdiri: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movdiri %esi, (%rdi) # encoding: [0x62,0xf4,0x7c,0x08,0xf9,0x37] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + call void @llvm.x86.directstore32(ptr %p, i32 %v) + ret void +} + +declare void @llvm.x86.directstore32(ptr, i32) + +define void @test_movdiri_64(ptr %p, i64 %v) { +; CHECK-LABEL: test_movdiri_64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movdiri %rsi, (%rdi) # encoding: [0x62,0xf4,0xfc,0x08,0xf9,0x37] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + call void @llvm.x86.directstore64(ptr %p, i64 %v) + ret void +} + +declare void @llvm.x86.directstore64(ptr, i64) + +define void @test_movdir64b(ptr %dst, ptr %src) { +; CHECK-LABEL: test_movdir64b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movdir64b (%rsi), %rdi # encoding: [0x62,0xf4,0x7d,0x08,0xf8,0x3e] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + call void @llvm.x86.movdir64b(ptr %dst, ptr %src) + ret void +} + +declare void @llvm.x86.movdir64b(ptr, ptr) diff --git a/llvm/test/CodeGen/X86/apx/sha.ll b/llvm/test/CodeGen/X86/apx/sha.ll new file mode 100644 index 0000000000000..088ee61a97f4e --- /dev/null +++ b/llvm/test/CodeGen/X86/apx/sha.ll @@ -0,0 +1,186 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mattr=+sha,+egpr -mtriple=x86_64-unknown-unknown --show-mc-encoding | FileCheck %s + +declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone + +define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { +; CHECK-LABEL: test_sha1rnds4rr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0xc1,0x03] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3) + ret <4 x i32> %0 +} + +define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, ptr %b) nounwind uwtable { +; CHECK-LABEL: test_sha1rnds4rm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = load <4 x i32>, ptr %b + %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3) + ret <4 x i32> %1 +} + +declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone + +define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { +; CHECK-LABEL: test_sha1nexterr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sha1nexte %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0xc1] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %0 +} + +define <4 x i32> @test_sha1nexterm(<4 x i32> %a, ptr %b) nounwind uwtable { +; CHECK-LABEL: test_sha1nexterm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sha1nexte (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0x07] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = load <4 x i32>, ptr %b + %1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0) + ret <4 x i32> %1 +} + +declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone + +define <4 x i32> 
@test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { +; CHECK-LABEL: test_sha1msg1rr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sha1msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0xc1] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %0 +} + +define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable { +; CHECK-LABEL: test_sha1msg1rm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sha1msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0x07] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = load <4 x i32>, ptr %b + %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0) + ret <4 x i32> %1 +} + +declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone + +define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { +; CHECK-LABEL: test_sha1msg2rr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sha1msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0xc1] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %0 +} + +define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable { +; CHECK-LABEL: test_sha1msg2rm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sha1msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0x07] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = load <4 x i32>, ptr %b + %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0) + ret <4 x i32> %1 +} + +declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone + +define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind uwtable { +; +; CHECK-LABEL: test_sha256rnds2rr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8] +; CHECK-NEXT: movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2] +; CHECK-NEXT: 
sha256rnds2 %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0xd9] +; CHECK-NEXT: movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) + ret <4 x i32> %0 +} + +define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, ptr %b, <4 x i32> %c) nounwind uwtable { +; +; CHECK-LABEL: test_sha256rnds2rm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movaps %xmm0, %xmm2 # encoding: [0x0f,0x28,0xd0] +; CHECK-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] +; CHECK-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm2 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0x17] +; CHECK-NEXT: movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = load <4 x i32>, ptr %b + %1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c) + ret <4 x i32> %1 +} + +declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone + +define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { +; CHECK-LABEL: test_sha256msg1rr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sha256msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0xc1] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %0 +} + +define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable { +; CHECK-LABEL: test_sha256msg1rm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sha256msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0x07] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = load <4 x i32>, ptr %b + %1 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %0) + ret <4 x i32> %1 +} + +declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>) nounwind readnone + +define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { +; CHECK-LABEL: test_sha256msg2rr: 
+; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sha256msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0xc1] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %0 +} + +define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable { +; CHECK-LABEL: test_sha256msg2rm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sha256msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0x07] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = load <4 x i32>, ptr %b + %1 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %0) + ret <4 x i32> %1 +} + +; Make sure we don't forget that sha instructions have no VEX equivalents and thus don't zero YMM/ZMM. +define <8 x i32> @test_sha1rnds4_zero_extend(<4 x i32> %a, ptr %b) nounwind uwtable { +; +; CHECK-LABEL: test_sha1rnds4_zero_extend: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03] +; CHECK-NEXT: xorps %xmm1, %xmm1 # encoding: [0x0f,0x57,0xc9] +; CHECK-NEXT: retq # encoding: [0xc3] +entry: + %0 = load <4 x i32>, ptr %b + %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3) + %2 = shufflevector <4 x i32> %1, <4 x i32> zeroinitializer, <8 x i32> + ret <8 x i32> %2 +} From b6d60b595e3784df18042a858206f28f7e5a1082 Mon Sep 17 00:00:00 2001 From: "Wang, Xin10" Date: Tue, 2 Jan 2024 23:21:46 -0800 Subject: [PATCH 2/7] fix error --- llvm/lib/Target/X86/X86FastISel.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 3658af785c24e..8b4ff4c8ed878 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -3047,19 +3047,19 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { default: llvm_unreachable("Unexpected intrinsic."); case Intrinsic::x86_sse42_crc32_32_8: - Opc = 
Subtarget->hasCRC32() ? X86::CRC32r32r8_EVEX : X86::CRC32r32r8; + Opc = Subtarget->hasEGPR() ? X86::CRC32r32r8_EVEX : X86::CRC32r32r8; RC = &X86::GR32RegClass; break; case Intrinsic::x86_sse42_crc32_32_16: - Opc = Subtarget->hasCRC32() ? X86::CRC32r32r16_EVEX : X86::CRC32r32r16; + Opc = Subtarget->hasEGPR() ? X86::CRC32r32r16_EVEX : X86::CRC32r32r16; RC = &X86::GR32RegClass; break; case Intrinsic::x86_sse42_crc32_32_32: - Opc = Subtarget->hasCRC32() ? X86::CRC32r32r32_EVEX : X86::CRC32r32r32; + Opc = Subtarget->hasEGPR() ? X86::CRC32r32r32_EVEX : X86::CRC32r32r32; RC = &X86::GR32RegClass; break; case Intrinsic::x86_sse42_crc32_64_64: - Opc = Subtarget->hasCRC32() ? X86::CRC32r64r64_EVEX : X86::CRC32r64r64; + Opc = Subtarget->hasEGPR() ? X86::CRC32r64r64_EVEX : X86::CRC32r64r64; RC = &X86::GR64RegClass; break; } From e464f561838fdebddc04fbda1208cea7897b70a0 Mon Sep 17 00:00:00 2001 From: "Wang, Xin10" Date: Wed, 3 Jan 2024 01:59:28 -0800 Subject: [PATCH 3/7] use macro to avoid messy --- llvm/lib/Target/X86/X86FastISel.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 8b4ff4c8ed878..084c40b90049b 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -3046,22 +3046,24 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { switch (II->getIntrinsicID()) { default: llvm_unreachable("Unexpected intrinsic."); +#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC; case Intrinsic::x86_sse42_crc32_32_8: - Opc = Subtarget->hasEGPR() ? X86::CRC32r32r8_EVEX : X86::CRC32r32r8; + Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r8); RC = &X86::GR32RegClass; break; case Intrinsic::x86_sse42_crc32_32_16: - Opc = Subtarget->hasEGPR() ? 
X86::CRC32r32r16_EVEX : X86::CRC32r32r16; + Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r16); RC = &X86::GR32RegClass; break; case Intrinsic::x86_sse42_crc32_32_32: - Opc = Subtarget->hasEGPR() ? X86::CRC32r32r32_EVEX : X86::CRC32r32r32; + Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r32); RC = &X86::GR32RegClass; break; case Intrinsic::x86_sse42_crc32_64_64: - Opc = Subtarget->hasEGPR() ? X86::CRC32r64r64_EVEX : X86::CRC32r64r64; + Opc = GET_EGPR_IF_ENABLED(X86::CRC32r64r64); RC = &X86::GR64RegClass; break; +#undef GET_EGPR_IF_ENABLED } const Value *LHS = II->getArgOperand(0); From 9a93bdf9c4567638a6c6c38421b9611656f2d248 Mon Sep 17 00:00:00 2001 From: "Wang, Xin10" Date: Wed, 3 Jan 2024 18:59:22 -0800 Subject: [PATCH 4/7] resolve comments --- llvm/lib/Target/X86/X86DomainReassignment.cpp | 47 +-- llvm/lib/Target/X86/X86FastISel.cpp | 2 +- llvm/lib/Target/X86/X86InstrSystem.td | 8 +- llvm/test/CodeGen/X86/apx/cet.ll | 50 ---- llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll | 61 ---- llvm/test/CodeGen/X86/apx/crc32.ll | 58 ---- llvm/test/CodeGen/X86/apx/invpcid.ll | 27 -- llvm/test/CodeGen/X86/apx/movdir.ll | 38 --- llvm/test/CodeGen/X86/apx/sha.ll | 186 ------------ .../X86/crc32-intrinsics-fast-isel-x86.ll | 41 ++- llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll | 39 ++- .../CodeGen/X86/crc32-intrinsics-x86_64.ll | 13 + llvm/test/CodeGen/X86/invpcid-intrinsic.ll | 27 +- llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll | 33 ++- .../CodeGen/X86/movdir-intrinsic-x86_64.ll | 12 +- llvm/test/CodeGen/X86/sha.ll | 269 ++++++++++++++---- llvm/test/CodeGen/X86/x64-cet-intrinsics.ll | 120 ++++++-- 17 files changed, 451 insertions(+), 580 deletions(-) delete mode 100644 llvm/test/CodeGen/X86/apx/cet.ll delete mode 100644 llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll delete mode 100644 llvm/test/CodeGen/X86/apx/crc32.ll delete mode 100644 llvm/test/CodeGen/X86/apx/invpcid.ll delete mode 100644 llvm/test/CodeGen/X86/apx/movdir.ll delete mode 100644 llvm/test/CodeGen/X86/apx/sha.ll diff --git 
a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp index bdd86e48fa543..7e6b945e5d7a2 100644 --- a/llvm/lib/Target/X86/X86DomainReassignment.cpp +++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp @@ -619,40 +619,40 @@ void X86DomainReassignment::initConverters() { std::make_unique(From, To); }; - bool HasEGPR = STI->hasEGPR(); +#define GET_EGPR_IF_ENABLED(OPC) STI->hasEGPR() ? OPC##_EVEX : OPC createReplacerDstCOPY(X86::MOVZX32rm16, - HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm); + GET_EGPR_IF_ENABLED(X86::KMOVWkm)); createReplacerDstCOPY(X86::MOVZX64rm16, - HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm); + GET_EGPR_IF_ENABLED(X86::KMOVWkm)); createReplacerDstCOPY(X86::MOVZX32rr16, - HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk); + GET_EGPR_IF_ENABLED(X86::KMOVWkk)); createReplacerDstCOPY(X86::MOVZX64rr16, - HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk); + GET_EGPR_IF_ENABLED(X86::KMOVWkk)); if (STI->hasDQI()) { createReplacerDstCOPY(X86::MOVZX16rm8, - HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm); + GET_EGPR_IF_ENABLED(X86::KMOVBkm)); createReplacerDstCOPY(X86::MOVZX32rm8, - HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm); + GET_EGPR_IF_ENABLED(X86::KMOVBkm)); createReplacerDstCOPY(X86::MOVZX64rm8, - HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm); + GET_EGPR_IF_ENABLED(X86::KMOVBkm)); createReplacerDstCOPY(X86::MOVZX16rr8, - HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk); + GET_EGPR_IF_ENABLED(X86::KMOVBkk)); createReplacerDstCOPY(X86::MOVZX32rr8, - HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk); + GET_EGPR_IF_ENABLED(X86::KMOVBkk)); createReplacerDstCOPY(X86::MOVZX64rr8, - HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk); + GET_EGPR_IF_ENABLED(X86::KMOVBkk)); } auto createReplacer = [&](unsigned From, unsigned To) { Converters[{MaskDomain, From}] = std::make_unique(From, To); }; - createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm); - createReplacer(X86::MOV16mr, HasEGPR ? 
X86::KMOVWmk_EVEX : X86::KMOVWmk); - createReplacer(X86::MOV16rr, HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk); + createReplacer(X86::MOV16rm, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); + createReplacer(X86::MOV16mr, GET_EGPR_IF_ENABLED(X86::KMOVWmk)); + createReplacer(X86::MOV16rr, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); createReplacer(X86::SHR16ri, X86::KSHIFTRWri); createReplacer(X86::SHL16ri, X86::KSHIFTLWri); createReplacer(X86::NOT16r, X86::KNOTWrr); @@ -661,14 +661,14 @@ void X86DomainReassignment::initConverters() { createReplacer(X86::XOR16rr, X86::KXORWrr); if (STI->hasBWI()) { - createReplacer(X86::MOV32rm, HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm); - createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm); + createReplacer(X86::MOV32rm, GET_EGPR_IF_ENABLED(X86::KMOVDkm)); + createReplacer(X86::MOV64rm, GET_EGPR_IF_ENABLED(X86::KMOVQkm)); - createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk); - createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk); + createReplacer(X86::MOV32mr, GET_EGPR_IF_ENABLED(X86::KMOVDmk)); + createReplacer(X86::MOV64mr, GET_EGPR_IF_ENABLED(X86::KMOVQmk)); - createReplacer(X86::MOV32rr, HasEGPR ? X86::KMOVDkk_EVEX : X86::KMOVDkk); - createReplacer(X86::MOV64rr, HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk); + createReplacer(X86::MOV32rr, GET_EGPR_IF_ENABLED(X86::KMOVDkk)); + createReplacer(X86::MOV64rr, GET_EGPR_IF_ENABLED(X86::KMOVQkk)); createReplacer(X86::SHR32ri, X86::KSHIFTRDri); createReplacer(X86::SHR64ri, X86::KSHIFTRQri); @@ -706,9 +706,9 @@ void X86DomainReassignment::initConverters() { createReplacer(X86::AND8rr, X86::KANDBrr); - createReplacer(X86::MOV8rm, HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm); - createReplacer(X86::MOV8mr, HasEGPR ? X86::KMOVBmk_EVEX : X86::KMOVBmk); - createReplacer(X86::MOV8rr, HasEGPR ? 
X86::KMOVBkk_EVEX : X86::KMOVBkk); + createReplacer(X86::MOV8rm, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); + createReplacer(X86::MOV8mr, GET_EGPR_IF_ENABLED(X86::KMOVBmk)); + createReplacer(X86::MOV8rr, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); createReplacer(X86::NOT8r, X86::KNOTBrr); @@ -724,6 +724,7 @@ void X86DomainReassignment::initConverters() { createReplacer(X86::XOR8rr, X86::KXORBrr); } +#undef GET_EGPR_IF_ENABLED } bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 084c40b90049b..efbc0e119060f 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -3046,7 +3046,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { switch (II->getIntrinsicID()) { default: llvm_unreachable("Unexpected intrinsic."); -#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC; +#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? 
OPC##_EVEX : OPC case Intrinsic::x86_sse42_crc32_32_8: Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r8); RC = &X86::GR32RegClass; diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td index 30530a00809f3..b1be4739617df 100644 --- a/llvm/lib/Target/X86/X86InstrSystem.td +++ b/llvm/lib/Target/X86/X86InstrSystem.td @@ -695,14 +695,14 @@ def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), Requires<[Not64BitMode, HasINVPCID]>; def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8, PD, - Requires<[In64BitMode, HasINVPCID, NoEGPR]>; + Requires<[In64BitMode]>; def INVPCID64_EVEX : I<0xF2, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invpcid\t{$src2, $src1|$src1, $src2}", []>, - EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID, HasEGPR]>; + EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>; } // SchedRW -let Predicates = [In64BitMode, HasINVPCID, NoEGPR] in { +let Predicates = [HasINVPCID, NoEGPR] in { // The instruction can only use a 64 bit register as the register argument // in 64 bit mode, while the intrinsic only accepts a 32 bit argument // corresponding to it. 
@@ -714,7 +714,7 @@ let Predicates = [In64BitMode, HasINVPCID, NoEGPR] in { addr:$src2)>; } -let Predicates = [In64BitMode, HasINVPCID, HasEGPR] in { +let Predicates = [HasINVPCID, HasEGPR] in { def : Pat<(int_x86_invpcid GR32:$src1, addr:$src2), (INVPCID64_EVEX (SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src1), sub_32bit), diff --git a/llvm/test/CodeGen/X86/apx/cet.ll b/llvm/test/CodeGen/X86/apx/cet.ll deleted file mode 100644 index 98f3844d1ccd1..0000000000000 --- a/llvm/test/CodeGen/X86/apx/cet.ll +++ /dev/null @@ -1,50 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+shstk,+egpr --show-mc-encoding | FileCheck %s - -define void @test_wrssd(i32 %a, ptr %__p) { -; CHECK-LABEL: test_wrssd: -; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: wrssd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7c,0x08,0x66,0x3e] -; CHECK-NEXT: retq ## encoding: [0xc3] -entry: - tail call void @llvm.x86.wrssd(i32 %a, ptr %__p) - ret void -} - -declare void @llvm.x86.wrssd(i32, ptr) - -define void @test_wrssq(i64 %a, ptr %__p) { -; CHECK-LABEL: test_wrssq: -; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: wrssq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfc,0x08,0x66,0x3e] -; CHECK-NEXT: retq ## encoding: [0xc3] -entry: - tail call void @llvm.x86.wrssq(i64 %a, ptr %__p) - ret void -} - -declare void @llvm.x86.wrssq(i64, ptr) - -define void @test_wrussd(i32 %a, ptr %__p) { -; CHECK-LABEL: test_wrussd: -; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: wrussd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7d,0x08,0x65,0x3e] -; CHECK-NEXT: retq ## encoding: [0xc3] -entry: - tail call void @llvm.x86.wrussd(i32 %a, ptr %__p) - ret void -} - -declare void @llvm.x86.wrussd(i32, ptr) - -define void @test_wrussq(i64 %a, ptr %__p) { -; CHECK-LABEL: test_wrussq: -; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: wrussq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfd,0x08,0x65,0x3e] -; CHECK-NEXT: retq ## encoding: [0xc3] -entry: - tail call void 
@llvm.x86.wrussq(i64 %a, ptr %__p) - ret void -} - -declare void @llvm.x86.wrussq(i64, ptr) diff --git a/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll b/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll deleted file mode 100644 index 0b51679ccd7fb..0000000000000 --- a/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll +++ /dev/null @@ -1,61 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32,+egpr --show-mc-encoding | FileCheck %s -; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s - -define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind { -; CHECK-LABEL: test_mm_crc32_u8: -; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] -; CHECK-NEXT: crc32b %sil, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6] -; CHECK-NEXT: retq # encoding: [0xc3] - %trunc = trunc i32 %a1 to i8 - %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc) - ret i32 %res -} -declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone - -define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind { -; CHECK-LABEL: test_mm_crc32_u16: -; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] -; CHECK-NEXT: crc32w %si, %eax # encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6] -; CHECK-NEXT: retq # encoding: [0xc3] - %trunc = trunc i32 %a1 to i16 - %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc) - ret i32 %res -} -declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind readnone - -define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind { -; CHECK-LABEL: test_mm_crc32_u32: -; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] -; CHECK-NEXT: crc32l %esi, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6] -; CHECK-NEXT: retq # encoding: [0xc3] - %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1) - ret i32 %res -} 
-declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind readnone - -define i64 @test_mm_crc64_u8(i64 %a0, i32 %a1) nounwind{ -; CHECK-LABEL: test_mm_crc64_u8: -; CHECK: # %bb.0: -; CHECK-NEXT: crc32b %sil, %edi # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xfe] -; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] -; CHECK-NEXT: retq # encoding: [0xc3] - %trunc = trunc i32 %a1 to i8 - %res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %trunc) - ret i64 %res -} -declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind readnone - -define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{ -; CHECK-LABEL: test_mm_crc64_u64: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] -; CHECK-NEXT: crc32q %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6] -; CHECK-NEXT: retq # encoding: [0xc3] - %res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1) - ret i64 %res -} -declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind readnone diff --git a/llvm/test/CodeGen/X86/apx/crc32.ll b/llvm/test/CodeGen/X86/apx/crc32.ll deleted file mode 100644 index 4bcc4d15cc6b5..0000000000000 --- a/llvm/test/CodeGen/X86/apx/crc32.ll +++ /dev/null @@ -1,58 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s - -define i32 @crc32_32_8(i32 %a, i8 %b) nounwind { -; CHECK-LABEL: crc32_32_8: -; CHECK: ## %bb.0: -; CHECK-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] -; CHECK-NEXT: crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6] -; CHECK-NEXT: retq ## encoding: [0xc3] - %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) - ret i32 %tmp -} - -define i32 @crc32_32_16(i32 %a, i16 %b) nounwind { -; CHECK-LABEL: crc32_32_16: -; CHECK: ## %bb.0: -; CHECK-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] -; CHECK-NEXT: crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6] -; CHECK-NEXT: retq ## encoding: [0xc3] 
- %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b) - ret i32 %tmp -} - -define i32 @crc32_32_32(i32 %a, i32 %b) nounwind { -; CHECK-LABEL: crc32_32_32: -; CHECK: ## %bb.0: -; CHECK-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] -; CHECK-NEXT: crc32l %esi, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6] -; CHECK-NEXT: retq ## encoding: [0xc3] - %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b) - ret i32 %tmp -} - -define i64 @crc32_64_8(i64 %a, i8 %b) nounwind { -; CHECK-LABEL: crc32_64_8: -; CHECK: ## %bb.0: -; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] -; CHECK-NEXT: crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6] -; CHECK-NEXT: retq ## encoding: [0xc3] - %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b) - ret i64 %tmp -} - -define i64 @crc32_64_64(i64 %a, i64 %b) nounwind { -; CHECK-LABEL: crc32_64_64: -; CHECK: ## %bb.0: -; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] -; CHECK-NEXT: crc32q %rsi, %rax ## encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6] -; CHECK-NEXT: retq ## encoding: [0xc3] - %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b) - ret i64 %tmp -} - -declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind -declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind -declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind -declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind -declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind diff --git a/llvm/test/CodeGen/X86/apx/invpcid.ll b/llvm/test/CodeGen/X86/apx/invpcid.ll deleted file mode 100644 index 389895f492130..0000000000000 --- a/llvm/test/CodeGen/X86/apx/invpcid.ll +++ /dev/null @@ -1,27 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+invpcid,+egpr --show-mc-encoding | FileCheck %s - -define void @test_invpcid(i32 %type, ptr %descriptor) { -; CHECK-LABEL: test_invpcid: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl 
%edi, %eax # encoding: [0x89,0xf8] -; CHECK-NEXT: invpcid (%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - call void @llvm.x86.invpcid(i32 %type, ptr %descriptor) - ret void -} - -define void @test_invpcid2(ptr readonly %type, ptr %descriptor) { -; CHECK-LABEL: test_invpcid2: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07] -; CHECK-NEXT: invpcid (%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - %0 = load i32, ptr %type, align 4 - tail call void @llvm.x86.invpcid(i32 %0, ptr %descriptor) #1 - ret void -} - -declare void @llvm.x86.invpcid(i32, ptr) diff --git a/llvm/test/CodeGen/X86/apx/movdir.ll b/llvm/test/CodeGen/X86/apx/movdir.ll deleted file mode 100644 index 06fd7511bc143..0000000000000 --- a/llvm/test/CodeGen/X86/apx/movdir.ll +++ /dev/null @@ -1,38 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri,+movdir64b,+egpr --show-mc-encoding | FileCheck %s - -define void @test_movdiri(ptr %p, i32 %v) { -; CHECK-LABEL: test_movdiri: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movdiri %esi, (%rdi) # encoding: [0x62,0xf4,0x7c,0x08,0xf9,0x37] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - call void @llvm.x86.directstore32(ptr %p, i32 %v) - ret void -} - -declare void @llvm.x86.directstore32(ptr, i32) - -define void @test_movdiri_64(ptr %p, i64 %v) { -; CHECK-LABEL: test_movdiri_64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movdiri %rsi, (%rdi) # encoding: [0x62,0xf4,0xfc,0x08,0xf9,0x37] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - call void @llvm.x86.directstore64(ptr %p, i64 %v) - ret void -} - -declare void @llvm.x86.directstore64(ptr, i64) - -define void @test_movdir64b(ptr %dst, ptr %src) { -; CHECK-LABEL: test_movdir64b: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movdir64b (%rsi), %rdi # encoding: 
[0x62,0xf4,0x7d,0x08,0xf8,0x3e] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - call void @llvm.x86.movdir64b(ptr %dst, ptr %src) - ret void -} - -declare void @llvm.x86.movdir64b(ptr, ptr) diff --git a/llvm/test/CodeGen/X86/apx/sha.ll b/llvm/test/CodeGen/X86/apx/sha.ll deleted file mode 100644 index 088ee61a97f4e..0000000000000 --- a/llvm/test/CodeGen/X86/apx/sha.ll +++ /dev/null @@ -1,186 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc < %s -mattr=+sha,+egpr -mtriple=x86_64-unknown-unknown --show-mc-encoding | FileCheck %s - -declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone - -define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { -; CHECK-LABEL: test_sha1rnds4rr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0xc1,0x03] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3) - ret <4 x i32> %0 -} - -define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, ptr %b) nounwind uwtable { -; CHECK-LABEL: test_sha1rnds4rm: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - %0 = load <4 x i32>, ptr %b - %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3) - ret <4 x i32> %1 -} - -declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone - -define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { -; CHECK-LABEL: test_sha1nexterr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1nexte %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0xc1] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b) - ret <4 x i32> %0 -} - -define <4 x i32> @test_sha1nexterm(<4 x i32> %a, 
ptr %b) nounwind uwtable { -; CHECK-LABEL: test_sha1nexterm: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1nexte (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0x07] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - %0 = load <4 x i32>, ptr %b - %1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0) - ret <4 x i32> %1 -} - -declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone - -define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { -; CHECK-LABEL: test_sha1msg1rr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0xc1] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b) - ret <4 x i32> %0 -} - -define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable { -; CHECK-LABEL: test_sha1msg1rm: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0x07] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - %0 = load <4 x i32>, ptr %b - %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0) - ret <4 x i32> %1 -} - -declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone - -define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { -; CHECK-LABEL: test_sha1msg2rr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0xc1] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b) - ret <4 x i32> %0 -} - -define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable { -; CHECK-LABEL: test_sha1msg2rm: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0x07] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - %0 = load <4 x i32>, ptr %b - %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 
x i32> %0) - ret <4 x i32> %1 -} - -declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone - -define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind uwtable { -; -; CHECK-LABEL: test_sha256rnds2rr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8] -; CHECK-NEXT: movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2] -; CHECK-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0xd9] -; CHECK-NEXT: movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - %0 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) - ret <4 x i32> %0 -} - -define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, ptr %b, <4 x i32> %c) nounwind uwtable { -; -; CHECK-LABEL: test_sha256rnds2rm: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movaps %xmm0, %xmm2 # encoding: [0x0f,0x28,0xd0] -; CHECK-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] -; CHECK-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm2 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0x17] -; CHECK-NEXT: movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - %0 = load <4 x i32>, ptr %b - %1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c) - ret <4 x i32> %1 -} - -declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone - -define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { -; CHECK-LABEL: test_sha256msg1rr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha256msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0xc1] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b) - ret <4 x i32> %0 -} - -define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable { -; CHECK-LABEL: test_sha256msg1rm: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
sha256msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0x07] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - %0 = load <4 x i32>, ptr %b - %1 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %0) - ret <4 x i32> %1 -} - -declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>) nounwind readnone - -define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { -; CHECK-LABEL: test_sha256msg2rr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha256msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0xc1] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b) - ret <4 x i32> %0 -} - -define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable { -; CHECK-LABEL: test_sha256msg2rm: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha256msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0x07] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - %0 = load <4 x i32>, ptr %b - %1 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %0) - ret <4 x i32> %1 -} - -; Make sure we don't forget that sha instructions have no VEX equivalents and thus don't zero YMM/ZMM. 
-define <8 x i32> @test_sha1rnds4_zero_extend(<4 x i32> %a, ptr %b) nounwind uwtable { -; -; CHECK-LABEL: test_sha1rnds4_zero_extend: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03] -; CHECK-NEXT: xorps %xmm1, %xmm1 # encoding: [0x0f,0x57,0xc9] -; CHECK-NEXT: retq # encoding: [0xc3] -entry: - %0 = load <4 x i32>, ptr %b - %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3) - %2 = shufflevector <4 x i32> %1, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - ret <8 x i32> %2 -} diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll index 056d79f379fd1..cb93e7a4f4362 100644 --- a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll +++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll @@ -3,8 +3,9 @@ ; RUN: llc < %s -fast-isel -pass-remarks-missed=sdagisel -mtriple=i686-unknown-unknown -mattr=+crc32 2>&1 >/dev/null | FileCheck %s -check-prefix=STDERR-X86 -allow-empty ; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -check-prefix=X86 ; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X86 -; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -check-prefix=X64 -; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 --show-mc-encoding | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 --show-mc-encoding | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s
-check-prefix=EGPR ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c @@ -21,9 +22,15 @@ define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind { ; ; X64-LABEL: test_mm_crc32_u8: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: crc32b %sil, %eax -; X64-NEXT: retq +; X64-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; X64-NEXT: crc32b %sil, %eax # encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6] +; X64-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_mm_crc32_u8: +; EGPR: # %bb.0: +; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; EGPR-NEXT: crc32b %sil, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6] +; EGPR-NEXT: retq # encoding: [0xc3] %trunc = trunc i32 %a1 to i8 %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc) ret i32 %res @@ -41,9 +48,15 @@ define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind { ; ; X64-LABEL: test_mm_crc32_u16: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: crc32w %si, %eax -; X64-NEXT: retq +; X64-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; X64-NEXT: crc32w %si, %eax # encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6] +; X64-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_mm_crc32_u16: +; EGPR: # %bb.0: +; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; EGPR-NEXT: crc32w %si, %eax # encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6] +; EGPR-NEXT: retq # encoding: [0xc3] %trunc = trunc i32 %a1 to i16 %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc) ret i32 %res @@ -59,9 +72,15 @@ define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind { ; ; X64-LABEL: test_mm_crc32_u32: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: crc32l %esi, %eax -; X64-NEXT: retq +; X64-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; X64-NEXT: crc32l %esi, %eax # encoding: [0xf2,0x0f,0x38,0xf1,0xc6] +; X64-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_mm_crc32_u32: +; EGPR: # %bb.0: +; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +;
EGPR-NEXT: crc32l %esi, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6] +; EGPR-NEXT: retq # encoding: [0xc3] %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1) ret i32 %res } diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll index 7623ba68353e1..96258ea9bcbb7 100644 --- a/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll @@ -1,19 +1,26 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+crc32 | FileCheck %s --check-prefixes=X86 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s --check-prefixes=EGPR define i32 @crc32_32_8(i32 %a, i8 %b) nounwind { ; X86-LABEL: crc32_32_8: ; X86: ## %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: crc32b {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf0,0x44,0x24,0x08] -; X86-NEXT: retl ## encoding: [0xc3] +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: crc32b {{[0-9]+}}(%esp), %eax +; X86-NEXT: retl ; ; X64-LABEL: crc32_32_8: ; X64: ## %bb.0: ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] ; X64-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6] ; X64-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: crc32_32_8: +; EGPR: ## %bb.0: +; EGPR-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; EGPR-NEXT: crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6] +; EGPR-NEXT: retq ## encoding: [0xc3] %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b) ret i32 %tmp } @@ -22,15 +29,21 @@ declare i32
@llvm.x86.sse42.crc32.32.8(i32, i8) nounwind define i32 @crc32_32_16(i32 %a, i16 %b) nounwind { ; X86-LABEL: crc32_32_16: ; X86: ## %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: crc32w {{[0-9]+}}(%esp), %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08] -; X86-NEXT: retl ## encoding: [0xc3] +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: crc32w {{[0-9]+}}(%esp), %eax +; X86-NEXT: retl ; ; X64-LABEL: crc32_32_16: ; X64: ## %bb.0: ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] ; X64-NEXT: crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6] ; X64-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: crc32_32_16: +; EGPR: ## %bb.0: +; EGPR-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; EGPR-NEXT: crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6] +; EGPR-NEXT: retq ## encoding: [0xc3] %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b) ret i32 %tmp } @@ -39,15 +52,21 @@ declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind define i32 @crc32_32_32(i32 %a, i32 %b) nounwind { ; X86-LABEL: crc32_32_32: ; X86: ## %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: crc32l {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08] -; X86-NEXT: retl ## encoding: [0xc3] +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: crc32l {{[0-9]+}}(%esp), %eax +; X86-NEXT: retl ; ; X64-LABEL: crc32_32_32: ; X64: ## %bb.0: ; X64-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] ; X64-NEXT: crc32l %esi, %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0xc6] ; X64-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: crc32_32_32: +; EGPR: ## %bb.0: +; EGPR-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; EGPR-NEXT: crc32l %esi, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6] +; EGPR-NEXT: retq ## encoding: [0xc3] %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b) ret i32 %tmp } diff --git 
a/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll index b0f7a394f07b7..bda26a15b277a 100644 --- a/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll +++ b/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s --check-prefixes=EGPR declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind @@ -10,6 +11,12 @@ define i64 @crc32_64_8(i64 %a, i8 %b) nounwind { ; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] ; CHECK-NEXT: crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6] ; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: crc32_64_8: +; EGPR: ## %bb.0: +; EGPR-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] +; EGPR-NEXT: crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6] +; EGPR-NEXT: retq ## encoding: [0xc3] %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b) ret i64 %tmp } @@ -20,6 +27,12 @@ define i64 @crc32_64_64(i64 %a, i64 %b) nounwind { ; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] ; CHECK-NEXT: crc32q %rsi, %rax ## encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6] ; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: crc32_64_64: +; EGPR: ## %bb.0: +; EGPR-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] +; EGPR-NEXT: crc32q %rsi, %rax ## encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6] +; EGPR-NEXT: retq ## encoding: [0xc3] %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b) ret i64 %tmp } diff --git a/llvm/test/CodeGen/X86/invpcid-intrinsic.ll b/llvm/test/CodeGen/X86/invpcid-intrinsic.ll index 3aa9fde35e23f..19a6249fc708f 100644 --- a/llvm/test/CodeGen/X86/invpcid-intrinsic.ll +++ 
b/llvm/test/CodeGen/X86/invpcid-intrinsic.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+invpcid | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+invpcid | FileCheck %s --check-prefix=X86_64 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+invpcid --show-mc-encoding | FileCheck %s --check-prefix=X86_64 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+invpcid,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR define void @test_invpcid(i32 %type, ptr %descriptor) { ; X86-LABEL: test_invpcid: @@ -12,9 +13,15 @@ define void @test_invpcid(i32 %type, ptr %descriptor) { ; ; X86_64-LABEL: test_invpcid: ; X86_64: # %bb.0: # %entry -; X86_64-NEXT: movl %edi, %eax -; X86_64-NEXT: invpcid (%rsi), %rax -; X86_64-NEXT: retq +; X86_64-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; X86_64-NEXT: invpcid (%rsi), %rax # encoding: [0x66,0x0f,0x38,0x82,0x06] +; X86_64-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_invpcid: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; EGPR-NEXT: invpcid (%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06] +; EGPR-NEXT: retq # encoding: [0xc3] entry: call void @llvm.x86.invpcid(i32 %type, ptr %descriptor) ret void @@ -31,9 +38,15 @@ define void @test_invpcid2(ptr readonly %type, ptr %descriptor) { ; ; X86_64-LABEL: test_invpcid2: ; X86_64: # %bb.0: # %entry -; X86_64-NEXT: movl (%rdi), %eax -; X86_64-NEXT: invpcid (%rsi), %rax -; X86_64-NEXT: retq +; X86_64-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07] +; X86_64-NEXT: invpcid (%rsi), %rax # encoding: [0x66,0x0f,0x38,0x82,0x06] +; X86_64-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_invpcid2: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07] +; EGPR-NEXT: invpcid (%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06] +; EGPR-NEXT: retq # 
encoding: [0xc3] entry: %0 = load i32, ptr %type, align 4 tail call void @llvm.x86.invpcid(i32 %0, ptr %descriptor) #1 diff --git a/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll b/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll index 1769bcbf6f605..4d03510ad5d4f 100644 --- a/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll +++ b/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll @@ -1,19 +1,25 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri -mattr=+movdir64b | FileCheck %s --check-prefix=X64 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+movdiri -mattr=+movdir64b | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri -mattr=+movdir64b --show-mc-encoding | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri -mattr=+movdir64b,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR define void @test_movdiri(ptr %p, i32 %v) { -; X64-LABEL: test_movdiri: -; X64: # %bb.0: # %entry -; X64-NEXT: movdiri %esi, (%rdi) -; X64-NEXT: retq -; ; X32-LABEL: test_movdiri: ; X32: # %bb.0: # %entry ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movdiri %eax, (%ecx) ; X32-NEXT: retl +; +; X64-LABEL: test_movdiri: +; X64: # %bb.0: # %entry +; X64-NEXT: movdiri %esi, (%rdi) # encoding: [0x0f,0x38,0xf9,0x37] +; X64-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_movdiri: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: movdiri %esi, (%rdi) # encoding: [0x62,0xf4,0x7c,0x08,0xf9,0x37] +; EGPR-NEXT: retq # encoding: [0xc3] entry: call void @llvm.x86.directstore32(ptr %p, i32 %v) ret void @@ -22,17 +28,22 @@ entry: declare void @llvm.x86.directstore32(ptr, i32) define void @test_movdir64b(ptr %dst, ptr %src) { -; X64-LABEL: test_movdir64b: -; X64: # %bb.0: # %entry -; X64-NEXT: movdir64b (%rsi), %rdi -; X64-NEXT: retq -; ; X32-LABEL: test_movdir64b: ; X32: # %bb.0: 
# %entry ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movdir64b (%eax), %ecx ; X32-NEXT: retl +; +; X64-LABEL: test_movdir64b: +; X64: # %bb.0: # %entry +; X64-NEXT: movdir64b (%rsi), %rdi # encoding: [0x66,0x0f,0x38,0xf8,0x3e] +; X64-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_movdir64b: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: movdir64b (%rsi), %rdi # encoding: [0x62,0xf4,0x7d,0x08,0xf8,0x3e] +; EGPR-NEXT: retq # encoding: [0xc3] entry: call void @llvm.x86.movdir64b(ptr %dst, ptr %src) ret void diff --git a/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll b/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll index b20d7df26515d..ddd44f6d73d59 100644 --- a/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll +++ b/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll @@ -1,11 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri --show-mc-encoding | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri,+egpr --show-mc-encoding | FileCheck %s -check-prefix=EGPR define void @test_movdiri(ptr %p, i64 %v) { ; CHECK-LABEL: test_movdiri: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movdiri %rsi, (%rdi) -; CHECK-NEXT: retq +; CHECK-NEXT: movdiri %rsi, (%rdi) # encoding: [0x48,0x0f,0x38,0xf9,0x37] +; CHECK-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_movdiri: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: movdiri %rsi, (%rdi) # encoding: [0x62,0xf4,0xfc,0x08,0xf9,0x37] +; EGPR-NEXT: retq # encoding: [0xc3] entry: call void @llvm.x86.directstore64(ptr %p, i64 %v) ret void diff --git a/llvm/test/CodeGen/X86/sha.ll b/llvm/test/CodeGen/X86/sha.ll index d5427556dc0bb..d8fa354a39135 100644 --- a/llvm/test/CodeGen/X86/sha.ll +++ b/llvm/test/CodeGen/X86/sha.ll @@ -1,24 +1,45 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py ; RUN: llc < %s -mattr=+sha -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=SSE -; RUN: llc < %s -mattr=+sha,+avx2 -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=AVX +; RUN: llc < %s -mattr=+sha,+avx2 -mtriple=x86_64-unknown-unknown --show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX +; RUN: llc < %s -mattr=+sha,+egpr -mtriple=x86_64-unknown-unknown --show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=EGPR declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { -; CHECK-LABEL: test_sha1rnds4rr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1rnds4 $3, %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha1rnds4rr: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha1rnds4 $3, %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha1rnds4rr: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # encoding: [0x0f,0x3a,0xcc,0xc1,0x03] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha1rnds4rr: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0xc1,0x03] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3) ret <4 x i32> %0 } define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, ptr %b) nounwind uwtable { -; CHECK-LABEL: test_sha1rnds4rm: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1rnds4 $3, (%rdi), %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha1rnds4rm: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha1rnds4 $3, (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha1rnds4rm: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x0f,0x3a,0xcc,0x07,0x03] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha1rnds4rm: +; EGPR: # %bb.0: # %entry +; 
EGPR-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = load <4 x i32>, ptr %b %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3) @@ -28,20 +49,40 @@ entry: declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { -; CHECK-LABEL: test_sha1nexterr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1nexte %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha1nexterr: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha1nexte %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha1nexterr: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha1nexte %xmm1, %xmm0 # encoding: [0x0f,0x38,0xc8,0xc1] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha1nexterr: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha1nexte %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0xc1] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b) ret <4 x i32> %0 } define <4 x i32> @test_sha1nexterm(<4 x i32> %a, ptr %b) nounwind uwtable { -; CHECK-LABEL: test_sha1nexterm: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1nexte (%rdi), %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha1nexterm: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha1nexte (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha1nexterm: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha1nexte (%rdi), %xmm0 # encoding: [0x0f,0x38,0xc8,0x07] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha1nexterm: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha1nexte (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0x07] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = load <4 x i32>, ptr %b %1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0) @@ -51,20 +92,40 @@ entry: declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone define <4 x i32> 
@test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { -; CHECK-LABEL: test_sha1msg1rr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1msg1 %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha1msg1rr: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha1msg1 %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha1msg1rr: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha1msg1 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xc9,0xc1] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha1msg1rr: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha1msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0xc1] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b) ret <4 x i32> %0 } define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable { -; CHECK-LABEL: test_sha1msg1rm: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1msg1 (%rdi), %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha1msg1rm: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha1msg1 (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha1msg1rm: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha1msg1 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xc9,0x07] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha1msg1rm: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha1msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0x07] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = load <4 x i32>, ptr %b %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0) @@ -74,20 +135,40 @@ entry: declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { -; CHECK-LABEL: test_sha1msg2rr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1msg2 %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha1msg2rr: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha1msg2 %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha1msg2rr: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha1msg2 %xmm1, 
%xmm0 # encoding: [0x0f,0x38,0xca,0xc1] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha1msg2rr: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha1msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0xc1] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b) ret <4 x i32> %0 } define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable { -; CHECK-LABEL: test_sha1msg2rm: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha1msg2 (%rdi), %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha1msg2rm: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha1msg2 (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha1msg2rm: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha1msg2 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xca,0x07] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha1msg2rm: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha1msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0x07] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = load <4 x i32>, ptr %b %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0) @@ -107,11 +188,19 @@ define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) n ; ; AVX-LABEL: test_sha256rnds2rr: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovaps %xmm0, %xmm3 -; AVX-NEXT: vmovaps %xmm2, %xmm0 -; AVX-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 -; AVX-NEXT: vmovaps %xmm3, %xmm0 -; AVX-NEXT: retq +; AVX-NEXT: vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8] +; AVX-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2] +; AVX-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # encoding: [0x0f,0x38,0xcb,0xd9] +; AVX-NEXT: vmovaps %xmm3, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc3] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha256rnds2rr: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8] +; EGPR-NEXT: movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2] +; EGPR-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 
# encoding: [0x62,0xf4,0x7c,0x08,0xdb,0xd9] +; EGPR-NEXT: movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) ret <4 x i32> %0 @@ -128,11 +217,19 @@ define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, ptr %b, <4 x i32> %c) nounwin ; ; AVX-LABEL: test_sha256rnds2rm: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovaps %xmm0, %xmm2 -; AVX-NEXT: vmovaps %xmm1, %xmm0 -; AVX-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm2 -; AVX-NEXT: vmovaps %xmm2, %xmm0 -; AVX-NEXT: retq +; AVX-NEXT: vmovaps %xmm0, %xmm2 # encoding: [0xc5,0xf8,0x28,0xd0] +; AVX-NEXT: vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1] +; AVX-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm2 # encoding: [0x0f,0x38,0xcb,0x17] +; AVX-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha256rnds2rm: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: movaps %xmm0, %xmm2 # encoding: [0x0f,0x28,0xd0] +; EGPR-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] +; EGPR-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm2 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0x17] +; EGPR-NEXT: movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = load <4 x i32>, ptr %b %1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c) @@ -142,20 +239,40 @@ entry: declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { -; CHECK-LABEL: test_sha256msg1rr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha256msg1 %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha256msg1rr: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha256msg1 %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha256msg1rr: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha256msg1 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xcc,0xc1] +; AVX-NEXT: retq # 
encoding: [0xc3] +; +; EGPR-LABEL: test_sha256msg1rr: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha256msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0xc1] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b) ret <4 x i32> %0 } define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable { -; CHECK-LABEL: test_sha256msg1rm: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha256msg1 (%rdi), %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha256msg1rm: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha256msg1 (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha256msg1rm: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha256msg1 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xcc,0x07] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha256msg1rm: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha256msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0x07] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = load <4 x i32>, ptr %b %1 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %0) @@ -165,20 +282,40 @@ entry: declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>) nounwind readnone define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable { -; CHECK-LABEL: test_sha256msg2rr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha256msg2 %xmm1, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha256msg2rr: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha256msg2 %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha256msg2rr: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha256msg2 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xcd,0xc1] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha256msg2rr: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha256msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0xc1] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b) ret <4 x i32> %0 } define <4 x i32> 
@test_sha256msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable { -; CHECK-LABEL: test_sha256msg2rm: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sha256msg2 (%rdi), %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: test_sha256msg2rm: +; SSE: # %bb.0: # %entry +; SSE-NEXT: sha256msg2 (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: test_sha256msg2rm: +; AVX: # %bb.0: # %entry +; AVX-NEXT: sha256msg2 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xcd,0x07] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha256msg2rm: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha256msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0x07] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = load <4 x i32>, ptr %b %1 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %0) @@ -195,12 +332,20 @@ define <8 x i32> @test_sha1rnds4_zero_extend(<4 x i32> %a, ptr %b) nounwind uwta ; ; AVX-LABEL: test_sha1rnds4_zero_extend: ; AVX: # %bb.0: # %entry -; AVX-NEXT: sha1rnds4 $3, (%rdi), %xmm0 -; AVX-NEXT: vmovaps %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x0f,0x3a,0xcc,0x07,0x03] +; AVX-NEXT: vmovaps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc0] +; AVX-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_sha1rnds4_zero_extend: +; EGPR: # %bb.0: # %entry +; EGPR-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03] +; EGPR-NEXT: xorps %xmm1, %xmm1 # encoding: [0x0f,0x57,0xc9] +; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = load <4 x i32>, ptr %b %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3) %2 = shufflevector <4 x i32> %1, <4 x i32> zeroinitializer, <8 x i32> ret <8 x i32> %2 } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/X86/x64-cet-intrinsics.ll b/llvm/test/CodeGen/X86/x64-cet-intrinsics.ll index 4c28c8ab43699..bf87ae5cac05a 100644 --- a/llvm/test/CodeGen/X86/x64-cet-intrinsics.ll +++ b/llvm/test/CodeGen/X86/x64-cet-intrinsics.ll @@ -1,11 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+shstk | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+shstk --show-mc-encoding | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+shstk,+egpr --show-mc-encoding | FileCheck %s -check-prefix=EGPR define void @test_incsspd(i32 %a) local_unnamed_addr { ; CHECK-LABEL: test_incsspd: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: incsspd %edi -; CHECK-NEXT: retq +; CHECK-NEXT: incsspd %edi ## encoding: [0xf3,0x0f,0xae,0xef] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_incsspd: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: incsspd %edi ## encoding: [0xf3,0x0f,0xae,0xef] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: tail call void @llvm.x86.incsspd(i32 %a) ret void @@ -16,9 +22,15 @@ declare void @llvm.x86.incsspd(i32) define void @test_incsspq(i32 %a) local_unnamed_addr { ; CHECK-LABEL: test_incsspq: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: movslq %edi, %rax -; CHECK-NEXT: incsspq %rax -; CHECK-NEXT: retq +; CHECK-NEXT: movslq %edi, %rax ## encoding: [0x48,0x63,0xc7] +; CHECK-NEXT: incsspq %rax ## encoding: [0xf3,0x48,0x0f,0xae,0xe8] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_incsspq: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: movslq %edi, %rax ## encoding: [0x48,0x63,0xc7] +; EGPR-NEXT: incsspq %rax ## encoding: [0xf3,0x48,0x0f,0xae,0xe8] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: %conv.i = sext i32 %a to i64 tail call void @llvm.x86.incsspq(i64 %conv.i) @@ -30,9 +42,15 @@ declare void @llvm.x86.incsspq(i64) define i32 @test_rdsspd(i32 %a) { ; CHECK-LABEL: 
test_rdsspd: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: rdsspd %eax -; CHECK-NEXT: retq +; CHECK-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; CHECK-NEXT: rdsspd %eax ## encoding: [0xf3,0x0f,0x1e,0xc8] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_rdsspd: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8] +; EGPR-NEXT: rdsspd %eax ## encoding: [0xf3,0x0f,0x1e,0xc8] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: %0 = call i32 @llvm.x86.rdsspd(i32 %a) ret i32 %0 @@ -43,9 +61,15 @@ declare i32 @llvm.x86.rdsspd(i32) define i64 @test_rdsspq(i64 %a) { ; CHECK-LABEL: test_rdsspq: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: rdsspq %rax -; CHECK-NEXT: retq +; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: rdsspq %rax ## encoding: [0xf3,0x48,0x0f,0x1e,0xc8] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_rdsspq: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] +; EGPR-NEXT: rdsspq %rax ## encoding: [0xf3,0x48,0x0f,0x1e,0xc8] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: %0 = call i64 @llvm.x86.rdsspq(i64 %a) ret i64 %0 @@ -56,8 +80,13 @@ declare i64 @llvm.x86.rdsspq(i64) define void @test_saveprevssp() { ; CHECK-LABEL: test_saveprevssp: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: saveprevssp -; CHECK-NEXT: retq +; CHECK-NEXT: saveprevssp ## encoding: [0xf3,0x0f,0x01,0xea] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_saveprevssp: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: saveprevssp ## encoding: [0xf3,0x0f,0x01,0xea] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: tail call void @llvm.x86.saveprevssp() ret void @@ -68,8 +97,13 @@ declare void @llvm.x86.saveprevssp() define void @test_rstorssp(ptr %__p) { ; CHECK-LABEL: test_rstorssp: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: rstorssp (%rdi) -; CHECK-NEXT: retq +; CHECK-NEXT: rstorssp (%rdi) ## encoding: 
[0xf3,0x0f,0x01,0x2f] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_rstorssp: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: rstorssp (%rdi) ## encoding: [0xf3,0x0f,0x01,0x2f] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: tail call void @llvm.x86.rstorssp(ptr %__p) ret void @@ -80,8 +114,13 @@ declare void @llvm.x86.rstorssp(ptr) define void @test_wrssd(i32 %a, ptr %__p) { ; CHECK-LABEL: test_wrssd: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: wrssd %edi, (%rsi) -; CHECK-NEXT: retq +; CHECK-NEXT: wrssd %edi, (%rsi) ## encoding: [0x0f,0x38,0xf6,0x3e] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_wrssd: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: wrssd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7c,0x08,0x66,0x3e] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: tail call void @llvm.x86.wrssd(i32 %a, ptr %__p) ret void @@ -92,8 +131,13 @@ declare void @llvm.x86.wrssd(i32, ptr) define void @test_wrssq(i64 %a, ptr %__p) { ; CHECK-LABEL: test_wrssq: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: wrssq %rdi, (%rsi) -; CHECK-NEXT: retq +; CHECK-NEXT: wrssq %rdi, (%rsi) ## encoding: [0x48,0x0f,0x38,0xf6,0x3e] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_wrssq: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: wrssq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfc,0x08,0x66,0x3e] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: tail call void @llvm.x86.wrssq(i64 %a, ptr %__p) ret void @@ -104,8 +148,13 @@ declare void @llvm.x86.wrssq(i64, ptr) define void @test_wrussd(i32 %a, ptr %__p) { ; CHECK-LABEL: test_wrussd: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: wrussd %edi, (%rsi) -; CHECK-NEXT: retq +; CHECK-NEXT: wrussd %edi, (%rsi) ## encoding: [0x66,0x0f,0x38,0xf5,0x3e] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_wrussd: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: wrussd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7d,0x08,0x65,0x3e] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: tail call void @llvm.x86.wrussd(i32 %a, ptr %__p) ret void @@ 
-116,8 +165,13 @@ declare void @llvm.x86.wrussd(i32, ptr) define void @test_wrussq(i64 %a, ptr %__p) { ; CHECK-LABEL: test_wrussq: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: wrussq %rdi, (%rsi) -; CHECK-NEXT: retq +; CHECK-NEXT: wrussq %rdi, (%rsi) ## encoding: [0x66,0x48,0x0f,0x38,0xf5,0x3e] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_wrussq: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: wrussq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfd,0x08,0x65,0x3e] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: tail call void @llvm.x86.wrussq(i64 %a, ptr %__p) ret void @@ -128,8 +182,13 @@ declare void @llvm.x86.wrussq(i64, ptr) define void @test_setssbsy() { ; CHECK-LABEL: test_setssbsy: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: setssbsy -; CHECK-NEXT: retq +; CHECK-NEXT: setssbsy ## encoding: [0xf3,0x0f,0x01,0xe8] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_setssbsy: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: setssbsy ## encoding: [0xf3,0x0f,0x01,0xe8] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: tail call void @llvm.x86.setssbsy() ret void @@ -140,8 +199,13 @@ declare void @llvm.x86.setssbsy() define void @test_clrssbsy(ptr %__p) { ; CHECK-LABEL: test_clrssbsy: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: clrssbsy (%rdi) -; CHECK-NEXT: retq +; CHECK-NEXT: clrssbsy (%rdi) ## encoding: [0xf3,0x0f,0xae,0x37] +; CHECK-NEXT: retq ## encoding: [0xc3] +; +; EGPR-LABEL: test_clrssbsy: +; EGPR: ## %bb.0: ## %entry +; EGPR-NEXT: clrssbsy (%rdi) ## encoding: [0xf3,0x0f,0xae,0x37] +; EGPR-NEXT: retq ## encoding: [0xc3] entry: tail call void @llvm.x86.clrssbsy(ptr %__p) ret void From c7c63c61ce5f5abd7ba2628b837469bbe015a7fb Mon Sep 17 00:00:00 2001 From: "Wang, Xin10" Date: Wed, 3 Jan 2024 19:04:02 -0800 Subject: [PATCH 5/7] clang format --- llvm/lib/Target/X86/X86DomainReassignment.cpp | 40 +++++++------------ 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp 
b/llvm/lib/Target/X86/X86DomainReassignment.cpp index 7e6b945e5d7a2..20dbaf797e327 100644 --- a/llvm/lib/Target/X86/X86DomainReassignment.cpp +++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp @@ -620,30 +620,20 @@ void X86DomainReassignment::initConverters() { }; #define GET_EGPR_IF_ENABLED(OPC) STI->hasEGPR() ? OPC##_EVEX : OPC - createReplacerDstCOPY(X86::MOVZX32rm16, - GET_EGPR_IF_ENABLED(X86::KMOVWkm)); - createReplacerDstCOPY(X86::MOVZX64rm16, - GET_EGPR_IF_ENABLED(X86::KMOVWkm)); + createReplacerDstCOPY(X86::MOVZX32rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); + createReplacerDstCOPY(X86::MOVZX64rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); - createReplacerDstCOPY(X86::MOVZX32rr16, - GET_EGPR_IF_ENABLED(X86::KMOVWkk)); - createReplacerDstCOPY(X86::MOVZX64rr16, - GET_EGPR_IF_ENABLED(X86::KMOVWkk)); + createReplacerDstCOPY(X86::MOVZX32rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); + createReplacerDstCOPY(X86::MOVZX64rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); if (STI->hasDQI()) { - createReplacerDstCOPY(X86::MOVZX16rm8, - GET_EGPR_IF_ENABLED(X86::KMOVBkm)); - createReplacerDstCOPY(X86::MOVZX32rm8, - GET_EGPR_IF_ENABLED(X86::KMOVBkm)); - createReplacerDstCOPY(X86::MOVZX64rm8, - GET_EGPR_IF_ENABLED(X86::KMOVBkm)); - - createReplacerDstCOPY(X86::MOVZX16rr8, - GET_EGPR_IF_ENABLED(X86::KMOVBkk)); - createReplacerDstCOPY(X86::MOVZX32rr8, - GET_EGPR_IF_ENABLED(X86::KMOVBkk)); - createReplacerDstCOPY(X86::MOVZX64rr8, - GET_EGPR_IF_ENABLED(X86::KMOVBkk)); + createReplacerDstCOPY(X86::MOVZX16rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); + createReplacerDstCOPY(X86::MOVZX32rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); + createReplacerDstCOPY(X86::MOVZX64rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); + + createReplacerDstCOPY(X86::MOVZX16rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); + createReplacerDstCOPY(X86::MOVZX32rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); + createReplacerDstCOPY(X86::MOVZX64rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); } auto createReplacer = [&](unsigned From, unsigned To) { @@ -696,8 
+686,8 @@ void X86DomainReassignment::initConverters() { // TODO: KTEST is not a replacement for TEST due to flag differences. Need // to prove only Z flag is used. - //createReplacer(X86::TEST32rr, X86::KTESTDrr); - //createReplacer(X86::TEST64rr, X86::KTESTQrr); + // createReplacer(X86::TEST32rr, X86::KTESTDrr); + // createReplacer(X86::TEST64rr, X86::KTESTQrr); } if (STI->hasDQI()) { @@ -719,8 +709,8 @@ void X86DomainReassignment::initConverters() { // TODO: KTEST is not a replacement for TEST due to flag differences. Need // to prove only Z flag is used. - //createReplacer(X86::TEST8rr, X86::KTESTBrr); - //createReplacer(X86::TEST16rr, X86::KTESTWrr); + // createReplacer(X86::TEST8rr, X86::KTESTBrr); + // createReplacer(X86::TEST16rr, X86::KTESTWrr); createReplacer(X86::XOR8rr, X86::KXORBrr); } From 337494dbe2ee7ec3773f2f3716b1a15dba5cefd8 Mon Sep 17 00:00:00 2001 From: "Wang, Xin10" Date: Thu, 4 Jan 2024 01:52:23 -0800 Subject: [PATCH 6/7] resolve comment --- llvm/lib/Target/X86/X86InstrVMX.td | 8 ++--- .../X86/crc32-intrinsics-fast-isel-x86.ll | 8 ++--- .../X86/crc32-intrinsics-fast-isel-x86_64.ll | 29 ++++++++++++++----- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrVMX.td b/llvm/lib/Target/X86/X86InstrVMX.td index e672246789721..7cc468fe15ad4 100644 --- a/llvm/lib/Target/X86/X86InstrVMX.td +++ b/llvm/lib/Target/X86/X86InstrVMX.td @@ -21,10 +21,10 @@ def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), Requires<[Not64BitMode]>; def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invept\t{$src2, $src1|$src1, $src2}", []>, T8, PD, - Requires<[In64BitMode, NoEGPR]>; + Requires<[In64BitMode]>; def INVEPT64_EVEX : I<0xF0, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invept\t{$src2, $src1|$src1, $src2}", []>, - EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasEGPR]>; + EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>; // 66 0F 38 81 def INVVPID32 : I<0x81, 
MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), @@ -32,10 +32,10 @@ def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), Requires<[Not64BitMode]>; def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8, PD, - Requires<[In64BitMode, NoEGPR]>; + Requires<[In64BitMode]>; def INVVPID64_EVEX : I<0xF1, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), "invvpid\t{$src2, $src1|$src1, $src2}", []>, - EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasEGPR]>; + EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>; // 0F 01 C1 def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll index cb93e7a4f4362..873986e99777d 100644 --- a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll +++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll @@ -5,7 +5,7 @@ ; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X86 ; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 --show-mc-encoding | FileCheck %s -check-prefix=X64 ; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 --show-mc-encoding | FileCheck %s -check-prefix=X64 -; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 --show-mc-encoding | FileCheck %s -check-prefix=EGPR +; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s -check-prefix=EGPR ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c @@ -29,7 +29,7 @@ define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind { ; EGPR-LABEL: test_mm_crc32_u8: ; EGPR: # %bb.0: ; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] -; EGPR-NEXT: 
crc32b %sil, %eax # encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6] +; EGPR-NEXT: crc32b %sil, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6] ; EGPR-NEXT: retq # encoding: [0xc3] %trunc = trunc i32 %a1 to i8 %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc) @@ -55,7 +55,7 @@ define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind { ; EGPR-LABEL: test_mm_crc32_u16: ; EGPR: # %bb.0: ; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] -; EGPR-NEXT: crc32w %si, %eax # encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6] +; EGPR-NEXT: crc32w %si, %eax # encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6] ; EGPR-NEXT: retq # encoding: [0xc3] %trunc = trunc i32 %a1 to i16 %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc) @@ -79,7 +79,7 @@ define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind { ; EGPR-LABEL: test_mm_crc32_u32: ; EGPR: # %bb.0: ; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] -; EGPR-NEXT: crc32l %esi, %eax # encoding: [0xf2,0x0f,0x38,0xf1,0xc6] +; EGPR-NEXT: crc32l %esi, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6] ; EGPR-NEXT: retq # encoding: [0xc3] %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1) ret i32 %res diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll index e0ec432b38549..71d955bda7523 100644 --- a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll +++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s +; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 --show-mc-encoding | FileCheck %s +; RUN: llc < %s -fast-isel -fast-isel-abort=3 
-mtriple=x86_64-unknown-unknown -mattr=+crc32 --show-mc-encoding | FileCheck %s +; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s -check-prefix=EGPR ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c @@ -8,9 +9,15 @@ define i64 @test_mm_crc64_u8(i64 %a0, i32 %a1) nounwind{ ; CHECK-LABEL: test_mm_crc64_u8: ; CHECK: # %bb.0: -; CHECK-NEXT: crc32b %sil, %edi -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: retq +; CHECK-NEXT: crc32b %sil, %edi # encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xfe] +; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; CHECK-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_mm_crc64_u8: +; EGPR: # %bb.0: +; EGPR-NEXT: crc32b %sil, %edi # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xfe] +; EGPR-NEXT: movl %edi, %eax # encoding: [0x89,0xf8] +; EGPR-NEXT: retq # encoding: [0xc3] %trunc = trunc i32 %a1 to i8 %res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %trunc) ret i64 %res @@ -20,9 +27,15 @@ declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind readnone define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{ ; CHECK-LABEL: test_mm_crc64_u64: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: crc32q %rsi, %rax -; CHECK-NEXT: retq +; CHECK-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] +; CHECK-NEXT: crc32q %rsi, %rax # encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6] +; CHECK-NEXT: retq # encoding: [0xc3] +; +; EGPR-LABEL: test_mm_crc64_u64: +; EGPR: # %bb.0: +; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8] +; EGPR-NEXT: crc32q %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6] +; EGPR-NEXT: retq # encoding: [0xc3] %res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1) ret i64 %res } From b1beebe1515783ddfe1d5743b5449703b2daa579 Mon Sep 17 00:00:00 2001 From: "Wang, Xin10" Date: Thu, 4 Jan 2024 18:00:27 -0800 Subject: [PATCH 7/7] restore mistake change --- 
llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll index 96258ea9bcbb7..84c7f90cfe3c3 100644 --- a/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+crc32 | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X86 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X64 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s --check-prefixes=EGPR define i32 @crc32_32_8(i32 %a, i8 %b) nounwind { ; X86-LABEL: crc32_32_8: ; X86: ## %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: crc32b {{[0-9]+}}(%esp), %eax -; X86-NEXT: retl +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: crc32b {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf0,0x44,0x24,0x08] +; X86-NEXT: retl ## encoding: [0xc3] ; ; X64-LABEL: crc32_32_8: ; X64: ## %bb.0: @@ -29,9 +29,9 @@ declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind define i32 @crc32_32_16(i32 %a, i16 %b) nounwind { ; X86-LABEL: crc32_32_16: ; X86: ## %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: crc32w {{[0-9]+}}(%esp), %eax -; X86-NEXT: retl +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: crc32w {{[0-9]+}}(%esp), %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08] +; X86-NEXT: retl ## encoding: [0xc3] ; ; X64-LABEL: crc32_32_16: ; X64: ## %bb.0: @@ -52,9 +52,9 @@ declare i32 
@llvm.x86.sse42.crc32.32.16(i32, i16) nounwind define i32 @crc32_32_32(i32 %a, i32 %b) nounwind { ; X86-LABEL: crc32_32_32: ; X86: ## %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: crc32l {{[0-9]+}}(%esp), %eax -; X86-NEXT: retl +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: crc32l {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08] +; X86-NEXT: retl ## encoding: [0xc3] ; ; X64-LABEL: crc32_32_32: ; X64: ## %bb.0: