From c0c6ab1f2a91fa96a119eac8e1bb3bf0deb8dfdc Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang@intel.com>
Date: Tue, 2 Jan 2024 23:10:13 -0800
Subject: [PATCH 1/7] Support Lowering for APX Promoted
 SHA/MOVDIR/CRC32/INVPCID instructions

---
 llvm/lib/Target/X86/X86FastISel.cpp          |   8 +-
 llvm/lib/Target/X86/X86InstrSystem.td        |  13 +-
 llvm/lib/Target/X86/X86InstrVMX.td           |   8 +-
 llvm/test/CodeGen/X86/apx/cet.ll             |  50 +++++
 llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll |  61 ++++++
 llvm/test/CodeGen/X86/apx/crc32.ll           |  58 ++++++
 llvm/test/CodeGen/X86/apx/invpcid.ll         |  27 +++
 llvm/test/CodeGen/X86/apx/movdir.ll          |  38 ++++
 llvm/test/CodeGen/X86/apx/sha.ll             | 186 +++++++++++++++++++
 9 files changed, 438 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/apx/cet.ll
 create mode 100644 llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll
 create mode 100644 llvm/test/CodeGen/X86/apx/crc32.ll
 create mode 100644 llvm/test/CodeGen/X86/apx/invpcid.ll
 create mode 100644 llvm/test/CodeGen/X86/apx/movdir.ll
 create mode 100644 llvm/test/CodeGen/X86/apx/sha.ll

diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 0ba31e173a1a7..3658af785c24e 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3047,19 +3047,19 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     default:
       llvm_unreachable("Unexpected intrinsic.");
     case Intrinsic::x86_sse42_crc32_32_8:
-      Opc = X86::CRC32r32r8;
+      Opc = Subtarget->hasCRC32() ? X86::CRC32r32r8_EVEX : X86::CRC32r32r8;
       RC = &X86::GR32RegClass;
       break;
     case Intrinsic::x86_sse42_crc32_32_16:
-      Opc = X86::CRC32r32r16;
+      Opc = Subtarget->hasCRC32() ? X86::CRC32r32r16_EVEX : X86::CRC32r32r16;
       RC = &X86::GR32RegClass;
       break;
     case Intrinsic::x86_sse42_crc32_32_32:
-      Opc = X86::CRC32r32r32;
+      Opc = Subtarget->hasCRC32() ? X86::CRC32r32r32_EVEX : X86::CRC32r32r32;
       RC = &X86::GR32RegClass;
       break;
     case Intrinsic::x86_sse42_crc32_64_64:
-      Opc = X86::CRC32r64r64;
+      Opc = Subtarget->hasCRC32() ? X86::CRC32r64r64_EVEX : X86::CRC32r64r64;
       RC = &X86::GR64RegClass;
       break;
     }
diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td
index 699e5847e63fb..30530a00809f3 100644
--- a/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/llvm/lib/Target/X86/X86InstrSystem.td
@@ -695,14 +695,14 @@ def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
                   Requires<[Not64BitMode, HasINVPCID]>;
 def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                   "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
-                  Requires<[In64BitMode, HasINVPCID]>;
+                  Requires<[In64BitMode, HasINVPCID, NoEGPR]>;
 
 def INVPCID64_EVEX : I<0xF2, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                        "invpcid\t{$src2, $src1|$src1, $src2}", []>,
-                     EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID]>;
+                     EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID, HasEGPR]>;
 } // SchedRW
 
-let Predicates = [In64BitMode, HasINVPCID] in {
+let Predicates = [In64BitMode, HasINVPCID, NoEGPR] in {
   // The instruction can only use a 64 bit register as the register argument
   // in 64 bit mode, while the intrinsic only accepts a 32 bit argument
   // corresponding to it.
@@ -714,6 +714,13 @@ let Predicates = [In64BitMode, HasINVPCID] in {
               addr:$src2)>;
 }
 
+let Predicates = [In64BitMode, HasINVPCID, HasEGPR] in {
+  def : Pat<(int_x86_invpcid GR32:$src1, addr:$src2),
+            (INVPCID64_EVEX
+              (SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src1), sub_32bit),
+              addr:$src2)>;
+}
+
 
 //===----------------------------------------------------------------------===//
 // SMAP Instruction
diff --git a/llvm/lib/Target/X86/X86InstrVMX.td b/llvm/lib/Target/X86/X86InstrVMX.td
index 7cc468fe15ad4..e672246789721 100644
--- a/llvm/lib/Target/X86/X86InstrVMX.td
+++ b/llvm/lib/Target/X86/X86InstrVMX.td
@@ -21,10 +21,10 @@ def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
                Requires<[Not64BitMode]>;
 def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                "invept\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
-               Requires<[In64BitMode]>;
+               Requires<[In64BitMode, NoEGPR]>;
 def INVEPT64_EVEX : I<0xF0, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                       "invept\t{$src2, $src1|$src1, $src2}", []>,
-                    EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>;
+                    EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasEGPR]>;
 
 // 66 0F 38 81
 def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
@@ -32,10 +32,10 @@ def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
                 Requires<[Not64BitMode]>;
 def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                 "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
-                Requires<[In64BitMode]>;
+                Requires<[In64BitMode, NoEGPR]>;
 def INVVPID64_EVEX : I<0xF1, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                        "invvpid\t{$src2, $src1|$src1, $src2}", []>,
-                     EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>;
+                     EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasEGPR]>;
 
 // 0F 01 C1
 def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
diff --git a/llvm/test/CodeGen/X86/apx/cet.ll b/llvm/test/CodeGen/X86/apx/cet.ll
new file mode 100644
index 0000000000000..98f3844d1ccd1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/cet.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+shstk,+egpr --show-mc-encoding | FileCheck %s
+
+define void @test_wrssd(i32 %a, ptr %__p) {
+; CHECK-LABEL: test_wrssd:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    wrssd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7c,0x08,0x66,0x3e]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+entry:
+  tail call void @llvm.x86.wrssd(i32 %a, ptr %__p)
+  ret void
+}
+
+declare void @llvm.x86.wrssd(i32, ptr)
+
+define void @test_wrssq(i64 %a, ptr %__p) {
+; CHECK-LABEL: test_wrssq:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    wrssq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfc,0x08,0x66,0x3e]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+entry:
+  tail call void @llvm.x86.wrssq(i64 %a, ptr %__p)
+  ret void
+}
+
+declare void @llvm.x86.wrssq(i64, ptr)
+
+define void @test_wrussd(i32 %a, ptr %__p) {
+; CHECK-LABEL: test_wrussd:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    wrussd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7d,0x08,0x65,0x3e]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+entry:
+  tail call void @llvm.x86.wrussd(i32 %a, ptr %__p)
+  ret void
+}
+
+declare void @llvm.x86.wrussd(i32, ptr)
+
+define void @test_wrussq(i64 %a, ptr %__p) {
+; CHECK-LABEL: test_wrussq:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    wrussq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfd,0x08,0x65,0x3e]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+entry:
+  tail call void @llvm.x86.wrussq(i64 %a, ptr %__p)
+  ret void
+}
+
+declare void @llvm.x86.wrussq(i64, ptr)
diff --git a/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll b/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll
new file mode 100644
index 0000000000000..0b51679ccd7fb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32,+egpr --show-mc-encoding | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s
+
+define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind {
+; CHECK-LABEL: test_mm_crc32_u8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT:    crc32b %sil, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %trunc = trunc i32 %a1 to i8
+  %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc)
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone
+
+define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind {
+; CHECK-LABEL: test_mm_crc32_u16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT:    crc32w %si, %eax # encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %trunc = trunc i32 %a1 to i16
+  %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc)
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind readnone
+
+define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind {
+; CHECK-LABEL: test_mm_crc32_u32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT:    crc32l %esi, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind readnone
+
+define i64 @test_mm_crc64_u8(i64 %a0, i32 %a1) nounwind{
+; CHECK-LABEL: test_mm_crc64_u8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    crc32b %sil, %edi # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xfe]
+; CHECK-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %trunc = trunc i32 %a1 to i8
+  %res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %trunc)
+  ret i64 %res
+}
+declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind readnone
+
+define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{
+; CHECK-LABEL: test_mm_crc64_u64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; CHECK-NEXT:    crc32q %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
+  ret i64 %res
+}
+declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/apx/crc32.ll b/llvm/test/CodeGen/X86/apx/crc32.ll
new file mode 100644
index 0000000000000..4bcc4d15cc6b5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/crc32.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s
+
+define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
+; CHECK-LABEL: crc32_32_8:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; CHECK-NEXT:    crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
+  ret i32 %tmp
+}
+
+define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
+; CHECK-LABEL: crc32_32_16:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; CHECK-NEXT:    crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
+  ret i32 %tmp
+}
+
+define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: crc32_32_32:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; CHECK-NEXT:    crc32l %esi, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
+
+define i64 @crc32_64_8(i64 %a, i8 %b) nounwind {
+; CHECK-LABEL: crc32_64_8:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
+; CHECK-NEXT:    crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b)
+  ret i64 %tmp
+}
+
+define i64 @crc32_64_64(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: crc32_64_64:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
+; CHECK-NEXT:    crc32q %rsi, %rax ## encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b)
+  ret i64 %tmp
+}
+
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
+declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind
+declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind
diff --git a/llvm/test/CodeGen/X86/apx/invpcid.ll b/llvm/test/CodeGen/X86/apx/invpcid.ll
new file mode 100644
index 0000000000000..389895f492130
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/invpcid.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+invpcid,+egpr --show-mc-encoding | FileCheck %s
+
+define void @test_invpcid(i32 %type, ptr %descriptor) {
+; CHECK-LABEL: test_invpcid:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT:    invpcid (%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  call void @llvm.x86.invpcid(i32 %type, ptr %descriptor)
+  ret void
+}
+
+define void @test_invpcid2(ptr readonly %type, ptr %descriptor) {
+; CHECK-LABEL: test_invpcid2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl (%rdi), %eax # encoding: [0x8b,0x07]
+; CHECK-NEXT:    invpcid (%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = load i32, ptr %type, align 4
+  tail call void @llvm.x86.invpcid(i32 %0, ptr %descriptor) #1
+  ret void
+}
+
+declare void @llvm.x86.invpcid(i32, ptr)
diff --git a/llvm/test/CodeGen/X86/apx/movdir.ll b/llvm/test/CodeGen/X86/apx/movdir.ll
new file mode 100644
index 0000000000000..06fd7511bc143
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/movdir.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri,+movdir64b,+egpr --show-mc-encoding | FileCheck %s
+
+define void @test_movdiri(ptr %p, i32 %v) {
+; CHECK-LABEL: test_movdiri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movdiri %esi, (%rdi) # encoding: [0x62,0xf4,0x7c,0x08,0xf9,0x37]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  call void @llvm.x86.directstore32(ptr %p, i32 %v)
+  ret void
+}
+
+declare void @llvm.x86.directstore32(ptr, i32)
+
+define void @test_movdiri_64(ptr %p, i64 %v) {
+; CHECK-LABEL: test_movdiri_64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movdiri %rsi, (%rdi) # encoding: [0x62,0xf4,0xfc,0x08,0xf9,0x37]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  call void @llvm.x86.directstore64(ptr %p, i64 %v)
+  ret void
+}
+
+declare void @llvm.x86.directstore64(ptr, i64)
+
+define void @test_movdir64b(ptr %dst, ptr %src) {
+; CHECK-LABEL: test_movdir64b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movdir64b (%rsi), %rdi # encoding: [0x62,0xf4,0x7d,0x08,0xf8,0x3e]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  call void @llvm.x86.movdir64b(ptr %dst, ptr %src)
+  ret void
+}
+
+declare void @llvm.x86.movdir64b(ptr, ptr)
diff --git a/llvm/test/CodeGen/X86/apx/sha.ll b/llvm/test/CodeGen/X86/apx/sha.ll
new file mode 100644
index 0000000000000..088ee61a97f4e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/sha.ll
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mattr=+sha,+egpr -mtriple=x86_64-unknown-unknown --show-mc-encoding | FileCheck %s
+
+declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone
+
+define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1rnds4rr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0xc1,0x03]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3)
+  ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1rnds4rm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = load <4 x i32>, ptr %b
+  %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
+  ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1nexterr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha1nexte %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0xc1]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha1nexterm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1nexterm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha1nexte (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = load <4 x i32>, ptr %b
+  %1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0)
+  ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1msg1rr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha1msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0xc1]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1msg1rm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha1msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = load <4 x i32>, ptr %b
+  %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0)
+  ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1msg2rr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha1msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0xc1]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1msg2rm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha1msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = load <4 x i32>, ptr %b
+  %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0)
+  ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind uwtable {
+;
+; CHECK-LABEL: test_sha256rnds2rr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8]
+; CHECK-NEXT:    movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2]
+; CHECK-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0xd9]
+; CHECK-NEXT:    movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+  ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, ptr %b, <4 x i32> %c) nounwind uwtable {
+;
+; CHECK-LABEL: test_sha256rnds2rm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movaps %xmm0, %xmm2 # encoding: [0x0f,0x28,0xd0]
+; CHECK-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
+; CHECK-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0x17]
+; CHECK-NEXT:    movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = load <4 x i32>, ptr %b
+  %1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c)
+  ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha256msg1rr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha256msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0xc1]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha256msg1rm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha256msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = load <4 x i32>, ptr %b
+  %1 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %0)
+  ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha256msg2rr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha256msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0xc1]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha256msg2rm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha256msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = load <4 x i32>, ptr %b
+  %1 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %0)
+  ret <4 x i32> %1
+}
+
+; Make sure we don't forget that sha instructions have no VEX equivalents and thus don't zero YMM/ZMM.
+define <8 x i32> @test_sha1rnds4_zero_extend(<4 x i32> %a, ptr %b) nounwind uwtable {
+;
+; CHECK-LABEL: test_sha1rnds4_zero_extend:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03]
+; CHECK-NEXT:    xorps %xmm1, %xmm1 # encoding: [0x0f,0x57,0xc9]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = load <4 x i32>, ptr %b
+  %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
+  %2 = shufflevector <4 x i32> %1, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i32> %2
+}

From b6d60b595e3784df18042a858206f28f7e5a1082 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang@intel.com>
Date: Tue, 2 Jan 2024 23:21:46 -0800
Subject: [PATCH 2/7] Fix error: select EVEX CRC32 opcodes based on EGPR, not CRC32

---
 llvm/lib/Target/X86/X86FastISel.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 3658af785c24e..8b4ff4c8ed878 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3047,19 +3047,19 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     default:
       llvm_unreachable("Unexpected intrinsic.");
     case Intrinsic::x86_sse42_crc32_32_8:
-      Opc = Subtarget->hasCRC32() ? X86::CRC32r32r8_EVEX : X86::CRC32r32r8;
+      Opc = Subtarget->hasEGPR() ? X86::CRC32r32r8_EVEX : X86::CRC32r32r8;
       RC = &X86::GR32RegClass;
       break;
     case Intrinsic::x86_sse42_crc32_32_16:
-      Opc = Subtarget->hasCRC32() ? X86::CRC32r32r16_EVEX : X86::CRC32r32r16;
+      Opc = Subtarget->hasEGPR() ? X86::CRC32r32r16_EVEX : X86::CRC32r32r16;
       RC = &X86::GR32RegClass;
       break;
     case Intrinsic::x86_sse42_crc32_32_32:
-      Opc = Subtarget->hasCRC32() ? X86::CRC32r32r32_EVEX : X86::CRC32r32r32;
+      Opc = Subtarget->hasEGPR() ? X86::CRC32r32r32_EVEX : X86::CRC32r32r32;
       RC = &X86::GR32RegClass;
       break;
     case Intrinsic::x86_sse42_crc32_64_64:
-      Opc = Subtarget->hasCRC32() ? X86::CRC32r64r64_EVEX : X86::CRC32r64r64;
+      Opc = Subtarget->hasEGPR() ? X86::CRC32r64r64_EVEX : X86::CRC32r64r64;
       RC = &X86::GR64RegClass;
       break;
     }

From e464f561838fdebddc04fbda1208cea7897b70a0 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang@intel.com>
Date: Wed, 3 Jan 2024 01:59:28 -0800
Subject: [PATCH 3/7] Use a macro to tidy up EVEX opcode selection

---
 llvm/lib/Target/X86/X86FastISel.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 8b4ff4c8ed878..084c40b90049b 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3046,22 +3046,24 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     switch (II->getIntrinsicID()) {
     default:
       llvm_unreachable("Unexpected intrinsic.");
+#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC;
     case Intrinsic::x86_sse42_crc32_32_8:
-      Opc = Subtarget->hasEGPR() ? X86::CRC32r32r8_EVEX : X86::CRC32r32r8;
+      Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r8);
       RC = &X86::GR32RegClass;
       break;
     case Intrinsic::x86_sse42_crc32_32_16:
-      Opc = Subtarget->hasEGPR() ? X86::CRC32r32r16_EVEX : X86::CRC32r32r16;
+      Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r16);
       RC = &X86::GR32RegClass;
       break;
     case Intrinsic::x86_sse42_crc32_32_32:
-      Opc = Subtarget->hasEGPR() ? X86::CRC32r32r32_EVEX : X86::CRC32r32r32;
+      Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r32);
       RC = &X86::GR32RegClass;
       break;
     case Intrinsic::x86_sse42_crc32_64_64:
-      Opc = Subtarget->hasEGPR() ? X86::CRC32r64r64_EVEX : X86::CRC32r64r64;
+      Opc = GET_EGPR_IF_ENABLED(X86::CRC32r64r64);
       RC = &X86::GR64RegClass;
       break;
+#undef GET_EGPR_IF_ENABLED
     }
 
     const Value *LHS = II->getArgOperand(0);

From 9a93bdf9c4567638a6c6c38421b9611656f2d248 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang@intel.com>
Date: Wed, 3 Jan 2024 18:59:22 -0800
Subject: [PATCH 4/7] Resolve review comments

---
 llvm/lib/Target/X86/X86DomainReassignment.cpp |  47 +--
 llvm/lib/Target/X86/X86FastISel.cpp           |   2 +-
 llvm/lib/Target/X86/X86InstrSystem.td         |   8 +-
 llvm/test/CodeGen/X86/apx/cet.ll              |  50 ----
 llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll  |  61 ----
 llvm/test/CodeGen/X86/apx/crc32.ll            |  58 ----
 llvm/test/CodeGen/X86/apx/invpcid.ll          |  27 --
 llvm/test/CodeGen/X86/apx/movdir.ll           |  38 ---
 llvm/test/CodeGen/X86/apx/sha.ll              | 186 ------------
 .../X86/crc32-intrinsics-fast-isel-x86.ll     |  41 ++-
 llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll |  39 ++-
 .../CodeGen/X86/crc32-intrinsics-x86_64.ll    |  13 +
 llvm/test/CodeGen/X86/invpcid-intrinsic.ll    |  27 +-
 llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll |  33 ++-
 .../CodeGen/X86/movdir-intrinsic-x86_64.ll    |  12 +-
 llvm/test/CodeGen/X86/sha.ll                  | 269 ++++++++++++++----
 llvm/test/CodeGen/X86/x64-cet-intrinsics.ll   | 120 ++++++--
 17 files changed, 451 insertions(+), 580 deletions(-)
 delete mode 100644 llvm/test/CodeGen/X86/apx/cet.ll
 delete mode 100644 llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll
 delete mode 100644 llvm/test/CodeGen/X86/apx/crc32.ll
 delete mode 100644 llvm/test/CodeGen/X86/apx/invpcid.ll
 delete mode 100644 llvm/test/CodeGen/X86/apx/movdir.ll
 delete mode 100644 llvm/test/CodeGen/X86/apx/sha.ll

diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp
index bdd86e48fa543..7e6b945e5d7a2 100644
--- a/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -619,40 +619,40 @@ void X86DomainReassignment::initConverters() {
         std::make_unique<InstrReplacerDstCOPY>(From, To);
   };
 
-  bool HasEGPR = STI->hasEGPR();
+#define GET_EGPR_IF_ENABLED(OPC) STI->hasEGPR() ? OPC##_EVEX : OPC
   createReplacerDstCOPY(X86::MOVZX32rm16,
-                        HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
+                        GET_EGPR_IF_ENABLED(X86::KMOVWkm));
   createReplacerDstCOPY(X86::MOVZX64rm16,
-                        HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
+                        GET_EGPR_IF_ENABLED(X86::KMOVWkm));
 
   createReplacerDstCOPY(X86::MOVZX32rr16,
-                        HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
+                        GET_EGPR_IF_ENABLED(X86::KMOVWkk));
   createReplacerDstCOPY(X86::MOVZX64rr16,
-                        HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
+                        GET_EGPR_IF_ENABLED(X86::KMOVWkk));
 
   if (STI->hasDQI()) {
     createReplacerDstCOPY(X86::MOVZX16rm8,
-                          HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+                          GET_EGPR_IF_ENABLED(X86::KMOVBkm));
     createReplacerDstCOPY(X86::MOVZX32rm8,
-                          HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+                          GET_EGPR_IF_ENABLED(X86::KMOVBkm));
     createReplacerDstCOPY(X86::MOVZX64rm8,
-                          HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
+                          GET_EGPR_IF_ENABLED(X86::KMOVBkm));
 
     createReplacerDstCOPY(X86::MOVZX16rr8,
-                          HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
+                          GET_EGPR_IF_ENABLED(X86::KMOVBkk));
     createReplacerDstCOPY(X86::MOVZX32rr8,
-                          HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
+                          GET_EGPR_IF_ENABLED(X86::KMOVBkk));
     createReplacerDstCOPY(X86::MOVZX64rr8,
-                          HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
+                          GET_EGPR_IF_ENABLED(X86::KMOVBkk));
   }
 
   auto createReplacer = [&](unsigned From, unsigned To) {
     Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(From, To);
   };
 
-  createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
-  createReplacer(X86::MOV16mr, HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
-  createReplacer(X86::MOV16rr, HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
+  createReplacer(X86::MOV16rm, GET_EGPR_IF_ENABLED(X86::KMOVWkm));
+  createReplacer(X86::MOV16mr, GET_EGPR_IF_ENABLED(X86::KMOVWmk));
+  createReplacer(X86::MOV16rr, GET_EGPR_IF_ENABLED(X86::KMOVWkk));
   createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
   createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
   createReplacer(X86::NOT16r, X86::KNOTWrr);
@@ -661,14 +661,14 @@ void X86DomainReassignment::initConverters() {
   createReplacer(X86::XOR16rr, X86::KXORWrr);
 
   if (STI->hasBWI()) {
-    createReplacer(X86::MOV32rm, HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm);
-    createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm);
+    createReplacer(X86::MOV32rm, GET_EGPR_IF_ENABLED(X86::KMOVDkm));
+    createReplacer(X86::MOV64rm, GET_EGPR_IF_ENABLED(X86::KMOVQkm));
 
-    createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
-    createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
+    createReplacer(X86::MOV32mr, GET_EGPR_IF_ENABLED(X86::KMOVDmk));
+    createReplacer(X86::MOV64mr, GET_EGPR_IF_ENABLED(X86::KMOVQmk));
 
-    createReplacer(X86::MOV32rr, HasEGPR ? X86::KMOVDkk_EVEX : X86::KMOVDkk);
-    createReplacer(X86::MOV64rr, HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk);
+    createReplacer(X86::MOV32rr, GET_EGPR_IF_ENABLED(X86::KMOVDkk));
+    createReplacer(X86::MOV64rr, GET_EGPR_IF_ENABLED(X86::KMOVQkk));
 
     createReplacer(X86::SHR32ri, X86::KSHIFTRDri);
     createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
@@ -706,9 +706,9 @@ void X86DomainReassignment::initConverters() {
 
     createReplacer(X86::AND8rr, X86::KANDBrr);
 
-    createReplacer(X86::MOV8rm, HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
-    createReplacer(X86::MOV8mr, HasEGPR ? X86::KMOVBmk_EVEX : X86::KMOVBmk);
-    createReplacer(X86::MOV8rr, HasEGPR ? X86::KMOVBkk_EVEX : X86::KMOVBkk);
+    createReplacer(X86::MOV8rm, GET_EGPR_IF_ENABLED(X86::KMOVBkm));
+    createReplacer(X86::MOV8mr, GET_EGPR_IF_ENABLED(X86::KMOVBmk));
+    createReplacer(X86::MOV8rr, GET_EGPR_IF_ENABLED(X86::KMOVBkk));
 
     createReplacer(X86::NOT8r, X86::KNOTBrr);
 
@@ -724,6 +724,7 @@ void X86DomainReassignment::initConverters() {
 
     createReplacer(X86::XOR8rr, X86::KXORBrr);
   }
+#undef GET_EGPR_IF_ENABLED
 }
 
 bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 084c40b90049b..efbc0e119060f 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3046,7 +3046,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     switch (II->getIntrinsicID()) {
     default:
       llvm_unreachable("Unexpected intrinsic.");
-#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC;
+#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC
     case Intrinsic::x86_sse42_crc32_32_8:
       Opc = GET_EGPR_IF_ENABLED(X86::CRC32r32r8);
       RC = &X86::GR32RegClass;
diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td
index 30530a00809f3..b1be4739617df 100644
--- a/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/llvm/lib/Target/X86/X86InstrSystem.td
@@ -695,14 +695,14 @@ def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
                   Requires<[Not64BitMode, HasINVPCID]>;
 def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                   "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
-                  Requires<[In64BitMode, HasINVPCID, NoEGPR]>;
+                  Requires<[In64BitMode]>;
 
 def INVPCID64_EVEX : I<0xF2, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                        "invpcid\t{$src2, $src1|$src1, $src2}", []>,
-                     EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID, HasEGPR]>;
+                     EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>;
 } // SchedRW
 
-let Predicates = [In64BitMode, HasINVPCID, NoEGPR] in {
+let Predicates = [HasINVPCID, NoEGPR] in {
   // The instruction can only use a 64 bit register as the register argument
   // in 64 bit mode, while the intrinsic only accepts a 32 bit argument
   // corresponding to it.
@@ -714,7 +714,7 @@ let Predicates = [In64BitMode, HasINVPCID, NoEGPR] in {
               addr:$src2)>;
 }
 
-let Predicates = [In64BitMode, HasINVPCID, HasEGPR] in {
+let Predicates = [HasINVPCID, HasEGPR] in {
   def : Pat<(int_x86_invpcid GR32:$src1, addr:$src2),
             (INVPCID64_EVEX
               (SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src1), sub_32bit),
diff --git a/llvm/test/CodeGen/X86/apx/cet.ll b/llvm/test/CodeGen/X86/apx/cet.ll
deleted file mode 100644
index 98f3844d1ccd1..0000000000000
--- a/llvm/test/CodeGen/X86/apx/cet.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+shstk,+egpr --show-mc-encoding | FileCheck %s
-
-define void @test_wrssd(i32 %a, ptr %__p) {
-; CHECK-LABEL: test_wrssd:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    wrssd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7c,0x08,0x66,0x3e]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-entry:
-  tail call void @llvm.x86.wrssd(i32 %a, ptr %__p)
-  ret void
-}
-
-declare void @llvm.x86.wrssd(i32, ptr)
-
-define void @test_wrssq(i64 %a, ptr %__p) {
-; CHECK-LABEL: test_wrssq:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    wrssq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfc,0x08,0x66,0x3e]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-entry:
-  tail call void @llvm.x86.wrssq(i64 %a, ptr %__p)
-  ret void
-}
-
-declare void @llvm.x86.wrssq(i64, ptr)
-
-define void @test_wrussd(i32 %a, ptr %__p) {
-; CHECK-LABEL: test_wrussd:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    wrussd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7d,0x08,0x65,0x3e]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-entry:
-  tail call void @llvm.x86.wrussd(i32 %a, ptr %__p)
-  ret void
-}
-
-declare void @llvm.x86.wrussd(i32, ptr)
-
-define void @test_wrussq(i64 %a, ptr %__p) {
-; CHECK-LABEL: test_wrussq:
-; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    wrussq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfd,0x08,0x65,0x3e]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-entry:
-  tail call void @llvm.x86.wrussq(i64 %a, ptr %__p)
-  ret void
-}
-
-declare void @llvm.x86.wrussq(i64, ptr)
diff --git a/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll b/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll
deleted file mode 100644
index 0b51679ccd7fb..0000000000000
--- a/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll
+++ /dev/null
@@ -1,61 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32,+egpr --show-mc-encoding | FileCheck %s
-; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s
-
-define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind {
-; CHECK-LABEL: test_mm_crc32_u8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
-; CHECK-NEXT:    crc32b %sil, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-  %trunc = trunc i32 %a1 to i8
-  %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc)
-  ret i32 %res
-}
-declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone
-
-define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind {
-; CHECK-LABEL: test_mm_crc32_u16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
-; CHECK-NEXT:    crc32w %si, %eax # encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-  %trunc = trunc i32 %a1 to i16
-  %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc)
-  ret i32 %res
-}
-declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind readnone
-
-define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind {
-; CHECK-LABEL: test_mm_crc32_u32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
-; CHECK-NEXT:    crc32l %esi, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-  %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
-  ret i32 %res
-}
-declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind readnone
-
-define i64 @test_mm_crc64_u8(i64 %a0, i32 %a1) nounwind{
-; CHECK-LABEL: test_mm_crc64_u8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    crc32b %sil, %edi # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xfe]
-; CHECK-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-  %trunc = trunc i32 %a1 to i8
-  %res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %trunc)
-  ret i64 %res
-}
-declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind readnone
-
-define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{
-; CHECK-LABEL: test_mm_crc64_u64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
-; CHECK-NEXT:    crc32q %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-  %res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
-  ret i64 %res
-}
-declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/apx/crc32.ll b/llvm/test/CodeGen/X86/apx/crc32.ll
deleted file mode 100644
index 4bcc4d15cc6b5..0000000000000
--- a/llvm/test/CodeGen/X86/apx/crc32.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s
-
-define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
-; CHECK-LABEL: crc32_32_8:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
-; CHECK-NEXT:    crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
-  ret i32 %tmp
-}
-
-define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
-; CHECK-LABEL: crc32_32_16:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
-; CHECK-NEXT:    crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
-  ret i32 %tmp
-}
-
-define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
-; CHECK-LABEL: crc32_32_32:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
-; CHECK-NEXT:    crc32l %esi, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
-  ret i32 %tmp
-}
-
-define i64 @crc32_64_8(i64 %a, i8 %b) nounwind {
-; CHECK-LABEL: crc32_64_8:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
-; CHECK-NEXT:    crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b)
-  ret i64 %tmp
-}
-
-define i64 @crc32_64_64(i64 %a, i64 %b) nounwind {
-; CHECK-LABEL: crc32_64_64:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
-; CHECK-NEXT:    crc32q %rsi, %rax ## encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b)
-  ret i64 %tmp
-}
-
-declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
-declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
-declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
-declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind
-declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind
diff --git a/llvm/test/CodeGen/X86/apx/invpcid.ll b/llvm/test/CodeGen/X86/apx/invpcid.ll
deleted file mode 100644
index 389895f492130..0000000000000
--- a/llvm/test/CodeGen/X86/apx/invpcid.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+invpcid,+egpr --show-mc-encoding | FileCheck %s
-
-define void @test_invpcid(i32 %type, ptr %descriptor) {
-; CHECK-LABEL: test_invpcid:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
-; CHECK-NEXT:    invpcid (%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  call void @llvm.x86.invpcid(i32 %type, ptr %descriptor)
-  ret void
-}
-
-define void @test_invpcid2(ptr readonly %type, ptr %descriptor) {
-; CHECK-LABEL: test_invpcid2:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl (%rdi), %eax # encoding: [0x8b,0x07]
-; CHECK-NEXT:    invpcid (%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  %0 = load i32, ptr %type, align 4
-  tail call void @llvm.x86.invpcid(i32 %0, ptr %descriptor) #1
-  ret void
-}
-
-declare void @llvm.x86.invpcid(i32, ptr)
diff --git a/llvm/test/CodeGen/X86/apx/movdir.ll b/llvm/test/CodeGen/X86/apx/movdir.ll
deleted file mode 100644
index 06fd7511bc143..0000000000000
--- a/llvm/test/CodeGen/X86/apx/movdir.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri,+movdir64b,+egpr --show-mc-encoding | FileCheck %s
-
-define void @test_movdiri(ptr %p, i32 %v) {
-; CHECK-LABEL: test_movdiri:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movdiri %esi, (%rdi) # encoding: [0x62,0xf4,0x7c,0x08,0xf9,0x37]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  call void @llvm.x86.directstore32(ptr %p, i32 %v)
-  ret void
-}
-
-declare void @llvm.x86.directstore32(ptr, i32)
-
-define void @test_movdiri_64(ptr %p, i64 %v) {
-; CHECK-LABEL: test_movdiri_64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movdiri %rsi, (%rdi) # encoding: [0x62,0xf4,0xfc,0x08,0xf9,0x37]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  call void @llvm.x86.directstore64(ptr %p, i64 %v)
-  ret void
-}
-
-declare void @llvm.x86.directstore64(ptr, i64)
-
-define void @test_movdir64b(ptr %dst, ptr %src) {
-; CHECK-LABEL: test_movdir64b:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movdir64b (%rsi), %rdi # encoding: [0x62,0xf4,0x7d,0x08,0xf8,0x3e]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  call void @llvm.x86.movdir64b(ptr %dst, ptr %src)
-  ret void
-}
-
-declare void @llvm.x86.movdir64b(ptr, ptr)
diff --git a/llvm/test/CodeGen/X86/apx/sha.ll b/llvm/test/CodeGen/X86/apx/sha.ll
deleted file mode 100644
index 088ee61a97f4e..0000000000000
--- a/llvm/test/CodeGen/X86/apx/sha.ll
+++ /dev/null
@@ -1,186 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc < %s -mattr=+sha,+egpr -mtriple=x86_64-unknown-unknown --show-mc-encoding | FileCheck %s
-
-declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone
-
-define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
-; CHECK-LABEL: test_sha1rnds4rr:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0xc1,0x03]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3)
-  ret <4 x i32> %0
-}
-
-define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, ptr %b) nounwind uwtable {
-; CHECK-LABEL: test_sha1rnds4rm:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  %0 = load <4 x i32>, ptr %b
-  %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
-  ret <4 x i32> %1
-}
-
-declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
-; CHECK-LABEL: test_sha1nexterr:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha1nexte %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0xc1]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b)
-  ret <4 x i32> %0
-}
-
-define <4 x i32> @test_sha1nexterm(<4 x i32> %a, ptr %b) nounwind uwtable {
-; CHECK-LABEL: test_sha1nexterm:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha1nexte (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0x07]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  %0 = load <4 x i32>, ptr %b
-  %1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0)
-  ret <4 x i32> %1
-}
-
-declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
-; CHECK-LABEL: test_sha1msg1rr:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha1msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0xc1]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b)
-  ret <4 x i32> %0
-}
-
-define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
-; CHECK-LABEL: test_sha1msg1rm:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha1msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0x07]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  %0 = load <4 x i32>, ptr %b
-  %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0)
-  ret <4 x i32> %1
-}
-
-declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
-; CHECK-LABEL: test_sha1msg2rr:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha1msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0xc1]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b)
-  ret <4 x i32> %0
-}
-
-define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
-; CHECK-LABEL: test_sha1msg2rm:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha1msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0x07]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  %0 = load <4 x i32>, ptr %b
-  %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0)
-  ret <4 x i32> %1
-}
-
-declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
-
-define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind uwtable {
-;
-; CHECK-LABEL: test_sha256rnds2rr:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8]
-; CHECK-NEXT:    movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2]
-; CHECK-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0xd9]
-; CHECK-NEXT:    movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  %0 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
-  ret <4 x i32> %0
-}
-
-define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, ptr %b, <4 x i32> %c) nounwind uwtable {
-;
-; CHECK-LABEL: test_sha256rnds2rm:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movaps %xmm0, %xmm2 # encoding: [0x0f,0x28,0xd0]
-; CHECK-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
-; CHECK-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0x17]
-; CHECK-NEXT:    movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  %0 = load <4 x i32>, ptr %b
-  %1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c)
-  ret <4 x i32> %1
-}
-
-declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
-; CHECK-LABEL: test_sha256msg1rr:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha256msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0xc1]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b)
-  ret <4 x i32> %0
-}
-
-define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
-; CHECK-LABEL: test_sha256msg1rm:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha256msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0x07]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  %0 = load <4 x i32>, ptr %b
-  %1 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %0)
-  ret <4 x i32> %1
-}
-
-declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
-; CHECK-LABEL: test_sha256msg2rr:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha256msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0xc1]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b)
-  ret <4 x i32> %0
-}
-
-define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
-; CHECK-LABEL: test_sha256msg2rm:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha256msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0x07]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  %0 = load <4 x i32>, ptr %b
-  %1 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %0)
-  ret <4 x i32> %1
-}
-
-; Make sure we don't forget that sha instructions have no VEX equivalents and thus don't zero YMM/ZMM.
-define <8 x i32> @test_sha1rnds4_zero_extend(<4 x i32> %a, ptr %b) nounwind uwtable {
-;
-; CHECK-LABEL: test_sha1rnds4_zero_extend:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03]
-; CHECK-NEXT:    xorps %xmm1, %xmm1 # encoding: [0x0f,0x57,0xc9]
-; CHECK-NEXT:    retq # encoding: [0xc3]
-entry:
-  %0 = load <4 x i32>, ptr %b
-  %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
-  %2 = shufflevector <4 x i32> %1, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  ret <8 x i32> %2
-}
diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
index 056d79f379fd1..cb93e7a4f4362 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
@@ -3,8 +3,9 @@
 ; RUN: llc < %s -fast-isel -pass-remarks-missed=sdagisel -mtriple=i686-unknown-unknown -mattr=+crc32 2>&1 >/dev/null | FileCheck %s -check-prefix=STDERR-X86 -allow-empty
 ; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -check-prefix=X86
 ; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X86
-; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 --show-mc-encoding | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 --show-mc-encoding | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s -check-prefix=EGPR
 
 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c
 
@@ -21,9 +22,15 @@ define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind {
 ;
 ; X64-LABEL: test_mm_crc32_u8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    crc32b %sil, %eax
-; X64-NEXT:    retq
+; X64-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; X64-NEXT:    crc32b %sil, %eax # encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_mm_crc32_u8:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; EGPR-NEXT:    crc32b %sil, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; EGPR-NEXT:    retq # encoding: [0xc3]
   %trunc = trunc i32 %a1 to i8
   %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc)
   ret i32 %res
@@ -41,9 +48,15 @@ define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind {
 ;
 ; X64-LABEL: test_mm_crc32_u16:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    crc32w %si, %eax
-; X64-NEXT:    retq
+; X64-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; X64-NEXT:    crc32w %si, %eax # encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_mm_crc32_u16:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; EGPR-NEXT:    crc32w %si, %eax # encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
+; EGPR-NEXT:    retq # encoding: [0xc3]
   %trunc = trunc i32 %a1 to i16
   %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc)
   ret i32 %res
@@ -59,9 +72,15 @@ define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind {
 ;
 ; X64-LABEL: test_mm_crc32_u32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    crc32l %esi, %eax
-; X64-NEXT:    retq
+; X64-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; X64-NEXT:    crc32l %esi, %eax # encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_mm_crc32_u32:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; EGPR-NEXT:    crc32l %esi, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
+; EGPR-NEXT:    retq # encoding: [0xc3]
   %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
   ret i32 %res
 }
diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
index 7623ba68353e1..96258ea9bcbb7 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
@@ -1,19 +1,26 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+crc32 | FileCheck %s --check-prefixes=X86
 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s --check-prefixes=EGPR
 
 define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
 ; X86-LABEL: crc32_32_8:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    crc32b {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf0,0x44,0x24,0x08]
-; X86-NEXT:    retl ## encoding: [0xc3]
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    crc32b {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: crc32_32_8:
 ; X64:       ## %bb.0:
 ; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
 ; X64-NEXT:    crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
 ; X64-NEXT:    retq ## encoding: [0xc3]
+;
+; EGPR-LABEL: crc32_32_8:
+; EGPR:       ## %bb.0:
+; EGPR-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; EGPR-NEXT:    crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
   ret i32 %tmp
 }
@@ -22,15 +29,21 @@ declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
 define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
 ; X86-LABEL: crc32_32_16:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    crc32w {{[0-9]+}}(%esp), %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08]
-; X86-NEXT:    retl ## encoding: [0xc3]
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    crc32w {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: crc32_32_16:
 ; X64:       ## %bb.0:
 ; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
 ; X64-NEXT:    crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
 ; X64-NEXT:    retq ## encoding: [0xc3]
+;
+; EGPR-LABEL: crc32_32_16:
+; EGPR:       ## %bb.0:
+; EGPR-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; EGPR-NEXT:    crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
+; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
   ret i32 %tmp
 }
@@ -39,15 +52,21 @@ declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
 define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
 ; X86-LABEL: crc32_32_32:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    crc32l {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08]
-; X86-NEXT:    retl ## encoding: [0xc3]
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    crc32l {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: crc32_32_32:
 ; X64:       ## %bb.0:
 ; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
 ; X64-NEXT:    crc32l %esi, %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
 ; X64-NEXT:    retq ## encoding: [0xc3]
+;
+; EGPR-LABEL: crc32_32_32:
+; EGPR:       ## %bb.0:
+; EGPR-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; EGPR-NEXT:    crc32l %esi, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
+; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
   ret i32 %tmp
 }
diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll
index b0f7a394f07b7..bda26a15b277a 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-x86_64.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s --check-prefixes=EGPR
 
 declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind
 declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind
@@ -10,6 +11,12 @@ define i64 @crc32_64_8(i64 %a, i8 %b) nounwind {
 ; CHECK-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
 ; CHECK-NEXT:    crc32b %sil, %eax ## encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
+;
+; EGPR-LABEL: crc32_64_8:
+; EGPR:       ## %bb.0:
+; EGPR-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
+; EGPR-NEXT:    crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b)
   ret i64 %tmp
 }
@@ -20,6 +27,12 @@ define i64 @crc32_64_64(i64 %a, i64 %b) nounwind {
 ; CHECK-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
 ; CHECK-NEXT:    crc32q %rsi, %rax ## encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
+;
+; EGPR-LABEL: crc32_64_64:
+; EGPR:       ## %bb.0:
+; EGPR-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
+; EGPR-NEXT:    crc32q %rsi, %rax ## encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
+; EGPR-NEXT:    retq ## encoding: [0xc3]
   %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b)
   ret i64 %tmp
 }
diff --git a/llvm/test/CodeGen/X86/invpcid-intrinsic.ll b/llvm/test/CodeGen/X86/invpcid-intrinsic.ll
index 3aa9fde35e23f..19a6249fc708f 100644
--- a/llvm/test/CodeGen/X86/invpcid-intrinsic.ll
+++ b/llvm/test/CodeGen/X86/invpcid-intrinsic.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+invpcid | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+invpcid | FileCheck %s --check-prefix=X86_64
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+invpcid --show-mc-encoding | FileCheck %s --check-prefix=X86_64
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+invpcid,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
 
 define void @test_invpcid(i32 %type, ptr %descriptor) {
 ; X86-LABEL: test_invpcid:
@@ -12,9 +13,15 @@ define void @test_invpcid(i32 %type, ptr %descriptor) {
 ;
 ; X86_64-LABEL: test_invpcid:
 ; X86_64:       # %bb.0: # %entry
-; X86_64-NEXT:    movl %edi, %eax
-; X86_64-NEXT:    invpcid (%rsi), %rax
-; X86_64-NEXT:    retq
+; X86_64-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; X86_64-NEXT:    invpcid (%rsi), %rax # encoding: [0x66,0x0f,0x38,0x82,0x06]
+; X86_64-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_invpcid:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; EGPR-NEXT:    invpcid (%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   call void @llvm.x86.invpcid(i32 %type, ptr %descriptor)
   ret void
@@ -31,9 +38,15 @@ define void @test_invpcid2(ptr readonly %type, ptr %descriptor) {
 ;
 ; X86_64-LABEL: test_invpcid2:
 ; X86_64:       # %bb.0: # %entry
-; X86_64-NEXT:    movl (%rdi), %eax
-; X86_64-NEXT:    invpcid (%rsi), %rax
-; X86_64-NEXT:    retq
+; X86_64-NEXT:    movl (%rdi), %eax # encoding: [0x8b,0x07]
+; X86_64-NEXT:    invpcid (%rsi), %rax # encoding: [0x66,0x0f,0x38,0x82,0x06]
+; X86_64-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_invpcid2:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    movl (%rdi), %eax # encoding: [0x8b,0x07]
+; EGPR-NEXT:    invpcid (%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load i32, ptr %type, align 4
   tail call void @llvm.x86.invpcid(i32 %0, ptr %descriptor) #1
diff --git a/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll b/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll
index 1769bcbf6f605..4d03510ad5d4f 100644
--- a/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll
+++ b/llvm/test/CodeGen/X86/movdir-intrinsic-x86.ll
@@ -1,19 +1,25 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri -mattr=+movdir64b | FileCheck %s --check-prefix=X64
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+movdiri -mattr=+movdir64b | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri -mattr=+movdir64b --show-mc-encoding | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri -mattr=+movdir64b,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
 
 define void @test_movdiri(ptr %p, i32 %v) {
-; X64-LABEL: test_movdiri:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdiri %esi, (%rdi)
-; X64-NEXT:    retq
-;
 ; X32-LABEL: test_movdiri:
 ; X32:       # %bb.0: # %entry
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movdiri %eax, (%ecx)
 ; X32-NEXT:    retl
+;
+; X64-LABEL: test_movdiri:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movdiri %esi, (%rdi) # encoding: [0x0f,0x38,0xf9,0x37]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_movdiri:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    movdiri %esi, (%rdi) # encoding: [0x62,0xf4,0x7c,0x08,0xf9,0x37]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   call void @llvm.x86.directstore32(ptr %p, i32 %v)
   ret void
@@ -22,17 +28,22 @@ entry:
 declare void @llvm.x86.directstore32(ptr, i32)
 
 define void @test_movdir64b(ptr %dst, ptr %src) {
-; X64-LABEL: test_movdir64b:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    movdir64b (%rsi), %rdi
-; X64-NEXT:    retq
-;
 ; X32-LABEL: test_movdir64b:
 ; X32:       # %bb.0: # %entry
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movdir64b (%eax), %ecx
 ; X32-NEXT:    retl
+;
+; X64-LABEL: test_movdir64b:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movdir64b (%rsi), %rdi # encoding: [0x66,0x0f,0x38,0xf8,0x3e]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_movdir64b:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    movdir64b (%rsi), %rdi # encoding: [0x62,0xf4,0x7d,0x08,0xf8,0x3e]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   call void @llvm.x86.movdir64b(ptr %dst, ptr %src)
   ret void
diff --git a/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll b/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll
index b20d7df26515d..ddd44f6d73d59 100644
--- a/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll
+++ b/llvm/test/CodeGen/X86/movdir-intrinsic-x86_64.ll
@@ -1,11 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri --show-mc-encoding | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri,+egpr --show-mc-encoding | FileCheck %s -check-prefix=EGPR
 
 define void @test_movdiri(ptr %p, i64 %v) {
 ; CHECK-LABEL: test_movdiri:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movdiri %rsi, (%rdi)
-; CHECK-NEXT:    retq
+; CHECK-NEXT:    movdiri %rsi, (%rdi) # encoding: [0x48,0x0f,0x38,0xf9,0x37]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_movdiri:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    movdiri %rsi, (%rdi) # encoding: [0x62,0xf4,0xfc,0x08,0xf9,0x37]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   call void @llvm.x86.directstore64(ptr %p, i64 %v)
   ret void
diff --git a/llvm/test/CodeGen/X86/sha.ll b/llvm/test/CodeGen/X86/sha.ll
index d5427556dc0bb..d8fa354a39135 100644
--- a/llvm/test/CodeGen/X86/sha.ll
+++ b/llvm/test/CodeGen/X86/sha.ll
@@ -1,24 +1,45 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mattr=+sha -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
-; RUN: llc < %s -mattr=+sha,+avx2 -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: llc < %s -mattr=+sha,+avx2 -mtriple=x86_64-unknown-unknown --show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: llc < %s -mattr=+sha,+egpr -mtriple=x86_64-unknown-unknown --show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=EGPR
 
 declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone
 
 define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
-; CHECK-LABEL: test_sha1rnds4rr:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha1rnds4 $3, %xmm1, %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: test_sha1rnds4rr:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    sha1rnds4 $3, %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_sha1rnds4rr:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # encoding: [0x0f,0x3a,0xcc,0xc1,0x03]
+; AVX-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_sha1rnds4rr:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0xc1,0x03]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3)
   ret <4 x i32> %0
 }
 
 define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, ptr %b) nounwind uwtable {
-; CHECK-LABEL: test_sha1rnds4rm:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: test_sha1rnds4rm:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_sha1rnds4rm:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x0f,0x3a,0xcc,0x07,0x03]
+; AVX-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_sha1rnds4rm:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
   %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
@@ -28,20 +49,40 @@ entry:
 declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone
 
 define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
-; CHECK-LABEL: test_sha1nexterr:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha1nexte %xmm1, %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: test_sha1nexterr:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    sha1nexte %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_sha1nexterr:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    sha1nexte %xmm1, %xmm0 # encoding: [0x0f,0x38,0xc8,0xc1]
+; AVX-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_sha1nexterr:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    sha1nexte %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0xc1]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b)
   ret <4 x i32> %0
 }
 
 define <4 x i32> @test_sha1nexterm(<4 x i32> %a, ptr %b) nounwind uwtable {
-; CHECK-LABEL: test_sha1nexterm:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha1nexte (%rdi), %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: test_sha1nexterm:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    sha1nexte (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_sha1nexterm:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    sha1nexte (%rdi), %xmm0 # encoding: [0x0f,0x38,0xc8,0x07]
+; AVX-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_sha1nexterm:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    sha1nexte (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0x07]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
   %1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0)
@@ -51,20 +92,40 @@ entry:
 declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone
 
 define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
-; CHECK-LABEL: test_sha1msg1rr:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha1msg1 %xmm1, %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: test_sha1msg1rr:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    sha1msg1 %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_sha1msg1rr:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    sha1msg1 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xc9,0xc1]
+; AVX-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_sha1msg1rr:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    sha1msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0xc1]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b)
   ret <4 x i32> %0
 }
 
 define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
-; CHECK-LABEL: test_sha1msg1rm:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha1msg1 (%rdi), %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: test_sha1msg1rm:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    sha1msg1 (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_sha1msg1rm:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    sha1msg1 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xc9,0x07]
+; AVX-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_sha1msg1rm:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    sha1msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0x07]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
   %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0)
@@ -74,20 +135,40 @@ entry:
 declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone
 
 define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
-; CHECK-LABEL: test_sha1msg2rr:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha1msg2 %xmm1, %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: test_sha1msg2rr:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    sha1msg2 %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_sha1msg2rr:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    sha1msg2 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xca,0xc1]
+; AVX-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_sha1msg2rr:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    sha1msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0xc1]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b)
   ret <4 x i32> %0
 }
 
 define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
-; CHECK-LABEL: test_sha1msg2rm:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha1msg2 (%rdi), %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: test_sha1msg2rm:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    sha1msg2 (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_sha1msg2rm:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    sha1msg2 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xca,0x07]
+; AVX-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_sha1msg2rm:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    sha1msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0x07]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
   %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0)
@@ -107,11 +188,19 @@ define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) n
 ;
 ; AVX-LABEL: test_sha256rnds2rr:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovaps %xmm0, %xmm3
-; AVX-NEXT:    vmovaps %xmm2, %xmm0
-; AVX-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3
-; AVX-NEXT:    vmovaps %xmm3, %xmm0
-; AVX-NEXT:    retq
+; AVX-NEXT:    vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
+; AVX-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
+; AVX-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # encoding: [0x0f,0x38,0xcb,0xd9]
+; AVX-NEXT:    vmovaps %xmm3, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc3]
+; AVX-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_sha256rnds2rr:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8]
+; EGPR-NEXT:    movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2]
+; EGPR-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0xd9]
+; EGPR-NEXT:    movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
   ret <4 x i32> %0
@@ -128,11 +217,19 @@ define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, ptr %b, <4 x i32> %c) nounwin
 ;
 ; AVX-LABEL: test_sha256rnds2rm:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vmovaps %xmm0, %xmm2
-; AVX-NEXT:    vmovaps %xmm1, %xmm0
-; AVX-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2
-; AVX-NEXT:    vmovaps %xmm2, %xmm0
-; AVX-NEXT:    retq
+; AVX-NEXT:    vmovaps %xmm0, %xmm2 # encoding: [0xc5,0xf8,0x28,0xd0]
+; AVX-NEXT:    vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
+; AVX-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2 # encoding: [0x0f,0x38,0xcb,0x17]
+; AVX-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
+; AVX-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_sha256rnds2rm:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    movaps %xmm0, %xmm2 # encoding: [0x0f,0x28,0xd0]
+; EGPR-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
+; EGPR-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0x17]
+; EGPR-NEXT:    movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
   %1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c)
@@ -142,20 +239,40 @@ entry:
 declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone
 
 define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
-; CHECK-LABEL: test_sha256msg1rr:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha256msg1 %xmm1, %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: test_sha256msg1rr:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    sha256msg1 %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_sha256msg1rr:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    sha256msg1 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xcc,0xc1]
+; AVX-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_sha256msg1rr:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    sha256msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0xc1]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b)
   ret <4 x i32> %0
 }
 
 define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
-; CHECK-LABEL: test_sha256msg1rm:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha256msg1 (%rdi), %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: test_sha256msg1rm:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    sha256msg1 (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_sha256msg1rm:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    sha256msg1 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xcc,0x07]
+; AVX-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_sha256msg1rm:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    sha256msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0x07]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
   %1 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %0)
@@ -165,20 +282,40 @@ entry:
 declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>) nounwind readnone
 
 define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
-; CHECK-LABEL: test_sha256msg2rr:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha256msg2 %xmm1, %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: test_sha256msg2rr:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    sha256msg2 %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_sha256msg2rr:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    sha256msg2 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xcd,0xc1]
+; AVX-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_sha256msg2rr:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    sha256msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0xc1]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b)
   ret <4 x i32> %0
 }
 
 define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
-; CHECK-LABEL: test_sha256msg2rm:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sha256msg2 (%rdi), %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: test_sha256msg2rm:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    sha256msg2 (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_sha256msg2rm:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    sha256msg2 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xcd,0x07]
+; AVX-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_sha256msg2rm:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    sha256msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0x07]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
   %1 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %0)
@@ -195,12 +332,20 @@ define <8 x i32> @test_sha1rnds4_zero_extend(<4 x i32> %a, ptr %b) nounwind uwta
 ;
 ; AVX-LABEL: test_sha1rnds4_zero_extend:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
-; AVX-NEXT:    vmovaps %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x0f,0x3a,0xcc,0x07,0x03]
+; AVX-NEXT:    vmovaps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc0]
+; AVX-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_sha1rnds4_zero_extend:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03]
+; EGPR-NEXT:    xorps %xmm1, %xmm1 # encoding: [0x0f,0x57,0xc9]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <4 x i32>, ptr %b
   %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
   %2 = shufflevector <4 x i32> %1, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i32> %2
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/X86/x64-cet-intrinsics.ll b/llvm/test/CodeGen/X86/x64-cet-intrinsics.ll
index 4c28c8ab43699..bf87ae5cac05a 100644
--- a/llvm/test/CodeGen/X86/x64-cet-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/x64-cet-intrinsics.ll
@@ -1,11 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+shstk | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+shstk --show-mc-encoding | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+shstk,+egpr --show-mc-encoding | FileCheck %s -check-prefix=EGPR
 
 define void @test_incsspd(i32 %a) local_unnamed_addr {
 ; CHECK-LABEL: test_incsspd:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    incsspd %edi
-; CHECK-NEXT:    retq
+; CHECK-NEXT:    incsspd %edi ## encoding: [0xf3,0x0f,0xae,0xef]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+;
+; EGPR-LABEL: test_incsspd:
+; EGPR:       ## %bb.0: ## %entry
+; EGPR-NEXT:    incsspd %edi ## encoding: [0xf3,0x0f,0xae,0xef]
+; EGPR-NEXT:    retq ## encoding: [0xc3]
 entry:
   tail call void @llvm.x86.incsspd(i32 %a)
   ret void
@@ -16,9 +22,15 @@ declare void @llvm.x86.incsspd(i32)
 define void @test_incsspq(i32 %a) local_unnamed_addr {
 ; CHECK-LABEL: test_incsspq:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    movslq %edi, %rax
-; CHECK-NEXT:    incsspq %rax
-; CHECK-NEXT:    retq
+; CHECK-NEXT:    movslq %edi, %rax ## encoding: [0x48,0x63,0xc7]
+; CHECK-NEXT:    incsspq %rax ## encoding: [0xf3,0x48,0x0f,0xae,0xe8]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+;
+; EGPR-LABEL: test_incsspq:
+; EGPR:       ## %bb.0: ## %entry
+; EGPR-NEXT:    movslq %edi, %rax ## encoding: [0x48,0x63,0xc7]
+; EGPR-NEXT:    incsspq %rax ## encoding: [0xf3,0x48,0x0f,0xae,0xe8]
+; EGPR-NEXT:    retq ## encoding: [0xc3]
 entry:
   %conv.i = sext i32 %a to i64
   tail call void @llvm.x86.incsspq(i64 %conv.i)
@@ -30,9 +42,15 @@ declare void @llvm.x86.incsspq(i64)
 define i32 @test_rdsspd(i32 %a) {
 ; CHECK-LABEL: test_rdsspd:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    rdsspd %eax
-; CHECK-NEXT:    retq
+; CHECK-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; CHECK-NEXT:    rdsspd %eax ## encoding: [0xf3,0x0f,0x1e,0xc8]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+;
+; EGPR-LABEL: test_rdsspd:
+; EGPR:       ## %bb.0: ## %entry
+; EGPR-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; EGPR-NEXT:    rdsspd %eax ## encoding: [0xf3,0x0f,0x1e,0xc8]
+; EGPR-NEXT:    retq ## encoding: [0xc3]
 entry:
   %0 = call i32 @llvm.x86.rdsspd(i32 %a)
   ret i32 %0
@@ -43,9 +61,15 @@ declare i32 @llvm.x86.rdsspd(i32)
 define i64 @test_rdsspq(i64 %a) {
 ; CHECK-LABEL: test_rdsspq:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    rdsspq %rax
-; CHECK-NEXT:    retq
+; CHECK-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
+; CHECK-NEXT:    rdsspq %rax ## encoding: [0xf3,0x48,0x0f,0x1e,0xc8]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+;
+; EGPR-LABEL: test_rdsspq:
+; EGPR:       ## %bb.0: ## %entry
+; EGPR-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
+; EGPR-NEXT:    rdsspq %rax ## encoding: [0xf3,0x48,0x0f,0x1e,0xc8]
+; EGPR-NEXT:    retq ## encoding: [0xc3]
 entry:
   %0 = call i64 @llvm.x86.rdsspq(i64 %a)
   ret i64 %0
@@ -56,8 +80,13 @@ declare i64 @llvm.x86.rdsspq(i64)
 define void @test_saveprevssp() {
 ; CHECK-LABEL: test_saveprevssp:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    saveprevssp
-; CHECK-NEXT:    retq
+; CHECK-NEXT:    saveprevssp ## encoding: [0xf3,0x0f,0x01,0xea]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+;
+; EGPR-LABEL: test_saveprevssp:
+; EGPR:       ## %bb.0: ## %entry
+; EGPR-NEXT:    saveprevssp ## encoding: [0xf3,0x0f,0x01,0xea]
+; EGPR-NEXT:    retq ## encoding: [0xc3]
 entry:
   tail call void @llvm.x86.saveprevssp()
   ret void
@@ -68,8 +97,13 @@ declare void @llvm.x86.saveprevssp()
 define void @test_rstorssp(ptr %__p) {
 ; CHECK-LABEL: test_rstorssp:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    rstorssp (%rdi)
-; CHECK-NEXT:    retq
+; CHECK-NEXT:    rstorssp (%rdi) ## encoding: [0xf3,0x0f,0x01,0x2f]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+;
+; EGPR-LABEL: test_rstorssp:
+; EGPR:       ## %bb.0: ## %entry
+; EGPR-NEXT:    rstorssp (%rdi) ## encoding: [0xf3,0x0f,0x01,0x2f]
+; EGPR-NEXT:    retq ## encoding: [0xc3]
 entry:
   tail call void @llvm.x86.rstorssp(ptr %__p)
   ret void
@@ -80,8 +114,13 @@ declare void @llvm.x86.rstorssp(ptr)
 define void @test_wrssd(i32 %a, ptr %__p) {
 ; CHECK-LABEL: test_wrssd:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    wrssd %edi, (%rsi)
-; CHECK-NEXT:    retq
+; CHECK-NEXT:    wrssd %edi, (%rsi) ## encoding: [0x0f,0x38,0xf6,0x3e]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+;
+; EGPR-LABEL: test_wrssd:
+; EGPR:       ## %bb.0: ## %entry
+; EGPR-NEXT:    wrssd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7c,0x08,0x66,0x3e]
+; EGPR-NEXT:    retq ## encoding: [0xc3]
 entry:
   tail call void @llvm.x86.wrssd(i32 %a, ptr %__p)
   ret void
@@ -92,8 +131,13 @@ declare void @llvm.x86.wrssd(i32, ptr)
 define void @test_wrssq(i64 %a, ptr %__p) {
 ; CHECK-LABEL: test_wrssq:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    wrssq %rdi, (%rsi)
-; CHECK-NEXT:    retq
+; CHECK-NEXT:    wrssq %rdi, (%rsi) ## encoding: [0x48,0x0f,0x38,0xf6,0x3e]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+;
+; EGPR-LABEL: test_wrssq:
+; EGPR:       ## %bb.0: ## %entry
+; EGPR-NEXT:    wrssq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfc,0x08,0x66,0x3e]
+; EGPR-NEXT:    retq ## encoding: [0xc3]
 entry:
   tail call void @llvm.x86.wrssq(i64 %a, ptr %__p)
   ret void
@@ -104,8 +148,13 @@ declare void @llvm.x86.wrssq(i64, ptr)
 define void @test_wrussd(i32 %a, ptr %__p) {
 ; CHECK-LABEL: test_wrussd:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    wrussd %edi, (%rsi)
-; CHECK-NEXT:    retq
+; CHECK-NEXT:    wrussd %edi, (%rsi) ## encoding: [0x66,0x0f,0x38,0xf5,0x3e]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+;
+; EGPR-LABEL: test_wrussd:
+; EGPR:       ## %bb.0: ## %entry
+; EGPR-NEXT:    wrussd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7d,0x08,0x65,0x3e]
+; EGPR-NEXT:    retq ## encoding: [0xc3]
 entry:
   tail call void @llvm.x86.wrussd(i32 %a, ptr %__p)
   ret void
@@ -116,8 +165,13 @@ declare void @llvm.x86.wrussd(i32, ptr)
 define void @test_wrussq(i64 %a, ptr %__p) {
 ; CHECK-LABEL: test_wrussq:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    wrussq %rdi, (%rsi)
-; CHECK-NEXT:    retq
+; CHECK-NEXT:    wrussq %rdi, (%rsi) ## encoding: [0x66,0x48,0x0f,0x38,0xf5,0x3e]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+;
+; EGPR-LABEL: test_wrussq:
+; EGPR:       ## %bb.0: ## %entry
+; EGPR-NEXT:    wrussq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfd,0x08,0x65,0x3e]
+; EGPR-NEXT:    retq ## encoding: [0xc3]
 entry:
   tail call void @llvm.x86.wrussq(i64 %a, ptr %__p)
   ret void
@@ -128,8 +182,13 @@ declare void @llvm.x86.wrussq(i64, ptr)
 define void @test_setssbsy() {
 ; CHECK-LABEL: test_setssbsy:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    setssbsy
-; CHECK-NEXT:    retq
+; CHECK-NEXT:    setssbsy ## encoding: [0xf3,0x0f,0x01,0xe8]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+;
+; EGPR-LABEL: test_setssbsy:
+; EGPR:       ## %bb.0: ## %entry
+; EGPR-NEXT:    setssbsy ## encoding: [0xf3,0x0f,0x01,0xe8]
+; EGPR-NEXT:    retq ## encoding: [0xc3]
 entry:
   tail call void @llvm.x86.setssbsy()
   ret void
@@ -140,8 +199,13 @@ declare void @llvm.x86.setssbsy()
 define void @test_clrssbsy(ptr %__p) {
 ; CHECK-LABEL: test_clrssbsy:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    clrssbsy (%rdi)
-; CHECK-NEXT:    retq
+; CHECK-NEXT:    clrssbsy (%rdi) ## encoding: [0xf3,0x0f,0xae,0x37]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+;
+; EGPR-LABEL: test_clrssbsy:
+; EGPR:       ## %bb.0: ## %entry
+; EGPR-NEXT:    clrssbsy (%rdi) ## encoding: [0xf3,0x0f,0xae,0x37]
+; EGPR-NEXT:    retq ## encoding: [0xc3]
 entry:
   tail call void @llvm.x86.clrssbsy(ptr %__p)
   ret void

From c7c63c61ce5f5abd7ba2628b837469bbe015a7fb Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang@intel.com>
Date: Wed, 3 Jan 2024 19:04:02 -0800
Subject: [PATCH 5/7] Apply clang-format to X86DomainReassignment.cpp

---
 llvm/lib/Target/X86/X86DomainReassignment.cpp | 40 +++++++------------
 1 file changed, 15 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp
index 7e6b945e5d7a2..20dbaf797e327 100644
--- a/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -620,30 +620,20 @@ void X86DomainReassignment::initConverters() {
   };
 
 #define GET_EGPR_IF_ENABLED(OPC) STI->hasEGPR() ? OPC##_EVEX : OPC
-  createReplacerDstCOPY(X86::MOVZX32rm16,
-                        GET_EGPR_IF_ENABLED(X86::KMOVWkm));
-  createReplacerDstCOPY(X86::MOVZX64rm16,
-                        GET_EGPR_IF_ENABLED(X86::KMOVWkm));
+  createReplacerDstCOPY(X86::MOVZX32rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm));
+  createReplacerDstCOPY(X86::MOVZX64rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm));
 
-  createReplacerDstCOPY(X86::MOVZX32rr16,
-                        GET_EGPR_IF_ENABLED(X86::KMOVWkk));
-  createReplacerDstCOPY(X86::MOVZX64rr16,
-                        GET_EGPR_IF_ENABLED(X86::KMOVWkk));
+  createReplacerDstCOPY(X86::MOVZX32rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk));
+  createReplacerDstCOPY(X86::MOVZX64rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk));
 
   if (STI->hasDQI()) {
-    createReplacerDstCOPY(X86::MOVZX16rm8,
-                          GET_EGPR_IF_ENABLED(X86::KMOVBkm));
-    createReplacerDstCOPY(X86::MOVZX32rm8,
-                          GET_EGPR_IF_ENABLED(X86::KMOVBkm));
-    createReplacerDstCOPY(X86::MOVZX64rm8,
-                          GET_EGPR_IF_ENABLED(X86::KMOVBkm));
-
-    createReplacerDstCOPY(X86::MOVZX16rr8,
-                          GET_EGPR_IF_ENABLED(X86::KMOVBkk));
-    createReplacerDstCOPY(X86::MOVZX32rr8,
-                          GET_EGPR_IF_ENABLED(X86::KMOVBkk));
-    createReplacerDstCOPY(X86::MOVZX64rr8,
-                          GET_EGPR_IF_ENABLED(X86::KMOVBkk));
+    createReplacerDstCOPY(X86::MOVZX16rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm));
+    createReplacerDstCOPY(X86::MOVZX32rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm));
+    createReplacerDstCOPY(X86::MOVZX64rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm));
+
+    createReplacerDstCOPY(X86::MOVZX16rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk));
+    createReplacerDstCOPY(X86::MOVZX32rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk));
+    createReplacerDstCOPY(X86::MOVZX64rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk));
   }
 
   auto createReplacer = [&](unsigned From, unsigned To) {
@@ -696,8 +686,8 @@ void X86DomainReassignment::initConverters() {
 
     // TODO: KTEST is not a replacement for TEST due to flag differences. Need
     // to prove only Z flag is used.
-    //createReplacer(X86::TEST32rr, X86::KTESTDrr);
-    //createReplacer(X86::TEST64rr, X86::KTESTQrr);
+    // createReplacer(X86::TEST32rr, X86::KTESTDrr);
+    // createReplacer(X86::TEST64rr, X86::KTESTQrr);
   }
 
   if (STI->hasDQI()) {
@@ -719,8 +709,8 @@ void X86DomainReassignment::initConverters() {
 
     // TODO: KTEST is not a replacement for TEST due to flag differences. Need
     // to prove only Z flag is used.
-    //createReplacer(X86::TEST8rr, X86::KTESTBrr);
-    //createReplacer(X86::TEST16rr, X86::KTESTWrr);
+    // createReplacer(X86::TEST8rr, X86::KTESTBrr);
+    // createReplacer(X86::TEST16rr, X86::KTESTWrr);
 
     createReplacer(X86::XOR8rr, X86::KXORBrr);
   }

From 337494dbe2ee7ec3773f2f3716b1a15dba5cefd8 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang@intel.com>
Date: Thu, 4 Jan 2024 01:52:23 -0800
Subject: [PATCH 6/7] Address review comments

---
 llvm/lib/Target/X86/X86InstrVMX.td            |  8 ++---
 .../X86/crc32-intrinsics-fast-isel-x86.ll     |  8 ++---
 .../X86/crc32-intrinsics-fast-isel-x86_64.ll  | 29 ++++++++++++++-----
 3 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrVMX.td b/llvm/lib/Target/X86/X86InstrVMX.td
index e672246789721..7cc468fe15ad4 100644
--- a/llvm/lib/Target/X86/X86InstrVMX.td
+++ b/llvm/lib/Target/X86/X86InstrVMX.td
@@ -21,10 +21,10 @@ def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
                Requires<[Not64BitMode]>;
 def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                "invept\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
-               Requires<[In64BitMode, NoEGPR]>;
+               Requires<[In64BitMode]>;
 def INVEPT64_EVEX : I<0xF0, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                       "invept\t{$src2, $src1|$src1, $src2}", []>,
-                    EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasEGPR]>;
+                    EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>;
 
 // 66 0F 38 81
 def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
@@ -32,10 +32,10 @@ def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
                 Requires<[Not64BitMode]>;
 def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                 "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
-                Requires<[In64BitMode, NoEGPR]>;
+                Requires<[In64BitMode]>;
 def INVVPID64_EVEX : I<0xF1, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                        "invvpid\t{$src2, $src1|$src1, $src2}", []>,
-                     EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasEGPR]>;
+                     EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>;
 
 // 0F 01 C1
 def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
index cb93e7a4f4362..873986e99777d 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
@@ -5,7 +5,7 @@
 ; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X86
 ; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 --show-mc-encoding | FileCheck %s -check-prefix=X64
 ; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 --show-mc-encoding | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 --show-mc-encoding | FileCheck %s -check-prefix=EGPR
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s -check-prefix=EGPR
 
 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c
 
@@ -29,7 +29,7 @@ define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind {
 ; EGPR-LABEL: test_mm_crc32_u8:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32b %sil, %eax # encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xc6]
+; EGPR-NEXT:    crc32b %sil, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %trunc = trunc i32 %a1 to i8
   %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc)
@@ -55,7 +55,7 @@ define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind {
 ; EGPR-LABEL: test_mm_crc32_u16:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32w %si, %eax # encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
+; EGPR-NEXT:    crc32w %si, %eax # encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %trunc = trunc i32 %a1 to i16
   %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc)
@@ -79,7 +79,7 @@ define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind {
 ; EGPR-LABEL: test_mm_crc32_u32:
 ; EGPR:       # %bb.0:
 ; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
-; EGPR-NEXT:    crc32l %esi, %eax # encoding: [0xf2,0x0f,0x38,0xf1,0xc6]
+; EGPR-NEXT:    crc32l %esi, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
   %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
   ret i32 %res
diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
index e0ec432b38549..71d955bda7523 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s
-; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 --show-mc-encoding | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 --show-mc-encoding | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s -check-prefix=EGPR
 
 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c
 
@@ -8,9 +9,15 @@
 define i64 @test_mm_crc64_u8(i64 %a0, i32 %a1) nounwind{
 ; CHECK-LABEL: test_mm_crc64_u8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    crc32b %sil, %edi
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    retq
+; CHECK-NEXT:    crc32b %sil, %edi # encoding: [0xf2,0x40,0x0f,0x38,0xf0,0xfe]
+; CHECK-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_mm_crc64_u8:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    crc32b %sil, %edi # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xfe]
+; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; EGPR-NEXT:    retq # encoding: [0xc3]
   %trunc = trunc i32 %a1 to i8
   %res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %trunc)
   ret i64 %res
@@ -20,9 +27,15 @@ declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind readnone
 define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{
 ; CHECK-LABEL: test_mm_crc64_u64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    crc32q %rsi, %rax
-; CHECK-NEXT:    retq
+; CHECK-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; CHECK-NEXT:    crc32q %rsi, %rax # encoding: [0xf2,0x48,0x0f,0x38,0xf1,0xc6]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; EGPR-LABEL: test_mm_crc64_u64:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; EGPR-NEXT:    crc32q %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
+; EGPR-NEXT:    retq # encoding: [0xc3]
   %res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
   ret i64 %res
 }

From b1beebe1515783ddfe1d5743b5449703b2daa579 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang@intel.com>
Date: Thu, 4 Jan 2024 18:00:27 -0800
Subject: [PATCH 7/7] Restore mistakenly changed crc32-intrinsics-x86.ll

---
 llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
index 96258ea9bcbb7..84c7f90cfe3c3 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-x86.ll
@@ -1,14 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+crc32 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X86
 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32 -show-mc-encoding | FileCheck %s --check-prefixes=X64
 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s --check-prefixes=EGPR
 
 define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
 ; X86-LABEL: crc32_32_8:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    crc32b {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    retl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    crc32b {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf0,0x44,0x24,0x08]
+; X86-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X64-LABEL: crc32_32_8:
 ; X64:       ## %bb.0:
@@ -29,9 +29,9 @@ declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
 define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
 ; X86-LABEL: crc32_32_16:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    crc32w {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    retl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    crc32w {{[0-9]+}}(%esp), %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08]
+; X86-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X64-LABEL: crc32_32_16:
 ; X64:       ## %bb.0:
@@ -52,9 +52,9 @@ declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
 define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
 ; X86-LABEL: crc32_32_32:
 ; X86:       ## %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    crc32l {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    retl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT:    crc32l {{[0-9]+}}(%esp), %eax ## encoding: [0xf2,0x0f,0x38,0xf1,0x44,0x24,0x08]
+; X86-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X64-LABEL: crc32_32_32:
 ; X64:       ## %bb.0: