diff --git a/library/stdarch/Cargo.lock b/library/stdarch/Cargo.lock index 3b76eed770bd0..80f424dfdd8da 100644 --- a/library/stdarch/Cargo.lock +++ b/library/stdarch/Cargo.lock @@ -147,7 +147,6 @@ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" name = "core_arch" version = "0.1.5" dependencies = [ - "std_detect", "stdarch-test", "syscalls", ] diff --git a/library/stdarch/README.md b/library/stdarch/README.md index 70ec256e681e0..9a35f4cd6ff58 100644 --- a/library/stdarch/README.md +++ b/library/stdarch/README.md @@ -16,3 +16,9 @@ This repository contains two main crates: The `std::simd` component now lives in the [`packed_simd_2`](https://github.com/rust-lang/packed_simd) crate. + +## Synchronizing josh subtree with rustc + +This repository is linked to `rust-lang/rust` as a [josh](https://josh-project.github.io/josh/intro.html) subtree. You can use the [rustc-josh-sync](https://github.com/rust-lang/josh-sync) tool to perform synchronization. + +You can find a guide on how to perform the synchronization [here](https://rustc-dev-guide.rust-lang.org/external-repos.html#synchronizing-a-josh-subtree). diff --git a/library/stdarch/crates/core_arch/Cargo.toml b/library/stdarch/crates/core_arch/Cargo.toml index f4bd5fc552afe..670447a2d5a8b 100644 --- a/library/stdarch/crates/core_arch/Cargo.toml +++ b/library/stdarch/crates/core_arch/Cargo.toml @@ -22,7 +22,6 @@ maintenance = { status = "experimental" } [dev-dependencies] stdarch-test = { version = "0.*", path = "../stdarch-test" } -std_detect = { version = "0.*", path = "../std_detect" } [target.'cfg(all(target_arch = "x86_64", target_os = "linux"))'.dev-dependencies] syscalls = { version = "0.6.18", default-features = false } diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index 96ed82021b4b2..32f144bc7adc4 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -7925,7 +7925,7 @@ pub fn vcvth_f16_u64(a: u64) -> f16 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] pub fn vcvth_n_f16_s16<const N: i32>(a: i16) -> f16 { static_assert!(N >= 1 && N <= 16); - vcvth_n_f16_s32::<N>(a as i32) as f16 + vcvth_n_f16_s32::<N>(a as i32) } #[doc = "Fixed-point convert to floating-point"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_f16_s32)"] @@ -7972,7 +7972,7 @@ pub fn vcvth_n_f16_s64<const N: i32>(a: i64) -> f16 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] pub fn vcvth_n_f16_u16<const N: i32>(a: u16) -> f16 { static_assert!(N >= 1 && N <= 16); - vcvth_n_f16_u32::<N>(a as u32) as f16 + vcvth_n_f16_u32::<N>(a as u32) } #[doc = "Fixed-point convert to floating-point"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvth_n_f16_u32)"] @@ -17158,7 +17158,7 @@ pub fn vqdmlalh_s16(a: i32, b: i16, c: i16) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlals_s32(a: i64, b: i32, c: i32) -> i64 { let x: i64 = vqaddd_s64(a, vqdmulls_s32(b, c)); - x as i64 + x } #[doc = "Signed saturating doubling multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_lane_s16)"] @@ -17324,7 +17324,7 @@ pub fn vqdmlslh_s16(a: i32, b: i16, c: i16) -> i32 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqdmlsls_s32(a: i64, b: i32, c: i32) -> i64 { let x: 
i64 = vqsubd_s64(a, vqdmulls_s32(b, c)); - x as i64 + x } #[doc = "Vector saturating doubling multiply high by scalar"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_lane_s16)"] @@ -19495,10 +19495,7 @@ pub fn vqtbl1q_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t { #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqtbl1_u8(a: uint8x16_t, b: uint8x8_t) -> uint8x8_t { - unsafe { - let x = transmute(vqtbl1(transmute(a), b)); - x - } + unsafe { transmute(vqtbl1(transmute(a), b)) } } #[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_u8)"] @@ -19507,10 +19504,7 @@ pub fn vqtbl1_u8(a: uint8x16_t, b: uint8x8_t) -> uint8x8_t { #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqtbl1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { - let x = transmute(vqtbl1q(transmute(a), b)); - x - } + unsafe { transmute(vqtbl1q(transmute(a), b)) } } #[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_p8)"] @@ -19519,10 +19513,7 @@ pub fn vqtbl1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqtbl1_p8(a: poly8x16_t, b: uint8x8_t) -> poly8x8_t { - unsafe { - let x = transmute(vqtbl1(transmute(a), b)); - x - } + unsafe { transmute(vqtbl1(transmute(a), b)) } } #[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_p8)"] @@ -19531,10 +19522,7 @@ pub fn vqtbl1_p8(a: poly8x16_t, b: uint8x8_t) -> poly8x8_t { #[cfg_attr(test, assert_instr(tbl))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqtbl1q_p8(a: poly8x16_t, b: uint8x16_t) -> poly8x16_t { - unsafe { - let x = transmute(vqtbl1q(transmute(a), b)); - x - } + unsafe { transmute(vqtbl1q(transmute(a), b)) } } #[doc = "Table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2)"] @@ -20397,10 +20385,7 @@ pub fn vqtbx1q_s8(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t { #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqtbx1_u8(a: uint8x8_t, b: uint8x16_t, c: uint8x8_t) -> uint8x8_t { - unsafe { - let x = transmute(vqtbx1(transmute(a), transmute(b), c)); - x - } + unsafe { transmute(vqtbx1(transmute(a), transmute(b), c)) } } #[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_u8)"] @@ -20409,10 +20394,7 @@ pub fn vqtbx1_u8(a: uint8x8_t, b: uint8x16_t, c: uint8x8_t) -> uint8x8_t { #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqtbx1q_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { - unsafe { - let x = transmute(vqtbx1q(transmute(a), transmute(b), c)); - x - } + unsafe { transmute(vqtbx1q(transmute(a), transmute(b), c)) } } #[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_p8)"] @@ -20421,10 +20403,7 @@ pub fn vqtbx1q_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn 
vqtbx1_p8(a: poly8x8_t, b: poly8x16_t, c: uint8x8_t) -> poly8x8_t { - unsafe { - let x = transmute(vqtbx1(transmute(a), transmute(b), c)); - x - } + unsafe { transmute(vqtbx1(transmute(a), transmute(b), c)) } } #[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_p8)"] @@ -20433,10 +20412,7 @@ pub fn vqtbx1_p8(a: poly8x8_t, b: poly8x16_t, c: uint8x8_t) -> poly8x8_t { #[cfg_attr(test, assert_instr(tbx))] #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vqtbx1q_p8(a: poly8x16_t, b: poly8x16_t, c: uint8x16_t) -> poly8x16_t { - unsafe { - let x = transmute(vqtbx1q(transmute(a), transmute(b), c)); - x - } + unsafe { transmute(vqtbx1q(transmute(a), transmute(b), c)) } } #[doc = "Extended table look-up"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2)"] @@ -23785,14 +23761,7 @@ pub fn vrndph_f16(a: f16) -> f16 { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg_attr(test, assert_instr(frintx))] pub fn vrndx_f16(a: float16x4_t) -> float16x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.rint.v4f16" - )] - fn _vrndx_f16(a: float16x4_t) -> float16x4_t; - } - unsafe { _vrndx_f16(a) } + unsafe { simd_round_ties_even(a) } } #[doc = "Floating-point round to integral exact, using current rounding mode"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f16)"] @@ -23801,14 +23770,7 @@ pub fn vrndx_f16(a: float16x4_t) -> float16x4_t { #[unstable(feature = "stdarch_neon_f16", issue = "136306")] #[cfg_attr(test, assert_instr(frintx))] pub fn vrndxq_f16(a: float16x8_t) -> float16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.rint.v8f16" - )] - fn _vrndxq_f16(a: float16x8_t) -> float16x8_t; - } - unsafe { _vrndxq_f16(a) } + unsafe { simd_round_ties_even(a) } } #[doc = "Floating-point round to integral exact, using current rounding mode"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f32)"] @@ -23817,14 +23779,7 @@ pub fn vrndxq_f16(a: float16x8_t) -> float16x8_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(frintx))] pub fn vrndx_f32(a: float32x2_t) -> float32x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.rint.v2f32" - )] - fn _vrndx_f32(a: float32x2_t) -> float32x2_t; - } - unsafe { _vrndx_f32(a) } + unsafe { simd_round_ties_even(a) } } #[doc = "Floating-point round to integral exact, using current rounding mode"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f32)"] @@ -23833,14 +23788,7 @@ pub fn vrndx_f32(a: float32x2_t) -> float32x2_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(frintx))] pub fn vrndxq_f32(a: float32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.rint.v4f32" - )] - fn _vrndxq_f32(a: float32x4_t) -> float32x4_t; - } - unsafe { _vrndxq_f32(a) } + unsafe { simd_round_ties_even(a) } } #[doc = "Floating-point round to integral exact, using current rounding mode"] #[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f64)"] @@ -23849,14 +23797,7 @@ pub fn vrndxq_f32(a: float32x4_t) -> float32x4_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(frintx))] pub fn vrndx_f64(a: float64x1_t) -> float64x1_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.rint.v1f64" - )] - fn _vrndx_f64(a: float64x1_t) -> float64x1_t; - } - unsafe { _vrndx_f64(a) } + unsafe { simd_round_ties_even(a) } } #[doc = "Floating-point round to integral exact, using current rounding mode"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f64)"] @@ -23865,14 +23806,7 @@ pub fn vrndx_f64(a: float64x1_t) -> float64x1_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(frintx))] pub fn vrndxq_f64(a: float64x2_t) -> float64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.rint.v2f64" - )] - fn _vrndxq_f64(a: float64x2_t) -> float64x2_t; - } - unsafe { _vrndxq_f64(a) } + unsafe { simd_round_ties_even(a) } } #[doc = "Floating-point round to integral, using current rounding mode"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxh_f16)"] @@ -24082,7 +24016,6 @@ pub fn vrsqrtes_f32(a: f32) -> f32 { #[doc = "Reciprocal square-root estimate."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteh_f16)"] #[inline] -#[target_feature(enable = "neon,fp16")] #[cfg_attr(test, assert_instr(frsqrte))] #[target_feature(enable = "neon,fp16")] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] diff --git a/library/stdarch/crates/core_arch/src/arm_shared/mod.rs b/library/stdarch/crates/core_arch/src/arm_shared/mod.rs index 527b53de99d95..8074648a28a28 100644 --- a/library/stdarch/crates/core_arch/src/arm_shared/mod.rs +++ b/library/stdarch/crates/core_arch/src/arm_shared/mod.rs @@ -20,10 +20,10 @@ //! Section 10.1 of ACLE says: //! //! - "In the sequence of Arm architectures { v5, v5TE, v6, v6T2, v7 } each architecture includes -//! its predecessor instruction set." +//! its predecessor's instruction set." //! //! - "In the sequence of Thumb-only architectures { v6-M, v7-M, v7E-M } each architecture includes -//! its predecessor instruction set." +//! its predecessor's instruction set." //! //! From that info and from looking at how LLVM features work (using custom targets) we can identify //! features that are subsets of others: @@ -38,7 +38,7 @@ //! *NOTE*: Section 5.4.7 of ACLE says: //! //! - "__ARM_FEATURE_DSP is defined to 1 if the DSP (v5E) instructions are supported and the -//! intrinsics defined in Saturating intrinsics are available." +//! intrinsics defined in Saturating intrinsics are available." //! //! This does *not* match how LLVM uses the '+dsp' feature; this feature is not set for v5te //! targets so we have to work around this difference. 
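[Review note, not part of the patch] The `vrndx*` hunks above replace hand-declared `llvm.rint.*` bindings with the generic `simd_round_ties_even` intrinsic, which lowers to the same `frintx` instruction for each vector type. A minimal sketch of the rounding behavior involved, using the stable scalar `round_ties_even`; this assumes the default FP environment, whereas `frintx` itself follows whatever rounding mode is currently in effect:

```rust
fn main() {
    // Round to integral, ties to even: under the default
    // round-to-nearest mode, halfway cases go to the even neighbor
    // rather than away from zero.
    assert_eq!(0.5f32.round_ties_even(), 0.0);
    assert_eq!(1.5f32.round_ties_even(), 2.0);
    assert_eq!(2.5f32.round_ties_even(), 2.0);
}
```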
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs index 286f1868852aa..4df1b741485b9 100644 --- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs @@ -40758,16 +40758,7 @@ pub fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i8")] fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t; } - unsafe { - _vqshlu_n_s8( - a, - const { - int8x8_t([ - N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, - ]) - }, - ) - } + unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) } } #[doc = "Signed saturating shift left unsigned"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"] @@ -40783,17 +40774,7 @@ pub fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v16i8")] fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t; } - unsafe { - _vqshluq_n_s8( - a, - const { - int8x16_t([ - N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, - N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, - ]) - }, - ) - } + unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) } } #[doc = "Signed saturating shift left unsigned"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"] @@ -40809,12 +40790,7 @@ pub fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i16")] fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t; } - unsafe { - _vqshlu_n_s16( - a, - const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) }, - ) - } + unsafe { _vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) } } #[doc = "Signed saturating shift left unsigned"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"] @@ -40830,16 +40806,7 @@ pub fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i16")] fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t; } - unsafe { - _vqshluq_n_s16( - a, - const { - int16x8_t([ - N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, - ]) - }, - ) - } + unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) } } #[doc = "Signed saturating shift left unsigned"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"] @@ -40855,7 +40822,7 @@ pub fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i32")] fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t; } - unsafe { _vqshlu_n_s32(a, const { int32x2_t([N as i32, N as i32]) }) } + unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) } } #[doc = "Signed saturating shift left unsigned"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"] @@ -40871,12 +40838,7 @@ pub fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i32")] fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t; } - unsafe { - _vqshluq_n_s32( - a, - const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) }, - ) - } + unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) } } #[doc = "Signed saturating shift left unsigned"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"] @@ -40908,7 +40870,7 @@ pub fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i64")] fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t; } - unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64, N as i64]) }) } + unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) } } #[doc = "Signed saturating shift left unsigned"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"] @@ -40927,16 +40889,7 @@ pub fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t { )] fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t; } - unsafe { - _vqshlu_n_s8( - a, - const { - int8x8_t([ - N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, - ]) - }, - ) - } + unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) } } #[doc = "Signed saturating shift left unsigned"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"] @@ -40955,17 +40908,7 @@ pub fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t { )] fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t; } - unsafe { - _vqshluq_n_s8( - a, - const { - int8x16_t([ - N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, - N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, - ]) - }, - ) - } + unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) } } #[doc = "Signed saturating shift left unsigned"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"] @@ -40984,12 +40927,7 @@ pub fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t { )] fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t; } - unsafe { - _vqshlu_n_s16( - a, - const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) }, - ) - } + unsafe { _vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) } } #[doc = "Signed saturating shift left unsigned"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"] @@ -41008,16 +40946,7 @@ pub fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t { )] fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t; } - unsafe { - _vqshluq_n_s16( - a, - const { - int16x8_t([ - N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, - ]) - }, - ) - } + unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) } } #[doc = "Signed saturating shift left unsigned"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"] @@ -41036,7 +40965,7 @@ pub fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t { )] fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t; } - unsafe { _vqshlu_n_s32(a, const { int32x2_t([N as i32, N as i32]) }) } + unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) } } #[doc = "Signed saturating shift left unsigned"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"] @@ -41055,12 +40984,7 @@ pub fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t { )] fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t; } - unsafe { - _vqshluq_n_s32( - a, - const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) }, - ) - } + unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) } } #[doc = "Signed saturating shift left unsigned"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"] @@ -41098,7 +41022,7 @@ pub fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t { )] fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t; } - unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64, N as i64]) }) } + unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) } } #[doc = "Signed saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"]
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs index 0683d48ed3271..60c9daef68c42 100644 --- a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs @@ -777,8 +777,8 @@ pub struct float16x8x2_t(pub float16x8_t, pub float16x8_t); #[repr(C)] #[derive(Copy, Clone, Debug)] #[unstable(feature = "stdarch_neon_f16", issue = "136306")] - pub struct float16x8x3_t(pub float16x8_t, pub float16x8_t, pub float16x8_t); + /// Arm-specific type containing four `float16x8_t` vectors. #[repr(C)] #[derive(Copy, Clone, Debug)]
diff --git a/library/stdarch/crates/core_arch/src/lib.rs b/library/stdarch/crates/core_arch/src/lib.rs index 340c4c510d784..c58580f641780 100644 --- a/library/stdarch/crates/core_arch/src/lib.rs +++ b/library/stdarch/crates/core_arch/src/lib.rs @@ -75,9 +75,7 @@ #[cfg(test)] #[macro_use] extern crate std; -#[cfg(test)] -#[macro_use] -extern crate std_detect; + #[path = "mod.rs"] mod core_arch;
diff --git a/library/stdarch/crates/core_arch/src/powerpc/altivec.rs b/library/stdarch/crates/core_arch/src/powerpc/altivec.rs index 2deeb53c20995..a7bbf35ed8d0b 100644 --- a/library/stdarch/crates/core_arch/src/powerpc/altivec.rs +++ b/library/stdarch/crates/core_arch/src/powerpc/altivec.rs @@ -360,25 +360,6 @@ unsafe extern "C" { #[link_name = "llvm.ppc.altivec.vsrv"] fn vsrv(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; - #[link_name = "llvm.fshl.v16i8"] - fn fshlb( - a: vector_unsigned_char, - b: vector_unsigned_char, - c: vector_unsigned_char, - ) -> vector_unsigned_char; - #[link_name = "llvm.fshl.v8i16"] - fn fshlh( - a: vector_unsigned_short, - b: vector_unsigned_short, - c: vector_unsigned_short, - ) -> vector_unsigned_short; - #[link_name = "llvm.fshl.v4i32"] - fn fshlw( - a: vector_unsigned_int, - b: vector_unsigned_int, - c: vector_unsigned_int, - ) -> vector_unsigned_int; - #[link_name = "llvm.nearbyint.v4f32"] fn vrfin(a: vector_float) -> vector_float; } @@ -3193,19 +3174,19 @@ mod sealed { impl_vec_cntlz! { vec_vcntlzw(vector_unsigned_int) } macro_rules! impl_vrl { - ($fun:ident $intr:ident $ty:ident) => { + ($fun:ident $ty:ident) => { #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr($fun))] unsafe fn $fun(a: t_t_l!($ty), b: t_t_l!($ty)) -> t_t_l!($ty) { - transmute($intr(transmute(a), transmute(a), transmute(b))) + simd_funnel_shl(a, a, b) } }; } - impl_vrl! { vrlb fshlb u8 } - impl_vrl! { vrlh fshlh u16 } - impl_vrl! { vrlw fshlw u32 } + impl_vrl! { vrlb u8 } + impl_vrl! { vrlh u16 } + impl_vrl! 
{ vrlw u32 } #[unstable(feature = "stdarch_powerpc", issue = "111145")] pub trait VectorRl { diff --git a/library/stdarch/crates/core_arch/src/powerpc/macros.rs b/library/stdarch/crates/core_arch/src/powerpc/macros.rs index af47494e8fb40..24d86f1018c8f 100644 --- a/library/stdarch/crates/core_arch/src/powerpc/macros.rs +++ b/library/stdarch/crates/core_arch/src/powerpc/macros.rs @@ -278,6 +278,7 @@ macro_rules! impl_from { ($s: ident) => { #[unstable(feature = "stdarch_powerpc", issue = "111145")] impl From<$s> for s_t_l!($s) { + #[inline] fn from (v: $s) -> Self { unsafe { transmute(v) @@ -297,6 +298,7 @@ macro_rules! impl_neg { #[unstable(feature = "stdarch_powerpc", issue = "111145")] impl crate::ops::Neg for s_t_l!($s) { type Output = s_t_l!($s); + #[inline] fn neg(self) -> Self::Output { unsafe { simd_neg(self) } } diff --git a/library/stdarch/crates/core_arch/src/s390x/macros.rs b/library/stdarch/crates/core_arch/src/s390x/macros.rs index 4f0f84ec912b7..26afbaa45a741 100644 --- a/library/stdarch/crates/core_arch/src/s390x/macros.rs +++ b/library/stdarch/crates/core_arch/src/s390x/macros.rs @@ -435,6 +435,7 @@ macro_rules! impl_from { ($s: ident) => { #[unstable(feature = "stdarch_s390x", issue = "135681")] impl From<$s> for s_t_l!($s) { + #[inline] fn from (v: $s) -> Self { unsafe { transmute(v) @@ -454,6 +455,7 @@ macro_rules! impl_neg { #[unstable(feature = "stdarch_s390x", issue = "135681")] impl crate::ops::Neg for s_t_l!($s) { type Output = s_t_l!($s); + #[inline] fn neg(self) -> Self::Output { unsafe { simd_neg(self) } } diff --git a/library/stdarch/crates/core_arch/src/s390x/vector.rs b/library/stdarch/crates/core_arch/src/s390x/vector.rs index ae5c37ce0178b..1cd33c3554bde 100644 --- a/library/stdarch/crates/core_arch/src/s390x/vector.rs +++ b/library/stdarch/crates/core_arch/src/s390x/vector.rs @@ -51,7 +51,7 @@ types! 
{ pub struct vector_double(2 x f64); } -#[repr(packed)] +#[repr(C, packed)] struct PackedTuple<T, U> { x: T, y: U, } @@ -83,9 +83,6 @@ unsafe extern "unadjusted" { #[link_name = "llvm.nearbyint.v4f32"] fn nearbyint_v4f32(a: vector_float) -> vector_float; #[link_name = "llvm.nearbyint.v2f64"] fn nearbyint_v2f64(a: vector_double) -> vector_double; - #[link_name = "llvm.rint.v4f32"] fn rint_v4f32(a: vector_float) -> vector_float; - #[link_name = "llvm.rint.v2f64"] fn rint_v2f64(a: vector_double) -> vector_double; - #[link_name = "llvm.roundeven.v4f32"] fn roundeven_v4f32(a: vector_float) -> vector_float; #[link_name = "llvm.roundeven.v2f64"] fn roundeven_v2f64(a: vector_double) -> vector_double; @@ -101,11 +98,6 @@ unsafe extern "unadjusted" { #[link_name = "llvm.s390.vsld"] fn vsld(a: i8x16, b: i8x16, c: u32) -> i8x16; #[link_name = "llvm.s390.vsrd"] fn vsrd(a: i8x16, b: i8x16, c: u32) -> i8x16; - #[link_name = "llvm.fshl.v16i8"] fn fshlb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char; - #[link_name = "llvm.fshl.v8i16"] fn fshlh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short; - #[link_name = "llvm.fshl.v4i32"] fn fshlf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int; - #[link_name = "llvm.fshl.v2i64"] fn fshlg(a: vector_unsigned_long_long, b: vector_unsigned_long_long, c: vector_unsigned_long_long) -> vector_unsigned_long_long; - #[link_name = "llvm.s390.verimb"] fn verimb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char, d: i32) -> vector_signed_char; #[link_name = "llvm.s390.verimh"] fn verimh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short, d: i32) -> vector_signed_short; #[link_name = "llvm.s390.verimf"] fn verimf(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int, d: i32) -> vector_signed_int; @@ -1197,8 +1189,8 @@ mod sealed { test_impl! { vec_round_f32 (a: vector_float) -> vector_float [roundeven_v4f32, _] } test_impl! { vec_round_f64 (a: vector_double) -> vector_double [roundeven_v2f64, _] } - test_impl! { vec_rint_f32 (a: vector_float) -> vector_float [rint_v4f32, "vector-enhancements-1" vfisb] } - test_impl! { vec_rint_f64 (a: vector_double) -> vector_double [rint_v2f64, vfidb] } + test_impl! { vec_rint_f32 (a: vector_float) -> vector_float [simd_round_ties_even, "vector-enhancements-1" vfisb] } + test_impl! { vec_rint_f64 (a: vector_double) -> vector_double [simd_round_ties_even, vfidb] } #[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorRoundc { @@ -1221,8 +1213,8 @@ mod sealed { impl_vec_trait! { [VectorRound vec_round] vec_round_f32 (vector_float) } impl_vec_trait! { [VectorRound vec_round] vec_round_f64 (vector_double) } - impl_vec_trait! { [VectorRint vec_rint] vec_rint_f32 (vector_float) } - impl_vec_trait! { [VectorRint vec_rint] vec_rint_f64 (vector_double) } + impl_vec_trait! { [VectorRint vec_rint] simd_round_ties_even (vector_float) } + impl_vec_trait! { [VectorRint vec_rint] simd_round_ties_even (vector_double) } #[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorTrunc { @@ -1411,43 +1403,42 @@ mod sealed { } macro_rules! 
impl_rot { - ($fun:ident $intr:ident $ty:ident) => { + ($fun:ident $ty:ident) => { #[inline] #[target_feature(enable = "vector")] #[cfg_attr(test, assert_instr($fun))] unsafe fn $fun(a: t_t_l!($ty), b: t_t_l!($ty)) -> t_t_l!($ty) { - transmute($intr(transmute(a), transmute(a), transmute(b))) + simd_funnel_shl(a, a, b) } }; } - impl_rot! { verllvb fshlb u8 } - impl_rot! { verllvh fshlh u16 } - impl_rot! { verllvf fshlf u32 } - impl_rot! { verllvg fshlg u64 } + impl_rot! { verllvb u8 } + impl_rot! { verllvh u16 } + impl_rot! { verllvf u32 } + impl_rot! { verllvg u64 } impl_vec_shift! { [VectorRl vec_rl] (verllvb, verllvh, verllvf, verllvg) } macro_rules! test_rot_imm { - ($fun:ident $instr:ident $intr:ident $ty:ident) => { + ($fun:ident $instr:ident $ty:ident) => { #[inline] #[target_feature(enable = "vector")] #[cfg_attr(test, assert_instr($instr))] unsafe fn $fun(a: t_t_l!($ty), bits: core::ffi::c_ulong) -> t_t_l!($ty) { // mod by the number of bits in a's element type to prevent UB let bits = (bits % $ty::BITS as core::ffi::c_ulong) as $ty; - let a = transmute(a); let b = <t_t_l!($ty)>::splat(bits); - transmute($intr(a, a, transmute(b))) + simd_funnel_shl(a, a, transmute(b)) } }; } - test_rot_imm! { verllvb_imm verllb fshlb u8 } - test_rot_imm! { verllvh_imm verllh fshlh u16 } - test_rot_imm! { verllvf_imm verllf fshlf u32 } - test_rot_imm! { verllvg_imm verllg fshlg u64 } + test_rot_imm! { verllvb_imm verllb u8 } + test_rot_imm! { verllvh_imm verllh u16 } + test_rot_imm! { verllvf_imm verllf u32 } + test_rot_imm! { verllvg_imm verllg u64 } #[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorRli { @@ -4787,7 +4778,7 @@ pub unsafe fn vec_splat_s8<const IMM: i8>() -> vector_signed_char { #[unstable(feature = "stdarch_s390x", issue = "135681")] #[cfg_attr(test, assert_instr(vrepih, IMM = 42))] pub unsafe fn vec_splat_s16<const IMM: i16>() -> vector_signed_short { - vector_signed_short([IMM as i16; 8]) + vector_signed_short([IMM; 8]) } /// Vector Splat Signed Word diff --git a/library/stdarch/crates/core_arch/src/wasm32/simd128.rs b/library/stdarch/crates/core_arch/src/wasm32/simd128.rs index fc0d7723fa014..108bc3125c5f3 100644 --- a/library/stdarch/crates/core_arch/src/wasm32/simd128.rs +++ b/library/stdarch/crates/core_arch/src/wasm32/simd128.rs @@ -141,7 +141,7 @@ unsafe extern "unadjusted" { fn llvm_f64x2_max(x: simd::f64x2, y: simd::f64x2) -> simd::f64x2; } -#[repr(packed)] +#[repr(C, packed)] #[derive(Copy)] struct Unaligned<T>(T); diff --git a/library/stdarch/crates/core_arch/src/x86/avx.rs b/library/stdarch/crates/core_arch/src/x86/avx.rs index df1cb63be30f0..24e0cf6ba1afb 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx.rs @@ -1258,7 +1258,7 @@ pub fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::trivially_copy_pass_by_ref)] -pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 { +pub fn _mm256_broadcast_ss(f: &f32) -> __m256 { _mm256_set1_ps(*f) } @@ -1271,7 +1271,7 @@ pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 { #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::trivially_copy_pass_by_ref)] -pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 { +pub fn _mm_broadcast_ss(f: &f32) -> __m128 { _mm_set1_ps(*f) } @@ -1284,7 +1284,7 @@ pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 { #[cfg_attr(test, assert_instr(vbroadcastsd))] #[stable(feature = "simd_x86", 
since = "1.27.0")] #[allow(clippy::trivially_copy_pass_by_ref)] -pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d { +pub fn _mm256_broadcast_sd(f: &f64) -> __m256d { _mm256_set1_pd(*f) } @@ -1296,8 +1296,8 @@ pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastf128))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 { - simd_shuffle!(*a, _mm_setzero_ps(), [0, 1, 2, 3, 0, 1, 2, 3]) +pub fn _mm256_broadcast_ps(a: &__m128) -> __m256 { + unsafe { simd_shuffle!(*a, _mm_setzero_ps(), [0, 1, 2, 3, 0, 1, 2, 3]) } } /// Broadcasts 128 bits from memory (composed of 2 packed double-precision @@ -1308,8 +1308,8 @@ pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 { #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastf128))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d { - simd_shuffle!(*a, _mm_setzero_pd(), [0, 1, 0, 1]) +pub fn _mm256_broadcast_pd(a: &__m128d) -> __m256d { + unsafe { simd_shuffle!(*a, _mm_setzero_pd(), [0, 1, 0, 1]) } } /// Copies `a` to result, then inserts 128 bits (composed of 4 packed diff --git a/library/stdarch/crates/core_arch/src/x86/avx512f.rs b/library/stdarch/crates/core_arch/src/x86/avx512f.rs index dd224616764d6..d53f83c0a10bc 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512f.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512f.rs @@ -33248,7 +33248,7 @@ pub fn _mm512_reduce_add_ps(a: __m512) -> f32 { ); let a = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a)); let a = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1])); - simd_extract::<_, f32>(a, 0) + simd_extract::<_, f32>(a, 1) + simd_extract!(a, 0, f32) + simd_extract!(a, 1, f32) } } @@ -33275,7 +33275,7 @@ pub fn _mm512_reduce_add_pd(a: __m512d) -> f64 { _mm512_extractf64x4_pd::<1>(a), ); let a = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a)); - simd_extract::<_, f64>(a, 0) + simd_extract::<_, f64>(a, 1) + simd_extract!(a, 0, f64) + simd_extract!(a, 1, f64) } } @@ -33356,7 +33356,7 @@ pub fn _mm512_reduce_mul_ps(a: __m512) -> f32 { ); let a = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a)); let a = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1])); - simd_extract::<_, f32>(a, 0) * simd_extract::<_, f32>(a, 1) + simd_extract!(a, 0, f32) * simd_extract!(a, 1, f32) } } @@ -33383,7 +33383,7 @@ pub fn _mm512_reduce_mul_pd(a: __m512d) -> f64 { _mm512_extractf64x4_pd::<1>(a), ); let a = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a)); - simd_extract::<_, f64>(a, 0) * simd_extract::<_, f64>(a, 1) + simd_extract!(a, 0, f64) * simd_extract!(a, 1, f64) } } diff --git a/library/stdarch/crates/core_arch/src/x86/avx512fp16.rs b/library/stdarch/crates/core_arch/src/x86/avx512fp16.rs index 0a81a0581f97a..8c914803c665d 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512fp16.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512fp16.rs @@ -11032,7 +11032,7 @@ pub fn _mm_reduce_add_ph(a: __m128h) -> f16 { let a = _mm_add_ph(a, b); let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); let a = _mm_add_ph(a, b); - simd_extract::<_, f16>(a, 0) + simd_extract::<_, f16>(a, 1) + simd_extract!(a, 0, f16) + simd_extract!(a, 1, f16) } } @@ -11085,7 +11085,7 @@ pub fn _mm_reduce_mul_ph(a: __m128h) -> f16 { let a = _mm_mul_ph(a, b); let b = simd_shuffle!(a, a, [2, 3, 0, 1, 4, 5, 6, 7]); let a = 
_mm_mul_ph(a, b); - simd_extract::<_, f16>(a, 0) * simd_extract::<_, f16>(a, 1) + simd_extract!(a, 0, f16) * simd_extract!(a, 1, f16) } } diff --git a/library/stdarch/crates/core_arch/src/x86/avx512vbmi2.rs b/library/stdarch/crates/core_arch/src/x86/avx512vbmi2.rs index c722f7b370ffe..09a90e29bf088 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512vbmi2.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512vbmi2.rs @@ -500,7 +500,7 @@ pub fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i { #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshldvq))] pub fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - unsafe { transmute(vpshldvq(a.as_i64x8(), b.as_i64x8(), c.as_i64x8())) } + unsafe { transmute(simd_funnel_shl(a.as_i64x8(), b.as_i64x8(), c.as_i64x8())) } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -539,7 +539,7 @@ pub fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshldvq))] pub fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - unsafe { transmute(vpshldvq256(a.as_i64x4(), b.as_i64x4(), c.as_i64x4())) } + unsafe { transmute(simd_funnel_shl(a.as_i64x4(), b.as_i64x4(), c.as_i64x4())) } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -578,7 +578,7 @@ pub fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshldvq))] pub fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - unsafe { transmute(vpshldvq128(a.as_i64x2(), b.as_i64x2(), c.as_i64x2())) } + unsafe { transmute(simd_funnel_shl(a.as_i64x2(), b.as_i64x2(), c.as_i64x2())) } } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -617,7 +617,7 @@ pub fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshldvd))] pub fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - unsafe { transmute(vpshldvd(a.as_i32x16(), b.as_i32x16(), c.as_i32x16())) } + unsafe { transmute(simd_funnel_shl(a.as_i32x16(), b.as_i32x16(), c.as_i32x16())) } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
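[Review note, not part of the patch] The `vpshldv*` rewrites above, like the altivec `vrl*` and s390x `verllv*` hunks earlier, all lower through `simd_funnel_shl`, whose semantics match `llvm.fshl`: concatenate the two inputs, shift left, keep the upper half. A scalar sketch for one 64-bit lane; `fshl64` is a hypothetical helper written here for illustration:

```rust
// Funnel shift left on one 64-bit lane: view `a` as the upper and `b`
// as the lower half of a 128-bit value, shift left by `c`, and keep the
// upper 64 bits. Shift counts act modulo the lane width, as in both
// `llvm.fshl` and the vpshldv instructions.
fn fshl64(a: u64, b: u64, c: u32) -> u64 {
    let c = c % 64;
    if c == 0 { a } else { (a << c) | (b >> (64 - c)) }
}

fn main() {
    assert_eq!(fshl64(0b01, 1 << 63, 1), 0b11); // top bit of `b` shifts in
    assert_eq!(fshl64(0xdead, 0, 0), 0xdead); // c == 0 is the identity
}
```

Passing the same value as both halves turns the funnel shift into a rotate, which is exactly how the `impl_vrl` and `impl_rot` macros above implement vector rotate-left as `simd_funnel_shl(a, a, b)`.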
@@ -656,7 +656,7 @@ pub fn _mm512_maskz_shldv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshldvd))] pub fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - unsafe { transmute(vpshldvd256(a.as_i32x8(), b.as_i32x8(), c.as_i32x8())) } + unsafe { transmute(simd_funnel_shl(a.as_i32x8(), b.as_i32x8(), c.as_i32x8())) } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -695,7 +695,7 @@ pub fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshldvd))] pub fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - unsafe { transmute(vpshldvd128(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) } + unsafe { transmute(simd_funnel_shl(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) } } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -734,7 +734,7 @@ pub fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshldvw))] pub fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - unsafe { transmute(vpshldvw(a.as_i16x32(), b.as_i16x32(), c.as_i16x32())) } + unsafe { transmute(simd_funnel_shl(a.as_i16x32(), b.as_i16x32(), c.as_i16x32())) } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -773,7 +773,7 @@ pub fn _mm512_maskz_shldv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshldvw))] pub fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - unsafe { transmute(vpshldvw256(a.as_i16x16(), b.as_i16x16(), c.as_i16x16())) } + unsafe { transmute(simd_funnel_shl(a.as_i16x16(), b.as_i16x16(), c.as_i16x16())) } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -812,7 +812,7 @@ pub fn _mm256_maskz_shldv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshldvw))] pub fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - unsafe { transmute(vpshldvw128(a.as_i16x8(), b.as_i16x8(), c.as_i16x8())) } + unsafe { transmute(simd_funnel_shl(a.as_i16x8(), b.as_i16x8(), c.as_i16x8())) } } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. 
Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -851,7 +851,7 @@ pub fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshrdvq))] pub fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - unsafe { transmute(vpshrdvq(b.as_i64x8(), a.as_i64x8(), c.as_i64x8())) } + unsafe { transmute(simd_funnel_shr(b.as_i64x8(), a.as_i64x8(), c.as_i64x8())) } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -890,7 +890,7 @@ pub fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshrdvq))] pub fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - unsafe { transmute(vpshrdvq256(b.as_i64x4(), a.as_i64x4(), c.as_i64x4())) } + unsafe { transmute(simd_funnel_shr(b.as_i64x4(), a.as_i64x4(), c.as_i64x4())) } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -929,7 +929,7 @@ pub fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshrdvq))] pub fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - unsafe { transmute(vpshrdvq128(b.as_i64x2(), a.as_i64x2(), c.as_i64x2())) } + unsafe { transmute(simd_funnel_shr(b.as_i64x2(), a.as_i64x2(), c.as_i64x2())) } } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -968,7 +968,7 @@ pub fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshrdvd))] pub fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - unsafe { transmute(vpshrdvd(b.as_i32x16(), a.as_i32x16(), c.as_i32x16())) } + unsafe { transmute(simd_funnel_shr(b.as_i32x16(), a.as_i32x16(), c.as_i32x16())) } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). 
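[Review note, not part of the patch] The `vpshrdv*` side mirrors this with `simd_funnel_shr`: concatenate, shift right, keep the lower half. Note the swapped operand order in the patch, `simd_funnel_shr(b, a, c)`: for `vpshrdv`, `b` supplies the upper half and `a` the lower half. A scalar sketch; `fshr64` is a hypothetical helper for illustration:

```rust
// Funnel shift right on one 64-bit lane: `hi` is the upper and `lo` the
// lower half of a 128-bit value; shift right by `c` and keep the lower
// 64 bits. Shift counts act modulo the lane width, as in `llvm.fshr`.
fn fshr64(hi: u64, lo: u64, c: u32) -> u64 {
    let c = c % 64;
    if c == 0 { lo } else { (lo >> c) | (hi << (64 - c)) }
}

fn main() {
    assert_eq!(fshr64(1, 0, 1), 1 << 63); // low bit of `hi` shifts into the top
    assert_eq!(fshr64(0, 0xffff, 4), 0x0fff);
}
```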
@@ -1007,7 +1007,7 @@ pub fn _mm512_maskz_shrdv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshrdvd))] pub fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - unsafe { transmute(vpshrdvd256(b.as_i32x8(), a.as_i32x8(), c.as_i32x8())) } + unsafe { transmute(simd_funnel_shr(b.as_i32x8(), a.as_i32x8(), c.as_i32x8())) } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -1046,7 +1046,7 @@ pub fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshrdvd))] pub fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - unsafe { transmute(vpshrdvd128(b.as_i32x4(), a.as_i32x4(), c.as_i32x4())) } + unsafe { transmute(simd_funnel_shr(b.as_i32x4(), a.as_i32x4(), c.as_i32x4())) } } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -1085,7 +1085,7 @@ pub fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshrdvw))] pub fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i { - unsafe { transmute(vpshrdvw(b.as_i16x32(), a.as_i16x32(), c.as_i16x32())) } + unsafe { transmute(simd_funnel_shr(b.as_i16x32(), a.as_i16x32(), c.as_i16x32())) } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -1124,7 +1124,7 @@ pub fn _mm512_maskz_shrdv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshrdvw))] pub fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i { - unsafe { transmute(vpshrdvw256(b.as_i16x16(), a.as_i16x16(), c.as_i16x16())) } + unsafe { transmute(simd_funnel_shr(b.as_i16x16(), a.as_i16x16(), c.as_i16x16())) } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -1163,7 +1163,7 @@ pub fn _mm256_maskz_shrdv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpshrdvw))] pub fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i { - unsafe { transmute(vpshrdvw128(b.as_i16x8(), a.as_i16x8(), c.as_i16x8())) } + unsafe { transmute(simd_funnel_shr(b.as_i16x8(), a.as_i16x8(), c.as_i16x8())) } } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. 
Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -2138,44 +2138,6 @@ unsafe extern "C" { #[link_name = "llvm.x86.avx512.mask.expand.b.128"] fn vpexpandb128(a: i8x16, src: i8x16, mask: u16) -> i8x16; - #[link_name = "llvm.fshl.v8i64"] - fn vpshldvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8; - #[link_name = "llvm.fshl.v4i64"] - fn vpshldvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4; - #[link_name = "llvm.fshl.v2i64"] - fn vpshldvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2; - #[link_name = "llvm.fshl.v16i32"] - fn vpshldvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16; - #[link_name = "llvm.fshl.v8i32"] - fn vpshldvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8; - #[link_name = "llvm.fshl.v4i32"] - fn vpshldvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4; - #[link_name = "llvm.fshl.v32i16"] - fn vpshldvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32; - #[link_name = "llvm.fshl.v16i16"] - fn vpshldvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16; - #[link_name = "llvm.fshl.v8i16"] - fn vpshldvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8; - - #[link_name = "llvm.fshr.v8i64"] - fn vpshrdvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8; - #[link_name = "llvm.fshr.v4i64"] - fn vpshrdvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4; - #[link_name = "llvm.fshr.v2i64"] - fn vpshrdvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2; - #[link_name = "llvm.fshr.v16i32"] - fn vpshrdvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16; - #[link_name = "llvm.fshr.v8i32"] - fn vpshrdvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8; - #[link_name = "llvm.fshr.v4i32"] - fn vpshrdvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4; - #[link_name = "llvm.fshr.v32i16"] - fn vpshrdvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32; - #[link_name = "llvm.fshr.v16i16"] - fn vpshrdvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16; - #[link_name = "llvm.fshr.v8i16"] - fn vpshrdvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8; - #[link_name = "llvm.x86.avx512.mask.expand.load.b.128"] fn expandloadb_128(mem_addr: *const i8, a: i8x16, mask: u16) -> i8x16; #[link_name = "llvm.x86.avx512.mask.expand.load.w.128"] diff --git a/library/stdarch/crates/core_arch/src/x86/kl.rs b/library/stdarch/crates/core_arch/src/x86/kl.rs index eb9eb83f4115c..26e5a46c62934 100644 --- a/library/stdarch/crates/core_arch/src/x86/kl.rs +++ b/library/stdarch/crates/core_arch/src/x86/kl.rs @@ -127,7 +127,7 @@ unsafe extern "unadjusted" { /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadiwkey) #[inline] #[target_feature(enable = "kl")] -#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "keylocker_x86", since = "1.89.0")] #[cfg_attr(test, assert_instr(loadiwkey))] pub unsafe fn _mm_loadiwkey( control: u32, @@ -153,7 +153,7 @@ pub unsafe fn _mm_loadiwkey( /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_encodekey128_u32) #[inline] #[target_feature(enable = "kl")] -#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "keylocker_x86", since = "1.89.0")] #[cfg_attr(test, assert_instr(encodekey128))] pub unsafe fn _mm_encodekey128_u32(key_params: u32, key: __m128i, handle: *mut u8) -> u32 { let EncodeKey128Output(control, key0, key1, key2, _, _, _) = encodekey128(key_params, key); @@ -176,7 +176,7 @@ pub unsafe fn _mm_encodekey128_u32(key_params: u32, key: 
__m128i, handle: *mut u /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_encodekey256_u32) #[inline] #[target_feature(enable = "kl")] -#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "keylocker_x86", since = "1.89.0")] #[cfg_attr(test, assert_instr(encodekey256))] pub unsafe fn _mm_encodekey256_u32( key_params: u32, @@ -198,7 +198,7 @@ pub unsafe fn _mm_encodekey256_u32( /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc128kl_u8) #[inline] #[target_feature(enable = "kl")] -#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "keylocker_x86", since = "1.89.0")] #[cfg_attr(test, assert_instr(aesenc128kl))] pub unsafe fn _mm_aesenc128kl_u8(output: *mut __m128i, input: __m128i, handle: *const u8) -> u8 { let AesOutput(status, result) = aesenc128kl(input, handle); @@ -214,7 +214,7 @@ pub unsafe fn _mm_aesenc128kl_u8(output: *mut __m128i, input: __m128i, handle: * /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec128kl_u8) #[inline] #[target_feature(enable = "kl")] -#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "keylocker_x86", since = "1.89.0")] #[cfg_attr(test, assert_instr(aesdec128kl))] pub unsafe fn _mm_aesdec128kl_u8(output: *mut __m128i, input: __m128i, handle: *const u8) -> u8 { let AesOutput(status, result) = aesdec128kl(input, handle); @@ -230,7 +230,7 @@ pub unsafe fn _mm_aesdec128kl_u8(output: *mut __m128i, input: __m128i, handle: * /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc256kl_u8) #[inline] #[target_feature(enable = "kl")] -#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "keylocker_x86", since = "1.89.0")] #[cfg_attr(test, assert_instr(aesenc256kl))] pub unsafe fn _mm_aesenc256kl_u8(output: *mut __m128i, input: __m128i, handle: *const u8) -> u8 { let AesOutput(status, result) = aesenc256kl(input, handle); @@ -246,7 +246,7 @@ pub unsafe fn _mm_aesenc256kl_u8(output: *mut __m128i, input: __m128i, handle: * /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec256kl_u8) #[inline] #[target_feature(enable = "kl")] -#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "keylocker_x86", since = "1.89.0")] #[cfg_attr(test, assert_instr(aesdec256kl))] pub unsafe fn _mm_aesdec256kl_u8(output: *mut __m128i, input: __m128i, handle: *const u8) -> u8 { let AesOutput(status, result) = aesdec256kl(input, handle); @@ -262,7 +262,7 @@ pub unsafe fn _mm_aesdec256kl_u8(output: *mut __m128i, input: __m128i, handle: * /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesencwide128kl_u8) #[inline] #[target_feature(enable = "widekl")] -#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "keylocker_x86", since = "1.89.0")] #[cfg_attr(test, assert_instr(aesencwide128kl))] pub unsafe fn _mm_aesencwide128kl_u8( output: *mut __m128i, @@ -285,7 +285,7 @@ pub unsafe fn _mm_aesencwide128kl_u8( /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdecwide128kl_u8) #[inline] #[target_feature(enable = "widekl")] -#[stable(feature = 
"keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "keylocker_x86", since = "1.89.0")] #[cfg_attr(test, assert_instr(aesdecwide128kl))] pub unsafe fn _mm_aesdecwide128kl_u8( output: *mut __m128i, @@ -308,7 +308,7 @@ pub unsafe fn _mm_aesdecwide128kl_u8( /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesencwide256kl_u8) #[inline] #[target_feature(enable = "widekl")] -#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "keylocker_x86", since = "1.89.0")] #[cfg_attr(test, assert_instr(aesencwide256kl))] pub unsafe fn _mm_aesencwide256kl_u8( output: *mut __m128i, @@ -331,7 +331,7 @@ pub unsafe fn _mm_aesencwide256kl_u8( /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdecwide256kl_u8) #[inline] #[target_feature(enable = "widekl")] -#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "keylocker_x86", since = "1.89.0")] #[cfg_attr(test, assert_instr(aesdecwide256kl))] pub unsafe fn _mm_aesdecwide256kl_u8( output: *mut __m128i, diff --git a/library/stdarch/crates/core_arch/src/x86/mod.rs b/library/stdarch/crates/core_arch/src/x86/mod.rs index 8897258c7dc24..79a593e647f13 100644 --- a/library/stdarch/crates/core_arch/src/x86/mod.rs +++ b/library/stdarch/crates/core_arch/src/x86/mod.rs @@ -772,5 +772,5 @@ mod avx512fp16; pub use self::avx512fp16::*; mod kl; -#[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "keylocker_x86", since = "1.89.0")] pub use self::kl::*; diff --git a/library/stdarch/crates/core_arch/src/x86/sha.rs b/library/stdarch/crates/core_arch/src/x86/sha.rs index da568c449a6be..9ad1a9f14c155 100644 --- a/library/stdarch/crates/core_arch/src/x86/sha.rs +++ b/library/stdarch/crates/core_arch/src/x86/sha.rs @@ -146,7 +146,7 @@ pub fn _mm_sha256rnds2_epu32(a: __m128i, b: __m128i, k: __m128i) -> __m128i { #[inline] #[target_feature(enable = "sha512,avx")] #[cfg_attr(test, assert_instr(vsha512msg1))] -#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "sha512_sm_x86", since = "1.89.0")] pub fn _mm256_sha512msg1_epi64(a: __m256i, b: __m128i) -> __m256i { unsafe { transmute(vsha512msg1(a.as_i64x4(), b.as_i64x2())) } } @@ -159,7 +159,7 @@ pub fn _mm256_sha512msg1_epi64(a: __m256i, b: __m128i) -> __m256i { #[inline] #[target_feature(enable = "sha512,avx")] #[cfg_attr(test, assert_instr(vsha512msg2))] -#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "sha512_sm_x86", since = "1.89.0")] pub fn _mm256_sha512msg2_epi64(a: __m256i, b: __m256i) -> __m256i { unsafe { transmute(vsha512msg2(a.as_i64x4(), b.as_i64x4())) } } @@ -175,7 +175,7 @@ pub fn _mm256_sha512msg2_epi64(a: __m256i, b: __m256i) -> __m256i { #[inline] #[target_feature(enable = "sha512,avx")] #[cfg_attr(test, assert_instr(vsha512rnds2))] -#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "sha512_sm_x86", since = "1.89.0")] pub fn _mm256_sha512rnds2_epi64(a: __m256i, b: __m256i, k: __m128i) -> __m256i { unsafe { transmute(vsha512rnds2(a.as_i64x4(), b.as_i64x4(), k.as_i64x2())) } } @@ -188,7 +188,7 @@ pub fn _mm256_sha512rnds2_epi64(a: __m256i, b: __m256i, k: __m128i) -> __m256i { #[inline] #[target_feature(enable = "sm3,avx")] #[cfg_attr(test, assert_instr(vsm3msg1))] -#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = 
"sha512_sm_x86", since = "1.89.0")] pub fn _mm_sm3msg1_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { unsafe { transmute(vsm3msg1(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) } } @@ -201,7 +201,7 @@ pub fn _mm_sm3msg1_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { #[inline] #[target_feature(enable = "sm3,avx")] #[cfg_attr(test, assert_instr(vsm3msg2))] -#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "sha512_sm_x86", since = "1.89.0")] pub fn _mm_sm3msg2_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { unsafe { transmute(vsm3msg2(a.as_i32x4(), b.as_i32x4(), c.as_i32x4())) } } @@ -219,7 +219,7 @@ pub fn _mm_sm3msg2_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { #[target_feature(enable = "sm3,avx")] #[cfg_attr(test, assert_instr(vsm3rnds2, IMM8 = 0))] #[rustc_legacy_const_generics(3)] -#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "sha512_sm_x86", since = "1.89.0")] pub fn _mm_sm3rnds2_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i { static_assert!( IMM8 == (IMM8 & 0x3e), @@ -235,7 +235,7 @@ pub fn _mm_sm3rnds2_epi32(a: __m128i, b: __m128i, c: __m128i) - #[inline] #[target_feature(enable = "sm4,avx")] #[cfg_attr(test, assert_instr(vsm4key4))] -#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "sha512_sm_x86", since = "1.89.0")] pub fn _mm_sm4key4_epi32(a: __m128i, b: __m128i) -> __m128i { unsafe { transmute(vsm4key4128(a.as_i32x4(), b.as_i32x4())) } } @@ -247,7 +247,7 @@ pub fn _mm_sm4key4_epi32(a: __m128i, b: __m128i) -> __m128i { #[inline] #[target_feature(enable = "sm4,avx")] #[cfg_attr(test, assert_instr(vsm4key4))] -#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "sha512_sm_x86", since = "1.89.0")] pub fn _mm256_sm4key4_epi32(a: __m256i, b: __m256i) -> __m256i { unsafe { transmute(vsm4key4256(a.as_i32x8(), b.as_i32x8())) } } @@ -259,7 +259,7 @@ pub fn _mm256_sm4key4_epi32(a: __m256i, b: __m256i) -> __m256i { #[inline] #[target_feature(enable = "sm4,avx")] #[cfg_attr(test, assert_instr(vsm4rnds4))] -#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "sha512_sm_x86", since = "1.89.0")] pub fn _mm_sm4rnds4_epi32(a: __m128i, b: __m128i) -> __m128i { unsafe { transmute(vsm4rnds4128(a.as_i32x4(), b.as_i32x4())) } } @@ -271,7 +271,7 @@ pub fn _mm_sm4rnds4_epi32(a: __m128i, b: __m128i) -> __m128i { #[inline] #[target_feature(enable = "sm4,avx")] #[cfg_attr(test, assert_instr(vsm4rnds4))] -#[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] +#[stable(feature = "sha512_sm_x86", since = "1.89.0")] pub fn _mm256_sm4rnds4_epi32(a: __m256i, b: __m256i) -> __m256i { unsafe { transmute(vsm4rnds4256(a.as_i32x8(), b.as_i32x8())) } } diff --git a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs index 46a008245bf82..d3e7f62903b32 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs @@ -48,7 +48,7 @@ pub unsafe fn cmpxchg16b( success: Ordering, failure: Ordering, ) -> u128 { - debug_assert!(dst as usize % 16 == 0); + debug_assert!(dst.addr().is_multiple_of(16)); let res = crate::sync::atomic::atomic_compare_exchange(dst, old, new, success, failure); res.unwrap_or_else(|x| x) diff --git a/library/stdarch/crates/intrinsic-test/src/common/compare.rs 
b/library/stdarch/crates/intrinsic-test/src/common/compare.rs index 815ccf89fc695..9e0cbe8cd6abe 100644 --- a/library/stdarch/crates/intrinsic-test/src/common/compare.rs +++ b/library/stdarch/crates/intrinsic-test/src/common/compare.rs @@ -48,7 +48,7 @@ pub fn compare_outputs( return Some(FailureReason::RunRust(intrinsic_name.clone())); } - info!("Comparing intrinsic: {}", intrinsic_name); + info!("Comparing intrinsic: {intrinsic_name}"); let c = std::str::from_utf8(&c.stdout) .unwrap() diff --git a/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs index 84c28cc4bf439..1cfb66c39b90f 100644 --- a/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs +++ b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs @@ -79,7 +79,7 @@ pub fn compile_c_programs(compiler_commands: &[String]) -> bool { false } } else { - error!("Command failed: {:#?}", output); + error!("Command failed: {output:#?}"); false } }) diff --git a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs index a2878502ac944..52bccaf905c51 100644 --- a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs +++ b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs @@ -120,7 +120,7 @@ path = "{binary}/main.rs""#, false } } else { - error!("Command failed: {:#?}", output); + error!("Command failed: {output:#?}"); false } } diff --git a/library/stdarch/crates/simd-test-macro/src/lib.rs b/library/stdarch/crates/simd-test-macro/src/lib.rs index 18e4747d94d91..855e969e1eb79 100644 --- a/library/stdarch/crates/simd-test-macro/src/lib.rs +++ b/library/stdarch/crates/simd-test-macro/src/lib.rs @@ -89,7 +89,7 @@ pub fn simd_test( for feature in target_features { let q = quote_spanned! { proc_macro2::Span::call_site() => - if !#macro_test!(#feature) { + if !::std::arch::#macro_test!(#feature) { missing_features.push(#feature); } }; diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs index f23cfc334170f..28b3e3cfb35b7 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -157,11 +157,11 @@ features! { /// AVX (Advanced Vector Extensions) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx2: "avx2"; /// AVX2 (Advanced Vector Extensions 2) - @FEATURE: #[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] sha512: "sha512"; + @FEATURE: #[stable(feature = "sha512_sm_x86", since = "1.89.0")] sha512: "sha512"; /// SHA512 - @FEATURE: #[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] sm3: "sm3"; + @FEATURE: #[stable(feature = "sha512_sm_x86", since = "1.89.0")] sm3: "sm3"; /// SM3 - @FEATURE: #[stable(feature = "sha512_sm_x86", since = "CURRENT_RUSTC_VERSION")] sm4: "sm4"; + @FEATURE: #[stable(feature = "sha512_sm_x86", since = "1.89.0")] sm4: "sm4"; /// SM4 @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512f: "avx512f" ; /// AVX-512 F (Foundation) @@ -259,9 +259,9 @@ features! 
{ /// XSAVEC (Save Processor Extended States Compacted) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] cmpxchg16b: "cmpxchg16b"; /// CMPXCH16B (16-byte compare-and-swap instruction) - @FEATURE: #[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] kl: "kl"; + @FEATURE: #[stable(feature = "keylocker_x86", since = "1.89.0")] kl: "kl"; /// Intel Key Locker - @FEATURE: #[stable(feature = "keylocker_x86", since = "CURRENT_RUSTC_VERSION")] widekl: "widekl"; + @FEATURE: #[stable(feature = "keylocker_x86", since = "1.89.0")] widekl: "widekl"; /// Intel Key Locker Wide @FEATURE: #[stable(feature = "simd_x86_adx", since = "1.33.0")] adx: "adx"; /// ADX, Intel ADX (Multi-Precision Add-Carry Instruction Extensions) diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/riscv.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/riscv.rs index 5506ff31fc792..db20538af9512 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/linux/riscv.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/riscv.rs @@ -25,6 +25,13 @@ struct riscv_hwprobe { value: u64, } +impl riscv_hwprobe { + // key is overwritten to -1 if not supported by riscv_hwprobe syscall. + pub fn get(&self) -> Option { + (self.key != -1).then_some(self.value) + } +} + #[allow(non_upper_case_globals)] const __NR_riscv_hwprobe: libc::c_long = 258; @@ -124,8 +131,7 @@ fn _riscv_hwprobe(out: &mut [riscv_hwprobe]) -> bool { } } - let len = out.len(); - unsafe { __riscv_hwprobe(out.as_mut_ptr(), len, 0, ptr::null_mut(), 0) == 0 } + unsafe { __riscv_hwprobe(out.as_mut_ptr(), out.len(), 0, ptr::null_mut(), 0) == 0 } } /// Read list of supported features from (1) the auxiliary vector @@ -156,49 +162,45 @@ pub(crate) fn detect_features() -> cache::Initializer { // Use riscv_hwprobe syscall to query more extensions and // performance-related capabilities. 'hwprobe: { - let mut out = [ - riscv_hwprobe { - key: RISCV_HWPROBE_KEY_BASE_BEHAVIOR, - value: 0, - }, - riscv_hwprobe { - key: RISCV_HWPROBE_KEY_IMA_EXT_0, - value: 0, - }, - riscv_hwprobe { - key: RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF, - value: 0, - }, - riscv_hwprobe { - key: RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF, - value: 0, - }, - riscv_hwprobe { - key: RISCV_HWPROBE_KEY_CPUPERF_0, - value: 0, - }, - ]; - if !_riscv_hwprobe(&mut out) { + macro_rules! init { + { $($name: ident : $key: expr),* $(,)? } => { + #[repr(usize)] + enum Indices { $($name),* } + let mut t = [$(riscv_hwprobe { key: $key, value: 0 }),*]; + macro_rules! data_mut { () => { &mut t } } + macro_rules! query { [$idx: ident] => { t[Indices::$idx as usize].get() } } + } + } + init! { + BaseBehavior: RISCV_HWPROBE_KEY_BASE_BEHAVIOR, + Extensions: RISCV_HWPROBE_KEY_IMA_EXT_0, + MisalignedScalarPerf: RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF, + MisalignedVectorPerf: RISCV_HWPROBE_KEY_MISALIGNED_VECTOR_PERF, + MisalignedScalarPerfFallback: RISCV_HWPROBE_KEY_CPUPERF_0, + }; + if !_riscv_hwprobe(data_mut!()) { break 'hwprobe; } - // Query scalar/vector misaligned behavior. - if out[2].key != -1 { + // Query scalar misaligned behavior. 
+ if let Some(value) = query![MisalignedScalarPerf] { enable_feature( Feature::unaligned_scalar_mem, - out[2].value == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST, + value == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST, ); - } else if out[4].key != -1 { + } else if let Some(value) = query![MisalignedScalarPerfFallback] { // Deprecated method for fallback enable_feature( Feature::unaligned_scalar_mem, - out[4].value & RISCV_HWPROBE_MISALIGNED_MASK == RISCV_HWPROBE_MISALIGNED_FAST, + value & RISCV_HWPROBE_MISALIGNED_MASK == RISCV_HWPROBE_MISALIGNED_FAST, ); } - if out[3].key != -1 { + + // Query vector misaligned behavior. + if let Some(value) = query![MisalignedVectorPerf] { enable_feature( Feature::unaligned_vector_mem, - out[3].value == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST, + value == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST, ); } @@ -208,22 +210,20 @@ pub(crate) fn detect_features() -> cache::Initializer { // 20240411). // This is a current requirement of // `RISCV_HWPROBE_KEY_IMA_EXT_0`-based tests. - let has_ima = (out[0].key != -1) && (out[0].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA != 0); - if !has_ima { + if query![BaseBehavior].is_none_or(|value| value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA == 0) { break 'hwprobe; } - has_i |= has_ima; - enable_feature(Feature::zicsr, has_ima); - enable_feature(Feature::zicntr, has_ima); - enable_feature(Feature::zifencei, has_ima); - enable_feature(Feature::m, has_ima); - enable_feature(Feature::a, has_ima); + has_i = true; + enable_feature(Feature::zicsr, true); + enable_feature(Feature::zicntr, true); + enable_feature(Feature::zifencei, true); + enable_feature(Feature::m, true); + enable_feature(Feature::a, true); // Enable features based on `RISCV_HWPROBE_KEY_IMA_EXT_0`. - if out[1].key == -1 { + let Some(ima_ext_0) = query![Extensions] else { break 'hwprobe; - } - let ima_ext_0 = out[1].value; + }; let test = |mask| (ima_ext_0 & mask) != 0; enable_feature(Feature::d, test(RISCV_HWPROBE_IMA_FD)); // F is implied. 
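Aside on the cmpxchg16b hunk above: `dst as usize % 16 == 0` becomes `dst.addr().is_multiple_of(16)`, pairing the strict-provenance `pointer::addr` accessor with `usize::is_multiple_of` so the alignment check reads as intent rather than modular arithmetic. A minimal sketch of the same check on recent stable Rust (both methods are stable as of roughly 1.87); `check_align` is a hypothetical helper, not part of stdarch:

fn check_align(p: *mut u128) -> bool {
    // `addr()` extracts the pointer's address without an `as usize` cast;
    // `is_multiple_of(16)` replaces the manual `% 16 == 0`.
    p.addr().is_multiple_of(16)
}

fn main() {
    let mut v: u128 = 0;
    // u128 is 16-byte aligned on common 64-bit targets, so this holds.
    assert!(check_align(&mut v));
}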
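The riscv.rs rewrite above drops the hand-numbered `out[2]`/`out[4]` indexing in favor of an `init!` macro that names every hwprobe slot: it generates a `#[repr(usize)]` index enum, the probe array itself, and nested `data_mut!`/`query!` helper macros that close over that array. A self-contained sketch of the same pattern, assuming a `FakeProbe` stand-in for the real `riscv_hwprobe` struct and made-up key values in place of the `RISCV_HWPROBE_KEY_*` constants:

// `FakeProbe` mimics `riscv_hwprobe`: the kernel is assumed to overwrite
// `key` with -1 when a requested probe is unsupported.
struct FakeProbe {
    key: i64,
    value: u64,
}

impl FakeProbe {
    fn get(&self) -> Option<u64> {
        (self.key != -1).then_some(self.value)
    }
}

macro_rules! init {
    { $($name: ident : $key: expr),* $(,)? } => {
        // One enum variant per probe; its discriminant is the array index.
        #[repr(usize)]
        enum Indices { $($name),* }
        let mut t = [$(FakeProbe { key: $key, value: 0 }),*];
        // Nested macros capture `t`, so callers never touch raw indices.
        // (`$idx` is unbound in the outer matcher, so it passes through
        // to the inner macro definition.)
        macro_rules! data_mut { () => { &mut t } }
        macro_rules! query { [$idx: ident] => { t[Indices::$idx as usize].get() } }
    }
}

fn main() {
    init! {
        BaseBehavior: 4, // stand-ins for RISCV_HWPROBE_KEY_* constants
        Extensions: 5,
    };
    // Pretend the syscall rejected the first probe and filled the second.
    let probes: &mut [FakeProbe] = data_mut!();
    probes[0].key = -1;
    probes[1].value = 0b100;
    assert_eq!(query![BaseBehavior], None);
    assert_eq!(query![Extensions], Some(0b100));
}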
diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index f658267b9a19b..f0dce681d9c30 100644 --- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -1252,7 +1252,7 @@ intrinsics: - [i16, f16, 'h', 'i32', 'as i32'] compose: - FnCall: [static_assert!, ['N >= 1 && N <= 16']] - - "vcvt{type[2]}_n_{type[1]}_{type[3]}::(a {type[4]}) as {type[1]}" + - "vcvt{type[2]}_n_{type[1]}_{type[3]}::(a {type[4]})" - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}" @@ -1270,7 +1270,7 @@ intrinsics: - [u16, f16, 'h', u32] compose: - FnCall: [static_assert!, ['N >= 1 && N <= 16']] - - "vcvt{type[2]}_n_{type[1]}_{type[3]}::(a as {type[3]}) as {type[1]}" + - "vcvt{type[2]}_n_{type[1]}_{type[3]}::(a as {type[3]})" - name: "vcvt{type[2]}" @@ -2976,11 +2976,7 @@ intrinsics: - float64x1_t - float64x2_t compose: - - LLVMLink: - name: "llvm.rint.{neon_type}" - links: - - link: "llvm.rint.{neon_type}" - arch: aarch64,arm64ec + - FnCall: [simd_round_ties_even, [a]] - name: "vrndx{neon_type.no}" @@ -2996,11 +2992,7 @@ intrinsics: - float16x4_t - float16x8_t compose: - - LLVMLink: - name: "llvm.rint.{neon_type}" - links: - - link: "llvm.rint.{neon_type}" - arch: aarch64,arm64ec + - FnCall: [simd_round_ties_even, [a]] - name: "vrndx{type[1]}{type[0]}" @@ -5391,7 +5383,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - *neon-fp16 + - *neon-fp16 - *neon-unstable-f16 static_defs: ["const LANE: i32"] safety: safe @@ -5444,7 +5436,7 @@ intrinsics: attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]] - FnCall: [rustc_legacy_const_generics, ['2']] - - *neon-fp16 + - *neon-fp16 - *neon-unstable-f16 static_defs: ["const LANE: i32"] safety: safe @@ -5468,7 +5460,7 @@ intrinsics: return_type: "{neon_type[0]}" attr: - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx]]}]] - - *neon-fp16 + - *neon-fp16 - *neon-unstable-f16 safety: safe types: @@ -5552,7 +5544,7 @@ intrinsics: arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {type[1]}"] return_type: "{neon_type[0]}" attr: - - *neon-fp16 + - *neon-fp16 - *neon-unstable-f16 assert_instr: [fmla] safety: safe @@ -7320,7 +7312,7 @@ intrinsics: - ["i64", "i32", "i32", "i64"] compose: - Let: [x, i64, {FnCall: [vqaddd_s64, [a, {FnCall: [vqdmulls_s32, [b, c]]}]]}] - - Identifier: ['x as i64', Symbol] + - Identifier: ['x', Symbol] - name: "vqdmlal{type[4]}" doc: "Signed saturating doubling multiply-add long" @@ -7434,7 +7426,7 @@ intrinsics: - ["i64", "i32", "i32", "i64"] compose: - Let: [x, i64, {FnCall: [vqsubd_s64, [a, {FnCall: [vqdmulls_s32, [b, c]]}]]}] - - Identifier: ['x as i64', Symbol] + - Identifier: ['x', Symbol] - name: "vqdmlsl{type[4]}" doc: "Signed saturating doubling multiply-subtract long" @@ -11697,7 +11689,6 @@ intrinsics: arguments: ["a: {type[1]}"] return_type: "{type[1]}" attr: - - *neon-fp16 - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrte]]}]] - *neon-fp16 - *neon-unstable-f16 @@ -12104,7 +12095,7 @@ intrinsics: - [uint8x8_t, 'uint8x8_t', 'b'] - [poly8x8_t, 'uint8x8_t', 'b'] compose: - - FnCall: + - FnCall: - 'vqtbl1{neon_type[0].no}' - - FnCall: - 'vcombine{neon_type[0].no}' @@ -12174,7 +12165,7 @@ intrinsics: - '{type[2]}_t' - - FnCall: ['vcombine{neon_type[1].no}', ['a.0', 'a.1']] - FnCall: ['vcombine{neon_type[1].no}', ['a.2', 'unsafe 
{{ crate::mem::zeroed() }}']] - - FnCall: + - FnCall: - transmute - - FnCall: - vqtbl2 @@ -12193,7 +12184,7 @@ intrinsics: types: - [uint8x8x3_t, 'uint8x8_t', 'uint8x16x2', 'uint8x8_t'] - [poly8x8x3_t, 'uint8x8_t', 'poly8x16x2', 'poly8x8_t'] - big_endian_inverse: true + big_endian_inverse: true compose: - Let: - x @@ -12201,7 +12192,7 @@ intrinsics: - '{type[2]}_t' - - FnCall: ['vcombine{neon_type[3].no}', ['a.0', 'a.1']] - FnCall: ['vcombine{neon_type[3].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']] - - FnCall: + - FnCall: - transmute - - FnCall: - vqtbl2 @@ -12288,18 +12279,16 @@ intrinsics: - [poly8x8_t, "poly8x16_t", uint8x8_t, "vqtbx1", "_p8"] - [uint8x16_t, "uint8x16_t", uint8x16_t, "vqtbx1q", "q_u8"] - [poly8x16_t, "poly8x16_t", uint8x16_t, "vqtbx1q", "q_p8"] + big_endian_inverse: false compose: - - Let: - - x - - FnCall: - - transmute - - - FnCall: - - "{type[3]}" - - - FnCall: [transmute, [a]] - - FnCall: [transmute, [b]] - - c - - Identifier: [x, Symbol] - + - FnCall: + - transmute + - - FnCall: + - "{type[3]}" + - - FnCall: [transmute, [a]] + - FnCall: [transmute, [b]] + - c + - name: "vtbx1{neon_type[0].no}" doc: "Extended table look-up" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"] @@ -12315,13 +12304,13 @@ intrinsics: compose: - FnCall: - simd_select - - - FnCall: + - - FnCall: - "simd_lt::<{type[4]}_t, int8x8_t>" - - c - FnCall: [transmute, ["{type[3]}"]] - FnCall: - transmute - - - FnCall: + - - FnCall: - "vqtbx1" - - "transmute(a)" - FnCall: @@ -12470,16 +12459,14 @@ intrinsics: - ['poly8x16_t', uint8x8_t, 'vqtbl1', 'poly8x8_t'] - ['uint8x16_t', uint8x16_t, 'vqtbl1q', 'uint8x16_t'] - ['poly8x16_t', uint8x16_t, 'vqtbl1q', 'poly8x16_t'] + big_endian_inverse: false compose: - - Let: - - x - - FnCall: - - transmute - - - FnCall: - - '{type[2]}' - - - FnCall: [transmute, ['a']] - - b - - Identifier: [x, Symbol] + - FnCall: + - transmute + - - FnCall: + - '{type[2]}' + - - FnCall: [transmute, ['a']] + - b - name: "vqtbl2{neon_type[3].no}" doc: "Table look-up" @@ -12511,7 +12498,7 @@ intrinsics: compose: - FnCall: - transmute - - - FnCall: + - - FnCall: - '{type[2]}' - - FnCall: [transmute, ['a.0']] - FnCall: [transmute, ['a.1']] @@ -12547,7 +12534,7 @@ intrinsics: compose: - FnCall: - transmute - - - FnCall: + - - FnCall: - '{type[3]}' - - FnCall: [transmute, [a]] - FnCall: [transmute, ['b.0']] @@ -12584,7 +12571,7 @@ intrinsics: compose: - FnCall: - transmute - - - FnCall: + - - FnCall: - '{type[3]}' - - FnCall: [transmute, ['a.0']] - FnCall: [transmute, ['a.1']] @@ -12621,7 +12608,7 @@ intrinsics: compose: - FnCall: - transmute - - - FnCall: + - - FnCall: - '{type[3]}' - - FnCall: [transmute, [a]] - FnCall: [transmute, ['b.0']] @@ -12659,7 +12646,7 @@ intrinsics: compose: - FnCall: - transmute - - - FnCall: + - - FnCall: - '{type[2]}' - - FnCall: [transmute, ['a.0']] - FnCall: [transmute, ['a.1']] @@ -12697,7 +12684,7 @@ intrinsics: compose: - FnCall: - transmute - - - FnCall: + - - FnCall: - '{type[3]}' - - FnCall: [transmute, [a]] - FnCall: [transmute, ['b.0']] @@ -13204,7 +13191,7 @@ intrinsics: assert_instr: [addp] safety: safe types: - - [int32x2_t, i32] + - [int32x2_t, i32] compose: - LLVMLink: name: "vaddv{neon_type[0].no}" @@ -13259,7 +13246,7 @@ intrinsics: assert_instr: [addp] safety: safe types: - - [uint32x2_t, u32, i32] + - [uint32x2_t, u32, i32] compose: - LLVMLink: name: "vaddv{neon_type[0].no}" @@ -13335,7 +13322,7 @@ intrinsics: types: - [int8x8_t, i8, 'smaxv'] - [int16x4_t, i16, 'smaxv'] - - [int32x2_t, i32, 'smaxp'] + - 
[int32x2_t, i32, 'smaxp'] - [int8x16_t, i8, 'smaxv'] - [int16x8_t, i16, 'smaxv'] - [int32x4_t, i32, 'smaxv'] @@ -13357,7 +13344,7 @@ intrinsics: types: - [uint8x8_t, u8, 'umaxv'] - [uint16x4_t, u16, 'umaxv'] - - [uint32x2_t, u32, 'umaxp'] + - [uint32x2_t, u32, 'umaxp'] - [uint8x16_t, u8, 'umaxv'] - [uint16x8_t, u16, 'umaxv'] - [uint32x4_t, u32, 'umaxv'] @@ -13379,7 +13366,7 @@ intrinsics: types: - [float32x2_t, f32, 'fmaxp'] - [float32x4_t, f32, 'fmaxv'] - - [float64x2_t, f64, 'fmaxp'] + - [float64x2_t, f64, 'fmaxp'] compose: - LLVMLink: name: "vmaxv{neon_type[0].no}" @@ -13398,7 +13385,7 @@ intrinsics: types: - [int8x8_t, i8, 'sminv'] - [int16x4_t, i16, 'sminv'] - - [int32x2_t, i32, 'sminp'] + - [int32x2_t, i32, 'sminp'] - [int8x16_t, i8, 'sminv'] - [int16x8_t, i16, 'sminv'] - [int32x4_t, i32, 'sminv'] @@ -13420,7 +13407,7 @@ intrinsics: types: - [uint8x8_t, u8, 'uminv'] - [uint16x4_t, u16, 'uminv'] - - [uint32x2_t, u32, 'uminp'] + - [uint32x2_t, u32, 'uminp'] - [uint8x16_t, u8, 'uminv'] - [uint16x8_t, u16, 'uminv'] - [uint32x4_t, u32, 'uminv'] @@ -13442,7 +13429,7 @@ intrinsics: types: - [float32x2_t, f32, 'fminp'] - [float32x4_t, f32, 'fminv'] - - [float64x2_t, f64, 'fminp'] + - [float64x2_t, f64, 'fminp'] compose: - LLVMLink: name: "vminv{neon_type[0].no}" @@ -13498,7 +13485,7 @@ intrinsics: safety: safe types: - float32x4_t - - float64x2_t + - float64x2_t compose: - LLVMLink: name: "vpmin{neon_type.no}" @@ -13554,7 +13541,7 @@ intrinsics: safety: safe types: - float32x4_t - - float64x2_t + - float64x2_t compose: - LLVMLink: name: "vpmax{neon_type.no}" diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml index 118f5808f758f..07959cf380e8a 100644 --- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml +++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -11447,14 +11447,14 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [int8x8_t, uint8x8_t, '3', 'const { int8x8_t([N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) }'] - - [int16x4_t, uint16x4_t, '4', 'const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) }'] - - [int32x2_t, uint32x2_t, '5', 'const { int32x2_t([N as i32, N as i32]) }'] + - [int8x8_t, uint8x8_t, '3', 'const { int8x8_t([N as i8; 8]) }'] + - [int16x4_t, uint16x4_t, '4', 'const { int16x4_t([N as i16; 4]) }'] + - [int32x2_t, uint32x2_t, '5', 'const { int32x2_t([N; 2]) }'] - [int64x1_t, uint64x1_t, '6', 'const { int64x1_t([N as i64]) }'] - - [int8x16_t, uint8x16_t, '3', 'const { int8x16_t([N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) }'] - - [int16x8_t, uint16x8_t, '4', 'const { int16x8_t([N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16]) }'] - - [int32x4_t, uint32x4_t, '5', 'const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) }'] - - [int64x2_t, uint64x2_t, '6', 'const { int64x2_t([N as i64, N as i64]) }'] + - [int8x16_t, uint8x16_t, '3', 'const { int8x16_t([N as i8; 16]) }'] + - [int16x8_t, uint16x8_t, '4', 'const { int16x8_t([N as i16; 8]) }'] + - [int32x4_t, uint32x4_t, '5', 'const { int32x4_t([N; 4]) }'] + - [int64x2_t, uint64x2_t, '6', 'const { int64x2_t([N as i64; 2]) }'] compose: - FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]] - LLVMLink: @@ -11479,14 +11479,14 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - 
[int8x8_t, uint8x8_t, '3', 'const { int8x8_t([N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) }'] - - [int16x4_t, uint16x4_t, '4', 'const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) }'] - - [int32x2_t, uint32x2_t, '5', 'const { int32x2_t([N as i32, N as i32]) }'] + - [int8x8_t, uint8x8_t, '3', 'const { int8x8_t([N as i8; 8]) }'] + - [int16x4_t, uint16x4_t, '4', 'const { int16x4_t([N as i16; 4]) }'] + - [int32x2_t, uint32x2_t, '5', 'const { int32x2_t([N; 2]) }'] - [int64x1_t, uint64x1_t, '6', 'const { int64x1_t([N as i64]) }'] - - [int8x16_t, uint8x16_t, '3', 'const { int8x16_t([N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8]) }'] - - [int16x8_t, uint16x8_t, '4', 'const { int16x8_t([N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16]) }'] - - [int32x4_t, uint32x4_t, '5', 'const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) }'] - - [int64x2_t, uint64x2_t, '6', 'const { int64x2_t([N as i64, N as i64]) }'] + - [int8x16_t, uint8x16_t, '3', 'const { int8x16_t([N as i8; 16]) }'] + - [int16x8_t, uint16x8_t, '4', 'const { int16x8_t([N as i16; 8]) }'] + - [int32x4_t, uint32x4_t, '5', 'const { int32x4_t([N; 4]) }'] + - [int64x2_t, uint64x2_t, '6', 'const { int64x2_t([N as i64; 2]) }'] compose: - FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]] - LLVMLink: diff --git a/library/stdarch/josh-sync.toml b/library/stdarch/josh-sync.toml new file mode 100644 index 0000000000000..ebdb4576287c8 --- /dev/null +++ b/library/stdarch/josh-sync.toml @@ -0,0 +1,3 @@ +org = "rust-lang" +repo = "stdarch" +path = "library/stdarch" diff --git a/library/stdarch/rust-version b/library/stdarch/rust-version new file mode 100644 index 0000000000000..5102178848e7f --- /dev/null +++ b/library/stdarch/rust-version @@ -0,0 +1 @@ +040e2f8b9ff2d76fbe2146d6003e297ed4532088
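On the arm_shared.spec.yml hunks above: the hand-unrolled constant vectors such as `int8x8_t([N as i8, N as i8, ...])` become Rust array repeat expressions, `[N as i8; 8]`, which produce the identical array without the risk of miscounting lanes (and the cast is dropped entirely where `N` already has the lane type, as in `[N; 2]`). A minimal sketch of the equivalence; `splat` is a hypothetical helper, not stdarch API:

fn splat<const N: i32>() -> [i8; 8] {
    // Same array as writing `N as i8` eight times by hand.
    [N as i8; 8]
}

fn main() {
    let unrolled = [5i8, 5, 5, 5, 5, 5, 5, 5];
    assert_eq!(splat::<5>(), unrolled);
}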