diff --git a/crates/core_arch/src/aarch64/armclang.rs b/crates/core_arch/src/aarch64/armclang.rs new file mode 100644 index 0000000000..54847be7b1 --- /dev/null +++ b/crates/core_arch/src/aarch64/armclang.rs @@ -0,0 +1,23 @@ +//! ARM compiler specific intrinsics +//! +//! # References +//! +//! - [ARM Compiler v 6.10 - armclang Reference Guide][arm_comp_ref] +//! +//! [arm_comp_ref]: https://developer.arm.com/docs/100067/0610 + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Inserts a breakpoint instruction. +/// +/// `VAL` is a compile-time constant integer in range `[0, 65535]`. +/// +/// The breakpoint instruction inserted is `BRK` on A64. +#[cfg_attr(test, assert_instr(brk, VAL = 0))] +#[inline(always)] +#[rustc_legacy_const_generics(0)] +pub unsafe fn __breakpoint() { + static_assert_imm16!(VAL); + asm!("brk {}", const VAL); +} diff --git a/crates/core_arch/src/aarch64/mod.rs b/crates/core_arch/src/aarch64/mod.rs index f6d0fc9dbd..0411fc1068 100644 --- a/crates/core_arch/src/aarch64/mod.rs +++ b/crates/core_arch/src/aarch64/mod.rs @@ -21,7 +21,11 @@ pub use self::crc::*; mod prefetch; pub use self::prefetch::*; -pub use super::acle::*; +pub use super::arm_shared::*; + +mod armclang; + +pub use self::armclang::*; #[cfg(test)] use stdarch_test::assert_instr; diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index a925fbc504..71e6b83a63 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -10,7 +10,7 @@ pub use self::generated::*; // FIXME: replace neon with asimd use crate::{ - core_arch::{arm::*, simd::*, simd_llvm::*}, + core_arch::{arm_shared::*, simd::*, simd_llvm::*}, hint::unreachable_unchecked, mem::{transmute, zeroed}, }; @@ -2812,7 +2812,7 @@ pub unsafe fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x #[cfg(test)] mod tests { use crate::core_arch::aarch64::test_support::*; - use crate::core_arch::arm::test_support::*; + use crate::core_arch::arm_shared::test_support::*; use crate::core_arch::{aarch64::neon::*, aarch64::*, simd::*}; use std::mem::transmute; use stdarch_test::simd_test; @@ -4261,13 +4261,13 @@ mod tests { #[cfg(test)] #[cfg(target_endian = "little")] -#[path = "../../arm/neon/table_lookup_tests.rs"] +#[path = "../../arm_shared/neon/table_lookup_tests.rs"] mod table_lookup_tests; #[cfg(test)] -#[path = "../../arm/neon/shift_and_insert_tests.rs"] +#[path = "../../arm_shared/neon/shift_and_insert_tests.rs"] mod shift_and_insert_tests; #[cfg(test)] -#[path = "../../arm/neon/load_tests.rs"] +#[path = "../../arm_shared/neon/load_tests.rs"] mod load_tests; diff --git a/crates/core_arch/src/aarch64/test_support.rs b/crates/core_arch/src/aarch64/test_support.rs index e08c39a545..9c5994b150 100644 --- a/crates/core_arch/src/aarch64/test_support.rs +++ b/crates/core_arch/src/aarch64/test_support.rs @@ -1,4 +1,4 @@ -use crate::core_arch::{aarch64::neon::*, arm::*, simd::*}; +use crate::core_arch::{aarch64::neon::*, arm_shared::*, simd::*}; use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec}; macro_rules! V_u64 { diff --git a/crates/core_arch/src/arm/armclang.rs b/crates/core_arch/src/arm/armclang.rs index aa4bab49f1..6b332e82c0 100644 --- a/crates/core_arch/src/arm/armclang.rs +++ b/crates/core_arch/src/arm/armclang.rs @@ -9,20 +9,6 @@ #[cfg(test)] use stdarch_test::assert_instr; -/// Inserts a breakpoint instruction. -/// -/// `VAL` is a compile-time constant integer in range `[0, 65535]`. -/// -/// The breakpoint instruction inserted is `BRK` on A64. 
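A minimal usage sketch of the `__breakpoint` intrinsic added in the new `aarch64/armclang.rs` above, assuming it is re-exported as usual under `core::arch::aarch64` (illustrative only, not part of the diff):

```rust
// Illustrative usage only. Assumes an AArch64 target and a toolchain where
// core::arch::aarch64::__breakpoint is available; the const-generic call form
// matches the rustc_legacy_const_generics(0) wrapper defined above.
use core::arch::aarch64::__breakpoint;

fn stop_here() {
    // VAL = 42 becomes the 16-bit immediate of the emitted `BRK` instruction;
    // values outside [0, 65535] are rejected at compile time by static_assert_imm16.
    unsafe { __breakpoint::<42>() };
}
```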
-#[cfg(all(target_arch = "aarch64", not(doc)))] -#[cfg_attr(test, assert_instr(brk, VAL = 0))] -#[inline(always)] -#[rustc_legacy_const_generics(0)] -pub unsafe fn __breakpoint() { - static_assert_imm16!(VAL); - asm!("brk {}", const VAL); -} - /// Inserts a breakpoint instruction. /// /// `VAL` is a compile-time constant integer in range `[0, 255]`. @@ -40,8 +26,6 @@ pub unsafe fn __breakpoint() { /// The current implementation only accepts values in range `[0, 255]`. /// /// [arm_docs]: https://developer.arm.com/docs/100067/latest/compiler-specific-intrinsics/__breakpoint-intrinsic -#[cfg(any(target_arch = "arm", doc))] -#[doc(cfg(target_arch = "arm"))] #[cfg_attr(test, assert_instr(bkpt, VAL = 0))] #[inline(always)] #[rustc_legacy_const_generics(0)] diff --git a/crates/core_arch/src/acle/dsp.rs b/crates/core_arch/src/arm/dsp.rs similarity index 100% rename from crates/core_arch/src/acle/dsp.rs rename to crates/core_arch/src/arm/dsp.rs diff --git a/crates/core_arch/src/acle/ex.rs b/crates/core_arch/src/arm/ex.rs similarity index 95% rename from crates/core_arch/src/acle/ex.rs rename to crates/core_arch/src/arm/ex.rs index 0426c65186..b9d5047a05 100644 --- a/crates/core_arch/src/acle/ex.rs +++ b/crates/core_arch/src/arm/ex.rs @@ -8,6 +8,7 @@ #[cfg(any( all(target_feature = "v6k", not(target_feature = "mclass")), // excludes v6-M all(target_feature = "v7", target_feature = "mclass"), // v7-M + doc ))] pub unsafe fn __clrex() { extern "C" { @@ -21,9 +22,10 @@ pub unsafe fn __clrex() { /// Executes a exclusive LDR instruction for 8 bit value. // Supported: v6K, v7-M, v7-A, v7-R // Not supported: v5, v6, v6-M -#[cfg( +#[cfg(any( target_feature = "v6k", // includes v7-M but excludes v6-M -)] + doc +))] pub unsafe fn __ldrexb(p: *const u8) -> u8 { extern "C" { #[link_name = "llvm.arm.ldrex.p0i8"] @@ -36,9 +38,10 @@ pub unsafe fn __ldrexb(p: *const u8) -> u8 { /// Executes a exclusive LDR instruction for 16 bit value. 
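The exclusive-access intrinsics in this file are intended to be used in LDREX/STREX retry loops; a minimal sketch, assuming an ARMv6K-or-later target and the usual `core::arch::arm` re-export (`__strexb`, used below, appears later in this same hunk):

```rust
// Sketch only, not part of the diff. Assumes __ldrexb/__strexb are available
// (ARMv6K and later, per the cfg gates in this file).
use core::arch::arm::{__ldrexb, __strexb};

/// Atomically increments the byte at `addr` using an exclusive retry loop.
unsafe fn atomic_inc_u8(addr: *mut u8) {
    loop {
        let old = __ldrexb(addr);
        // __strexb returns 0 on success and 1 if the exclusive monitor lost
        // the reservation, in which case we retry.
        if __strexb(u32::from(old.wrapping_add(1)), addr) == 0 {
            break;
        }
    }
}
```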
// Supported: v6K, v7-M, v7-A, v7-R, v8 // Not supported: v5, v6, v6-M -#[cfg( +#[cfg(any( target_feature = "v6k", // includes v7-M but excludes v6-M -)] + doc +))] pub unsafe fn __ldrexh(p: *const u16) -> u16 { extern "C" { #[link_name = "llvm.arm.ldrex.p0i16"] @@ -54,6 +57,7 @@ pub unsafe fn __ldrexh(p: *const u16) -> u16 { #[cfg(any( all(target_feature = "v6", not(target_feature = "mclass")), // excludes v6-M all(target_feature = "v7", target_feature = "mclass"), // v7-M + doc ))] pub unsafe fn __ldrex(p: *const u32) -> u32 { extern "C" { @@ -69,9 +73,10 @@ pub unsafe fn __ldrex(p: *const u32) -> u32 { /// Returns `0` if the operation succeeded, or `1` if it failed // supported: v6K, v7-M, v7-A, v7-R // Not supported: v5, v6, v6-M -#[cfg( +#[cfg(any( target_feature = "v6k", // includes v7-M but excludes v6-M -)] + doc +))] pub unsafe fn __strexb(value: u32, addr: *mut u8) -> u32 { extern "C" { #[link_name = "llvm.arm.strex.p0i8"] @@ -86,9 +91,11 @@ pub unsafe fn __strexb(value: u32, addr: *mut u8) -> u32 { /// Returns `0` if the operation succeeded, or `1` if it failed // Supported: v6K, v7-M, v7-A, v7-R, v8 // Not supported: v5, v6, v6-M -#[cfg( +#[cfg(target_feature = "aarch64")] +#[cfg(any( target_feature = "v6k", // includes v7-M but excludes v6-M -)] + doc +))] pub unsafe fn __strexh(value: u16, addr: *mut u16) -> u32 { extern "C" { #[link_name = "llvm.arm.strex.p0i16"] @@ -106,6 +113,7 @@ pub unsafe fn __strexh(value: u16, addr: *mut u16) -> u32 { #[cfg(any( all(target_feature = "v6", not(target_feature = "mclass")), // excludes v6-M all(target_feature = "v7", target_feature = "mclass"), // v7-M + doc ))] pub unsafe fn __strex(value: u32, addr: *mut u32) -> u32 { extern "C" { diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index fd0cb2cf8d..d6b12b8292 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -5,40 +5,81 @@ //! //! [arm_ref]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf //! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics -#![allow(non_camel_case_types)] mod armclang; - pub use self::armclang::*; mod v6; pub use self::v6::*; -#[cfg(any(target_arch = "aarch64", target_feature = "v7"))] -mod v7; -#[cfg(any(target_arch = "aarch64", target_feature = "v7"))] -pub use self::v7::*; +// Supported arches: 6, 7-M. See Section 10.1 of ACLE (e.g. SSAT) +#[cfg(any(target_feature = "v6", doc))] +mod sat; + +#[cfg(any(target_feature = "v6", doc))] +pub use self::sat::*; + +// Supported arches: 5TE, 7E-M. See Section 10.1 of ACLE (e.g. QADD) +// We also include the A profile even though DSP is deprecated on that profile as of ACLE 2.0 (see +// section 5.4.7) +// Here we workaround the difference between LLVM's +dsp and ACLE's __ARM_FEATURE_DSP by gating on +// '+v5te' rather than on '+dsp' +#[cfg(any( + // >= v5TE but excludes v7-M + all(target_feature = "v5te", not(target_feature = "mclass")), + // v7E-M + all(target_feature = "mclass", target_feature = "dsp"), + doc, +))] +pub mod dsp; + +#[cfg(any( + // >= v5TE but excludes v7-M + all(target_feature = "v5te", not(target_feature = "mclass")), + // v7E-M + all(target_feature = "mclass", target_feature = "dsp"), + doc, +))] +pub use self::dsp::*; + +// Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R profiles, says +// Section 5.4.9 of ACLE. 
We'll expose these for the A profile even if deprecated +#[cfg(any( + // v7-A, v7-R + all(target_feature = "v6", not(target_feature = "mclass")), + // v7E-M + all(target_feature = "mclass", target_feature = "dsp"), + doc, +))] +mod simd32; -#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] -mod neon; -#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] -pub use self::neon::*; +#[cfg(any( + // v7-A, v7-R + all(target_feature = "v6", not(target_feature = "mclass")), + // v7E-M + all(target_feature = "mclass", target_feature = "dsp"), + doc, +))] +pub use self::simd32::*; -#[cfg(any(target_arch = "aarch64", target_feature = "v7"))] -mod crc; -#[cfg(any(target_arch = "aarch64", target_feature = "v7"))] -pub use self::crc::*; +#[cfg(any(target_feature = "v7", doc))] +mod v7; +#[cfg(any(target_feature = "v7", doc))] +pub use self::v7::*; -#[cfg(any(target_arch = "aarch64", target_feature = "v7"))] -mod crypto; -#[cfg(any(target_arch = "aarch64", target_feature = "v7"))] -pub use self::crypto::*; +mod ex; +pub use self::ex::*; -pub use crate::core_arch::acle::*; +pub use crate::core_arch::arm_shared::*; #[cfg(test)] use stdarch_test::assert_instr; +#[cfg(any(target_feature = "v7", doc))] +pub(crate) mod neon; +#[cfg(any(target_feature = "v7", doc))] +pub use neon::*; + /// Generates the trap instruction `UDF` #[cfg(target_arch = "arm")] #[cfg_attr(test, assert_instr(udf))] @@ -47,6 +88,26 @@ pub unsafe fn udf() -> ! { crate::intrinsics::abort() } -#[cfg(test)] -#[cfg(any(target_arch = "aarch64", target_feature = "v7"))] -pub(crate) mod test_support; +/// Generates a DBG instruction. +/// +/// This provides a hint to debugging and related systems. The argument must be +/// a constant integer from 0 to 15 inclusive. See implementation documentation +/// for the effect (if any) of this instruction and the meaning of the +/// argument. This is available only when compliling for AArch32. +// Section 10.1 of ACLE says that the supported arches are: 7, 7-M +// "The DBG hint instruction is added in ARMv7. It is UNDEFINED in the ARMv6 base architecture, and +// executes as a NOP instruction in ARMv6K and ARMv6T2." 
- ARM Architecture Reference Manual ARMv7-A +// and ARMv7-R edition (ARM DDI 0406C.c) sections D12.4.1 "ARM instruction set support" and D12.4.2 +// "Thumb instruction set support" +#[cfg(any(target_feature = "v7", doc))] +#[inline(always)] +#[rustc_legacy_const_generics(0)] +pub unsafe fn __dbg() { + static_assert_imm4!(IMM4); + dbg(IMM4); +} + +extern "C" { + #[link_name = "llvm.arm.dbg"] + fn dbg(_: i32); +} diff --git a/crates/core_arch/src/arm/neon.rs b/crates/core_arch/src/arm/neon.rs new file mode 100644 index 0000000000..6bb1d0bfd4 --- /dev/null +++ b/crates/core_arch/src/arm/neon.rs @@ -0,0 +1,1091 @@ +use crate::core_arch::arm_shared::neon::*; +use crate::core_arch::simd::{f32x4, i32x4, u32x4}; +use crate::core_arch::simd_llvm::*; +use crate::mem::transmute; + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(non_camel_case_types)] +pub(crate) type p8 = u8; +#[allow(non_camel_case_types)] +pub(crate) type p16 = u16; + +use crate::mem::align_of; + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.arm.neon.vbsl.v8i8"] + fn vbsl_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vbsl.v16i8"] + fn vbslq_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; + #[link_name = "llvm.arm.neon.vpadals.v4i16.v8i8"] + pub(crate) fn vpadal_s8_(a: int16x4_t, b: int8x8_t) -> int16x4_t; + #[link_name = "llvm.arm.neon.vpadals.v2i32.v4i16"] + pub(crate) fn vpadal_s16_(a: int32x2_t, b: int16x4_t) -> int32x2_t; + #[link_name = "llvm.arm.neon.vpadals.v1i64.v2i32"] + pub(crate) fn vpadal_s32_(a: int64x1_t, b: int32x2_t) -> int64x1_t; + #[link_name = "llvm.arm.neon.vpadals.v8i16.v16i8"] + pub(crate) fn vpadalq_s8_(a: int16x8_t, b: int8x16_t) -> int16x8_t; + #[link_name = "llvm.arm.neon.vpadals.v4i32.v8i16"] + pub(crate) fn vpadalq_s16_(a: int32x4_t, b: int16x8_t) -> int32x4_t; + #[link_name = "llvm.arm.neon.vpadals.v2i64.v4i32"] + pub(crate) fn vpadalq_s32_(a: int64x2_t, b: int32x4_t) -> int64x2_t; + + #[link_name = "llvm.arm.neon.vpadalu.v4i16.v8i8"] + pub(crate) fn vpadal_u8_(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t; + #[link_name = "llvm.arm.neon.vpadalu.v2i32.v4i16"] + pub(crate) fn vpadal_u16_(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t; + #[link_name = "llvm.arm.neon.vpadalu.v1i64.v2i32"] + pub(crate) fn vpadal_u32_(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t; + #[link_name = "llvm.arm.neon.vpadalu.v8i16.v16i8"] + pub(crate) fn vpadalq_u8_(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t; + #[link_name = "llvm.arm.neon.vpadalu.v4i32.v8i16"] + pub(crate) fn vpadalq_u16_(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t; + #[link_name = "llvm.arm.neon.vpadalu.v2i64.v4i32"] + pub(crate) fn vpadalq_u32_(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t; + + #[link_name = "llvm.arm.neon.vtbl1"] + fn vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vtbl2"] + fn vtbl2(a: int8x8_t, b: int8x8_t, b: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vtbl3"] + fn vtbl3(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vtbl4"] + fn vtbl4(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; + + #[link_name = "llvm.arm.neon.vtbx1"] + fn vtbx1(a: int8x8_t, b: int8x8_t, b: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vtbx2"] + fn vtbx2(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vtbx3"] + fn vtbx3(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; + #[link_name = 
"llvm.arm.neon.vtbx4"] + fn vtbx4( + a: int8x8_t, + b: int8x8_t, + b: int8x8_t, + c: int8x8_t, + d: int8x8_t, + e: int8x8_t, + ) -> int8x8_t; + + #[link_name = "llvm.arm.neon.vshiftins.v8i8"] + fn vshiftins_v8i8(a: int8x8_t, b: int8x8_t, shift: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vshiftins.v16i8"] + fn vshiftins_v16i8(a: int8x16_t, b: int8x16_t, shift: int8x16_t) -> int8x16_t; + #[link_name = "llvm.arm.neon.vshiftins.v4i16"] + fn vshiftins_v4i16(a: int16x4_t, b: int16x4_t, shift: int16x4_t) -> int16x4_t; + #[link_name = "llvm.arm.neon.vshiftins.v8i16"] + fn vshiftins_v8i16(a: int16x8_t, b: int16x8_t, shift: int16x8_t) -> int16x8_t; + #[link_name = "llvm.arm.neon.vshiftins.v2i32"] + fn vshiftins_v2i32(a: int32x2_t, b: int32x2_t, shift: int32x2_t) -> int32x2_t; + #[link_name = "llvm.arm.neon.vshiftins.v4i32"] + fn vshiftins_v4i32(a: int32x4_t, b: int32x4_t, shift: int32x4_t) -> int32x4_t; + #[link_name = "llvm.arm.neon.vshiftins.v1i64"] + fn vshiftins_v1i64(a: int64x1_t, b: int64x1_t, shift: int64x1_t) -> int64x1_t; + #[link_name = "llvm.arm.neon.vshiftins.v2i64"] + fn vshiftins_v2i64(a: int64x2_t, b: int64x2_t, shift: int64x2_t) -> int64x2_t; + + #[link_name = "llvm.arm.neon.vld1.v8i8.p0i8"] + fn vld1_v8i8(addr: *const i8, align: i32) -> int8x8_t; + #[link_name = "llvm.arm.neon.vld1.v16i8.p0i8"] + fn vld1q_v16i8(addr: *const i8, align: i32) -> int8x16_t; + #[link_name = "llvm.arm.neon.vld1.v4i16.p0i8"] + fn vld1_v4i16(addr: *const i8, align: i32) -> int16x4_t; + #[link_name = "llvm.arm.neon.vld1.v8i16.p0i8"] + fn vld1q_v8i16(addr: *const i8, align: i32) -> int16x8_t; + #[link_name = "llvm.arm.neon.vld1.v2i32.p0i8"] + fn vld1_v2i32(addr: *const i8, align: i32) -> int32x2_t; + #[link_name = "llvm.arm.neon.vld1.v4i32.p0i8"] + fn vld1q_v4i32(addr: *const i8, align: i32) -> int32x4_t; + #[link_name = "llvm.arm.neon.vld1.v1i64.p0i8"] + fn vld1_v1i64(addr: *const i8, align: i32) -> int64x1_t; + #[link_name = "llvm.arm.neon.vld1.v2i64.p0i8"] + fn vld1q_v2i64(addr: *const i8, align: i32) -> int64x2_t; + #[link_name = "llvm.arm.neon.vld1.v2f32.p0i8"] + fn vld1_v2f32(addr: *const i8, align: i32) -> float32x2_t; + #[link_name = "llvm.arm.neon.vld1.v4f32.p0i8"] + fn vld1q_v4f32(addr: *const i8, align: i32) -> float32x4_t; +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.8"))] +pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { + vld1_v8i8(ptr as *const i8, align_of::() as i32) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.8"))] +pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { + vld1q_v16i8(ptr as *const i8, align_of::() as i32) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.16"))] +pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { + vld1_v4i16(ptr as *const i8, align_of::() as i32) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.16"))] +pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { + vld1q_v8i16(ptr as *const i8, align_of::() as i32) +} + +/// Load multiple single-element structures to one, two, three, or four registers. 
+#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vldr))] +pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { + vld1_v2i32(ptr as *const i8, align_of::() as i32) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.32"))] +pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { + vld1q_v4i32(ptr as *const i8, align_of::() as i32) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vldr))] +pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { + vld1_v1i64(ptr as *const i8, align_of::() as i32) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.64"))] +pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { + vld1q_v2i64(ptr as *const i8, align_of::() as i32) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.8"))] +pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { + transmute(vld1_v8i8(ptr as *const i8, align_of::() as i32)) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.8"))] +pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { + transmute(vld1q_v16i8(ptr as *const i8, align_of::() as i32)) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.16"))] +pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { + transmute(vld1_v4i16(ptr as *const i8, align_of::() as i32)) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.16"))] +pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { + transmute(vld1q_v8i16(ptr as *const i8, align_of::() as i32)) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vldr))] +pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { + transmute(vld1_v2i32(ptr as *const i8, align_of::() as i32)) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.32"))] +pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { + transmute(vld1q_v4i32(ptr as *const i8, align_of::() as i32)) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vldr))] +pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { + transmute(vld1_v1i64(ptr as *const i8, align_of::() as i32)) +} + +/// Load multiple single-element structures to one, two, three, or four registers. 
+#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.64"))] +pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { + transmute(vld1q_v2i64(ptr as *const i8, align_of::() as i32)) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.8"))] +pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { + transmute(vld1_v8i8(ptr as *const i8, align_of::() as i32)) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.8"))] +pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { + transmute(vld1q_v16i8(ptr as *const i8, align_of::() as i32)) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.16"))] +pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { + transmute(vld1_v4i16(ptr as *const i8, align_of::() as i32)) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.16"))] +pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { + transmute(vld1q_v8i16(ptr as *const i8, align_of::() as i32)) +} + +/// Load multiple single-element structures to one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vldr))] +pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { + vld1_v2f32(ptr as *const i8, align_of::() as i32) +} + +/// Load multiple single-element structures to one, two, three, or four registers. 
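As a usage sketch for the `vld1_*` wrappers defined above (illustrative only, assuming an AArch32 NEON target and the usual `core::arch::arm` re-export):

```rust
// Sketch only, not part of the diff.
use core::arch::arm::{float32x2_t, vld1_f32};

/// Loads two consecutive f32 values into a 64-bit NEON register.
unsafe fn load_pair(data: &[f32; 2]) -> float32x2_t {
    // Only the natural alignment of f32 is required; the wrapper forwards
    // align_of::<f32>() as the alignment hint to llvm.arm.neon.vld1.
    vld1_f32(data.as_ptr())
}
```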
+#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vld1.32"))] +pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { + vld1q_v4f32(ptr as *const i8, align_of::() as i32) +} + +/// Table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + vtbl1(a, b) +} + +/// Table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + transmute(vtbl1(transmute(a), transmute(b))) +} + +/// Table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { + transmute(vtbl1(transmute(a), transmute(b))) +} + +/// Table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t { + vtbl2(a.0, a.1, b) +} + +/// Table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t { + transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) +} + +/// Table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { + transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) +} + +/// Table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t { + vtbl3(a.0, a.1, a.2, b) +} + +/// Table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { + transmute(vtbl3( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(b), + )) +} + +/// Table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { + transmute(vtbl3( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(b), + )) +} + +/// Table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t { + vtbl4(a.0, a.1, a.2, a.3, b) +} + +/// Table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { + transmute(vtbl4( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + transmute(b), + )) +} + +/// Table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { + transmute(vtbl4( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + transmute(b), + )) +} + +/// Extended table look-up +#[inline] 
+#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + vtbx1(a, b, c) +} + +/// Extended table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + transmute(vtbx1(transmute(a), transmute(b), transmute(c))) +} + +/// Extended table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { + transmute(vtbx1(transmute(a), transmute(b), transmute(c))) +} + +/// Extended table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t { + vtbx2(a, b.0, b.1, c) +} + +/// Extended table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t { + transmute(vtbx2( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(c), + )) +} + +/// Extended table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t { + transmute(vtbx2( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(c), + )) +} + +/// Extended table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t { + vtbx3(a, b.0, b.1, b.2, c) +} + +/// Extended table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { + transmute(vtbx3( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(c), + )) +} + +/// Extended table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { + transmute(vtbx3( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(c), + )) +} + +/// Extended table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { + vtbx4(a, b.0, b.1, b.2, b.3, c) +} + +/// Extended table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { + transmute(vtbx4( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + transmute(c), + )) +} + +/// Extended table look-up +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> 
poly8x8_t { + transmute(vtbx4( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + transmute(c), + )) +} + +// These float-to-int implementations have undefined behaviour when `a` overflows +// the destination type. Clang has the same problem: https://llvm.org/PR47510 + +/// Floating-point Convert to Signed fixed-point, rounding toward Zero (vector) +#[inline] +#[target_feature(enable = "neon")] +#[target_feature(enable = "v7")] +#[cfg_attr(test, assert_instr("vcvt.s32.f32"))] +pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t { + transmute(simd_cast::<_, i32x4>(transmute::<_, f32x4>(a))) +} + +/// Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector) +#[inline] +#[target_feature(enable = "neon")] +#[target_feature(enable = "v7")] +#[cfg_attr(test, assert_instr("vcvt.u32.f32"))] +pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { + transmute(simd_cast::<_, u32x4>(transmute::<_, f32x4>(a))) +} + +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert_imm3!(N); + let n = N as i8; + vshiftins_v8i8(a, b, int8x8_t(n, n, n, n, n, n, n, n)) +} +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert_imm3!(N); + let n = N as i8; + vshiftins_v16i8( + a, + b, + int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), + ) +} +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_imm4!(N); + let n = N as i16; + vshiftins_v4i16(a, b, int16x4_t(n, n, n, n)) +} +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_imm4!(N); + let n = N as i16; + vshiftins_v8i16(a, b, int16x8_t(n, n, n, n, n, n, n, n)) +} +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N: i32 where N >= 0 && N <= 31); + vshiftins_v2i32(a, b, int32x2_t(N, N)) +} +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N: i32 where N >= 0 && N <= 31); + vshiftins_v4i32(a, b, int32x4_t(N, N, N, N)) +} +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N : i32 where 0 <= N && N <= 63); + vshiftins_v1i64(a, b, int64x1_t(N as i64)) +} +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, 
assert_instr("vsli.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N : i32 where 0 <= N && N <= 63); + vshiftins_v2i64(a, b, int64x2_t(N as i64, N as i64)) +} +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert_imm3!(N); + let n = N as i8; + transmute(vshiftins_v8i8( + transmute(a), + transmute(b), + int8x8_t(n, n, n, n, n, n, n, n), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert_imm3!(N); + let n = N as i8; + transmute(vshiftins_v16i8( + transmute(a), + transmute(b), + int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert_imm4!(N); + let n = N as i16; + transmute(vshiftins_v4i16( + transmute(a), + transmute(b), + int16x4_t(n, n, n, n), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert_imm4!(N); + let n = N as i16; + transmute(vshiftins_v8i16( + transmute(a), + transmute(b), + int16x8_t(n, n, n, n, n, n, n, n), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N: i32 where N >= 0 && N <= 31); + transmute(vshiftins_v2i32(transmute(a), transmute(b), int32x2_t(N, N))) +} +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N: i32 where N >= 0 && N <= 31); + transmute(vshiftins_v4i32( + transmute(a), + transmute(b), + int32x4_t(N, N, N, N), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(N : i32 where 0 <= N && N <= 63); + transmute(vshiftins_v1i64( + transmute(a), + transmute(b), + int64x1_t(N as i64), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(N : i32 where 0 <= N && N <= 63); + transmute(vshiftins_v2i64( + transmute(a), + transmute(b), + int64x2_t(N as i64, N as i64), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.8", N = 1))] 
+#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert_imm3!(N); + let n = N as i8; + transmute(vshiftins_v8i8( + transmute(a), + transmute(b), + int8x8_t(n, n, n, n, n, n, n, n), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert_imm3!(N); + let n = N as i8; + transmute(vshiftins_v16i8( + transmute(a), + transmute(b), + int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsli_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert_imm4!(N); + let n = N as i16; + transmute(vshiftins_v4i16( + transmute(a), + transmute(b), + int16x4_t(n, n, n, n), + )) +} +/// Shift Left and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsli.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsliq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert_imm4!(N); + let n = N as i16; + transmute(vshiftins_v8i16( + transmute(a), + transmute(b), + int16x8_t(n, n, n, n, n, n, n, n), + )) +} + +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(N : i32 where 1 <= N && N <= 8); + let n = -N as i8; + vshiftins_v8i8(a, b, int8x8_t(n, n, n, n, n, n, n, n)) +} +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert!(N : i32 where 1 <= N && N <= 8); + let n = -N as i8; + vshiftins_v16i8( + a, + b, + int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), + ) +} +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert!(N : i32 where 1 <= N && N <= 16); + let n = -N as i16; + vshiftins_v4i16(a, b, int16x4_t(n, n, n, n)) +} +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert!(N : i32 where 1 <= N && N <= 16); + let n = -N as i16; + vshiftins_v8i16(a, b, int16x8_t(n, n, n, n, n, n, n, n)) +} +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N : i32 where 1 <= N && N <= 32); + vshiftins_v2i32(a, b, int32x2_t(-N, -N)) +} +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_s32(a: int32x4_t, b: 
int32x4_t) -> int32x4_t { + static_assert!(N : i32 where 1 <= N && N <= 32); + vshiftins_v4i32(a, b, int32x4_t(-N, -N, -N, -N)) +} +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N : i32 where 1 <= N && N <= 64); + vshiftins_v1i64(a, b, int64x1_t(-N as i64)) +} +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N : i32 where 1 <= N && N <= 64); + vshiftins_v2i64(a, b, int64x2_t(-N as i64, -N as i64)) +} +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert!(N : i32 where 1 <= N && N <= 8); + let n = -N as i8; + transmute(vshiftins_v8i8( + transmute(a), + transmute(b), + int8x8_t(n, n, n, n, n, n, n, n), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(N : i32 where 1 <= N && N <= 8); + let n = -N as i8; + transmute(vshiftins_v16i8( + transmute(a), + transmute(b), + int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert!(N : i32 where 1 <= N && N <= 16); + let n = -N as i16; + transmute(vshiftins_v4i16( + transmute(a), + transmute(b), + int16x4_t(n, n, n, n), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert!(N : i32 where 1 <= N && N <= 16); + let n = -N as i16; + transmute(vshiftins_v8i16( + transmute(a), + transmute(b), + int16x8_t(n, n, n, n, n, n, n, n), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N : i32 where 1 <= N && N <= 32); + transmute(vshiftins_v2i32( + transmute(a), + transmute(b), + int32x2_t(-N, -N), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.32", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N : i32 where 1 <= N && N <= 32); + transmute(vshiftins_v4i32( + transmute(a), + transmute(b), + int32x4_t(-N, -N, -N, -N), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_u64(a: uint64x1_t, b: 
uint64x1_t) -> uint64x1_t { + static_assert!(N : i32 where 1 <= N && N <= 64); + transmute(vshiftins_v1i64( + transmute(a), + transmute(b), + int64x1_t(-N as i64), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.64", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(N : i32 where 1 <= N && N <= 64); + transmute(vshiftins_v2i64( + transmute(a), + transmute(b), + int64x2_t(-N as i64, -N as i64), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + static_assert!(N : i32 where 1 <= N && N <= 8); + let n = -N as i8; + transmute(vshiftins_v8i8( + transmute(a), + transmute(b), + int8x8_t(n, n, n, n, n, n, n, n), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.8", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + static_assert!(N : i32 where 1 <= N && N <= 8); + let n = -N as i8; + transmute(vshiftins_v16i8( + transmute(a), + transmute(b), + int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsri_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + static_assert!(N : i32 where 1 <= N && N <= 16); + let n = -N as i16; + transmute(vshiftins_v4i16( + transmute(a), + transmute(b), + int16x4_t(n, n, n, n), + )) +} +/// Shift Right and Insert (immediate) +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr("vsri.16", N = 1))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + static_assert!(N : i32 where 1 <= N && N <= 16); + let n = -N as i16; + transmute(vshiftins_v8i16( + transmute(a), + transmute(b), + int16x8_t(n, n, n, n, n, n, n, n), + )) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core_arch::{arm::*, simd::*}; + use crate::mem::transmute; + use stdarch_test::simd_test; + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtq_s32_f32() { + let f = f32x4::new(-1., 2., 3., 4.); + let e = i32x4::new(-1, 2, 3, 4); + let r: i32x4 = transmute(vcvtq_s32_f32(transmute(f))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtq_u32_f32() { + let f = f32x4::new(1., 2., 3., 4.); + let e = u32x4::new(1, 2, 3, 4); + let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f))); + assert_eq!(r, e); + } +} diff --git a/crates/core_arch/src/acle/sat.rs b/crates/core_arch/src/arm/sat.rs similarity index 100% rename from crates/core_arch/src/acle/sat.rs rename to crates/core_arch/src/arm/sat.rs diff --git a/crates/core_arch/src/acle/simd32.rs b/crates/core_arch/src/arm/simd32.rs similarity index 99% rename from crates/core_arch/src/acle/simd32.rs rename to crates/core_arch/src/arm/simd32.rs index 04183d4b88..5cae2fc2aa 100644 --- a/crates/core_arch/src/acle/simd32.rs +++ b/crates/core_arch/src/arm/simd32.rs @@ -65,7 +65,7 @@ #[cfg(test)] use stdarch_test::assert_instr; -use crate::{core_arch::acle::dsp::int16x2_t, mem::transmute}; +use 
crate::{core_arch::arm::dsp::int16x2_t, mem::transmute}; types! { /// ARM-specific 32-bit wide vector of four packed `i8`. diff --git a/crates/core_arch/src/acle/barrier/common.rs b/crates/core_arch/src/arm_shared/barrier/common.rs similarity index 100% rename from crates/core_arch/src/acle/barrier/common.rs rename to crates/core_arch/src/arm_shared/barrier/common.rs diff --git a/crates/core_arch/src/acle/barrier/cp15.rs b/crates/core_arch/src/arm_shared/barrier/cp15.rs similarity index 100% rename from crates/core_arch/src/acle/barrier/cp15.rs rename to crates/core_arch/src/arm_shared/barrier/cp15.rs diff --git a/crates/core_arch/src/acle/barrier/mod.rs b/crates/core_arch/src/arm_shared/barrier/mod.rs similarity index 100% rename from crates/core_arch/src/acle/barrier/mod.rs rename to crates/core_arch/src/arm_shared/barrier/mod.rs diff --git a/crates/core_arch/src/acle/barrier/not_mclass.rs b/crates/core_arch/src/arm_shared/barrier/not_mclass.rs similarity index 100% rename from crates/core_arch/src/acle/barrier/not_mclass.rs rename to crates/core_arch/src/arm_shared/barrier/not_mclass.rs diff --git a/crates/core_arch/src/acle/barrier/v8.rs b/crates/core_arch/src/arm_shared/barrier/v8.rs similarity index 100% rename from crates/core_arch/src/acle/barrier/v8.rs rename to crates/core_arch/src/arm_shared/barrier/v8.rs diff --git a/crates/core_arch/src/arm/crc.rs b/crates/core_arch/src/arm_shared/crc.rs similarity index 98% rename from crates/core_arch/src/arm/crc.rs rename to crates/core_arch/src/arm_shared/crc.rs index ffce20fe22..b1cfbb381b 100644 --- a/crates/core_arch/src/arm/crc.rs +++ b/crates/core_arch/src/arm_shared/crc.rs @@ -79,7 +79,7 @@ pub unsafe fn __crc32cw(crc: u32, data: u32) -> u32 { #[cfg(test)] mod tests { - use crate::core_arch::{arm::*, simd::*}; + use crate::core_arch::{arm_shared::*, simd::*}; use std::mem; use stdarch_test::simd_test; diff --git a/crates/core_arch/src/arm/crypto.rs b/crates/core_arch/src/arm_shared/crypto.rs similarity index 99% rename from crates/core_arch/src/arm/crypto.rs rename to crates/core_arch/src/arm_shared/crypto.rs index 8361e39646..b4d5b2978f 100644 --- a/crates/core_arch/src/arm/crypto.rs +++ b/crates/core_arch/src/arm_shared/crypto.rs @@ -1,4 +1,4 @@ -use crate::core_arch::arm::{uint32x4_t, uint8x16_t}; +use crate::core_arch::arm_shared::{uint32x4_t, uint8x16_t}; #[allow(improper_ctypes)] extern "C" { @@ -191,7 +191,8 @@ pub unsafe fn vsha256su1q_u32( #[cfg(test)] mod tests { - use crate::core_arch::{arm::*, simd::*}; + use super::*; + use crate::core_arch::{arm_shared::*, simd::*}; use std::mem; use stdarch_test::simd_test; diff --git a/crates/core_arch/src/acle/hints.rs b/crates/core_arch/src/arm_shared/hints.rs similarity index 69% rename from crates/core_arch/src/acle/hints.rs rename to crates/core_arch/src/arm_shared/hints.rs index 280aa00cf8..3145cde8d5 100644 --- a/crates/core_arch/src/acle/hints.rs +++ b/crates/core_arch/src/arm_shared/hints.rs @@ -9,7 +9,7 @@ /// low-power state until one of a number of asynchronous events occurs. // Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M // LLVM says "instruction requires: armv6k" -#[cfg(any(target_feature = "v6", target_arch = "aarch64"))] +#[cfg(any(target_feature = "v6", target_arch = "aarch64", doc))] #[inline(always)] pub unsafe fn __wfi() { hint(HINT_WFI); @@ -22,7 +22,7 @@ pub unsafe fn __wfi() { /// another processor. 
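The relocated `crc` module keeps its API unchanged; a hedged usage sketch of the `__crc32cw` intrinsic whose signature is visible in the hunk above (assuming a target with the CRC extension and the usual `core::arch::arm` re-export):

```rust
// Sketch only, not part of the diff. Assumes the `crc` target feature is
// available and __crc32cw is re-exported from core::arch::arm.
use core::arch::arm::__crc32cw;

/// Folds a slice of words into a running CRC-32C accumulator.
unsafe fn crc32c_words(mut crc: u32, words: &[u32]) -> u32 {
    for &w in words {
        crc = __crc32cw(crc, w);
    }
    crc
}
```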
// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M // LLVM says "instruction requires: armv6k" -#[cfg(any(target_feature = "v6", target_arch = "aarch64"))] +#[cfg(any(target_feature = "v6", target_arch = "aarch64", doc))] #[inline(always)] pub unsafe fn __wfe() { hint(HINT_WFE); @@ -34,7 +34,7 @@ pub unsafe fn __wfe() { /// system. It is a NOP on a uniprocessor system. // Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M, 7-M // LLVM says "instruction requires: armv6k" -#[cfg(any(target_feature = "v6", target_arch = "aarch64"))] +#[cfg(any(target_feature = "v6", target_arch = "aarch64", doc))] #[inline(always)] pub unsafe fn __sev() { hint(HINT_SEV); @@ -49,6 +49,7 @@ pub unsafe fn __sev() { #[cfg(any( target_feature = "v8", // 32-bit ARMv8 target_arch = "aarch64", // AArch64 + doc, ))] #[inline(always)] pub unsafe fn __sevl() { @@ -62,33 +63,12 @@ pub unsafe fn __sevl() { /// improve overall system performance. // Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M // LLVM says "instruction requires: armv6k" -#[cfg(any(target_feature = "v6", target_arch = "aarch64"))] +#[cfg(any(target_feature = "v6", target_arch = "aarch64", doc))] #[inline(always)] pub unsafe fn __yield() { hint(HINT_YIELD); } -/// Generates a DBG instruction. -/// -/// This provides a hint to debugging and related systems. The argument must be -/// a constant integer from 0 to 15 inclusive. See implementation documentation -/// for the effect (if any) of this instruction and the meaning of the -/// argument. This is available only when compliling for AArch32. -// Section 10.1 of ACLE says that the supported arches are: 7, 7-M -// "The DBG hint instruction is added in ARMv7. It is UNDEFINED in the ARMv6 base architecture, and -// executes as a NOP instruction in ARMv6K and ARMv6T2." - ARM Architecture Reference Manual ARMv7-A -// and ARMv7-R edition (ARM DDI 0406C.c) sections D12.4.1 "ARM instruction set support" and D12.4.2 -// "Thumb instruction set support" -#[cfg(target_feature = "v7")] -#[cfg(any(target_arch = "arm", doc))] -#[doc(cfg(target_arch = "arm"))] -#[inline(always)] -#[rustc_legacy_const_generics(0)] -pub unsafe fn __dbg() { - static_assert_imm4!(IMM4); - dbg(IMM4); -} - /// Generates an unspecified no-op instruction. /// /// Note that not all architectures provide a distinguished NOP instruction. On @@ -104,10 +84,6 @@ extern "C" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.hint")] #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.hint")] fn hint(_: i32); - - #[cfg(target_arch = "arm")] - #[link_name = "llvm.arm.dbg"] - fn dbg(_: i32); } // from LLVM 7.0.1's lib/Target/ARM/{ARMInstrThumb,ARMInstrInfo,ARMInstrThumb2}.td diff --git a/crates/core_arch/src/acle/mod.rs b/crates/core_arch/src/arm_shared/mod.rs similarity index 65% rename from crates/core_arch/src/acle/mod.rs rename to crates/core_arch/src/arm_shared/mod.rs index 5f29decf5a..4c8d19854e 100644 --- a/crates/core_arch/src/acle/mod.rs +++ b/crates/core_arch/src/arm_shared/mod.rs @@ -47,6 +47,9 @@ //! //! - [ACLE Q2 2018](https://developer.arm.com/docs/101028/latest) +// Only for 'neon' submodule +#![allow(non_camel_case_types)] + // 8, 7 and 6-M are supported via dedicated instructions like DMB. All other arches are supported // via CP15 instructions. 
See Section 10.1 of ACLE mod barrier; @@ -54,70 +57,29 @@ mod barrier; pub use self::barrier::*; mod hints; - pub use self::hints::*; mod registers; - pub use self::registers::*; -mod ex; - -pub use self::ex::*; - -// Supported arches: 5TE, 7E-M. See Section 10.1 of ACLE (e.g. QADD) -// We also include the A profile even though DSP is deprecated on that profile as of ACLE 2.0 (see -// section 5.4.7) -// Here we workaround the difference between LLVM's +dsp and ACLE's __ARM_FEATURE_DSP by gating on -// '+v5te' rather than on '+dsp' -#[cfg(all( - not(target_arch = "aarch64"), - any( - // >= v5TE but excludes v7-M - all(target_feature = "v5te", not(target_feature = "mclass")), - // v7E-M - all(target_feature = "mclass", target_feature = "dsp"), - ) -))] -mod dsp; - -#[cfg(all( - not(target_arch = "aarch64"), - any( - all(target_feature = "v5te", not(target_feature = "mclass")), - all(target_feature = "mclass", target_feature = "dsp"), - ) -))] -pub use self::dsp::*; - -// Supported arches: 6, 7-M. See Section 10.1 of ACLE (e.g. SSAT) -#[cfg(all(not(target_arch = "aarch64"), target_feature = "v6",))] -mod sat; - -#[cfg(all(not(target_arch = "aarch64"), target_feature = "v6",))] -pub use self::sat::*; - -// Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R profiles, says -// Section 5.4.9 of ACLE. We'll expose these for the A profile even if deprecated -#[cfg(all( - not(target_arch = "aarch64"), - any( - // v7-A, v7-R - all(target_feature = "v6", not(target_feature = "mclass")), - // v7E-M - all(target_feature = "mclass", target_feature = "dsp") - ) -))] -mod simd32; - -#[cfg(all( - not(target_arch = "aarch64"), - any( - all(target_feature = "v6", not(target_feature = "mclass")), - all(target_feature = "mclass", target_feature = "dsp") - ) -))] -pub use self::simd32::*; +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] +mod crc; +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] +pub use crc::*; + +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] +mod crypto; +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] +pub use self::crypto::*; + +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] +pub(crate) mod neon; +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] +pub use self::neon::*; + +#[cfg(test)] +#[cfg(any(target_arch = "aarch64", target_feature = "v7", doc))] +pub(crate) mod test_support; mod sealed { pub trait Dmb { diff --git a/crates/core_arch/src/arm/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs similarity index 100% rename from crates/core_arch/src/arm/neon/generated.rs rename to crates/core_arch/src/arm_shared/neon/generated.rs diff --git a/crates/core_arch/src/arm/neon/load_tests.rs b/crates/core_arch/src/arm_shared/neon/load_tests.rs similarity index 100% rename from crates/core_arch/src/arm/neon/load_tests.rs rename to crates/core_arch/src/arm_shared/neon/load_tests.rs diff --git a/crates/core_arch/src/arm/neon/mod.rs b/crates/core_arch/src/arm_shared/neon/mod.rs similarity index 88% rename from crates/core_arch/src/arm/neon/mod.rs rename to crates/core_arch/src/arm_shared/neon/mod.rs index 02a58c1e87..3c87862166 100644 --- a/crates/core_arch/src/arm/neon/mod.rs +++ b/crates/core_arch/src/arm_shared/neon/mod.rs @@ -5,8 +5,6 @@ mod generated; #[rustfmt::skip] pub use self::generated::*; -#[cfg(target_arch = "arm")] -use crate::mem::align_of; use crate::{ convert::TryInto, core_arch::simd::*, core_arch::simd_llvm::*, hint::unreachable_unchecked, 
mem::transmute, @@ -21,67 +19,67 @@ pub(crate) type p128 = u128; types! { /// ARM-specific 64-bit wide vector of eight packed `i8`. - pub struct int8x8_t(i8, i8, i8, i8, i8, i8, i8, i8); + pub struct int8x8_t(pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8); /// ARM-specific 64-bit wide vector of eight packed `u8`. - pub struct uint8x8_t(u8, u8, u8, u8, u8, u8, u8, u8); + pub struct uint8x8_t(pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8); /// ARM-specific 64-bit wide polynomial vector of eight packed `p8`. - pub struct poly8x8_t(p8, p8, p8, p8, p8, p8, p8, p8); + pub struct poly8x8_t(pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8); /// ARM-specific 64-bit wide vector of four packed `i16`. - pub struct int16x4_t(i16, i16, i16, i16); + pub struct int16x4_t(pub(crate) i16, pub(crate) i16, pub(crate) i16, pub(crate) i16); /// ARM-specific 64-bit wide vector of four packed `u16`. - pub struct uint16x4_t(u16, u16, u16, u16); + pub struct uint16x4_t(pub(crate) u16, pub(crate) u16, pub(crate) u16, pub(crate) u16); // FIXME: ARM-specific 64-bit wide vector of four packed `f16`. // pub struct float16x4_t(f16, f16, f16, f16); /// ARM-specific 64-bit wide vector of four packed `p16`. - pub struct poly16x4_t(p16, p16, p16, p16); + pub struct poly16x4_t(pub(crate) p16, pub(crate) p16, pub(crate) p16, pub(crate) p16); /// ARM-specific 64-bit wide vector of two packed `i32`. - pub struct int32x2_t(i32, i32); + pub struct int32x2_t(pub(crate) i32, pub(crate) i32); /// ARM-specific 64-bit wide vector of two packed `u32`. - pub struct uint32x2_t(u32, u32); + pub struct uint32x2_t(pub(crate) u32, pub(crate) u32); /// ARM-specific 64-bit wide vector of two packed `f32`. - pub struct float32x2_t(f32, f32); + pub struct float32x2_t(pub(crate) f32, pub(crate) f32); /// ARM-specific 64-bit wide vector of one packed `i64`. - pub struct int64x1_t(i64); + pub struct int64x1_t(pub(crate) i64); /// ARM-specific 64-bit wide vector of one packed `u64`. - pub struct uint64x1_t(u64); + pub struct uint64x1_t(pub(crate) u64); /// ARM-specific 64-bit wide vector of one packed `p64`. - pub struct poly64x1_t(p64); + pub struct poly64x1_t(pub(crate) p64); /// ARM-specific 128-bit wide vector of sixteen packed `i8`. pub struct int8x16_t( - i8, i8, i8, i8, i8, i8 ,i8, i8, - i8, i8, i8, i8, i8, i8 ,i8, i8, + pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8 , pub(crate) i8, pub(crate) i8, + pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8 , pub(crate) i8, pub(crate) i8, ); /// ARM-specific 128-bit wide vector of sixteen packed `u8`. pub struct uint8x16_t( - u8, u8 ,u8, u8, u8, u8 ,u8, u8, - u8, u8 ,u8, u8, u8, u8 ,u8, u8, + pub(crate) u8, pub(crate) u8 , pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8 , pub(crate) u8, pub(crate) u8, + pub(crate) u8, pub(crate) u8 , pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8 , pub(crate) u8, pub(crate) u8, ); /// ARM-specific 128-bit wide vector of sixteen packed `p8`. 
pub struct poly8x16_t( - p8, p8, p8, p8, p8, p8, p8, p8, - p8, p8, p8, p8, p8, p8, p8, p8, + pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, + pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, ); /// ARM-specific 128-bit wide vector of eight packed `i16`. - pub struct int16x8_t(i16, i16, i16, i16, i16, i16, i16, i16); + pub struct int16x8_t(pub(crate) i16, pub(crate) i16, pub(crate) i16, pub(crate) i16, pub(crate) i16, pub(crate) i16, pub(crate) i16, pub(crate) i16); /// ARM-specific 128-bit wide vector of eight packed `u16`. - pub struct uint16x8_t(u16, u16, u16, u16, u16, u16, u16, u16); + pub struct uint16x8_t(pub(crate) u16, pub(crate) u16, pub(crate) u16, pub(crate) u16, pub(crate) u16, pub(crate) u16, pub(crate) u16, pub(crate) u16); // FIXME: ARM-specific 128-bit wide vector of eight packed `f16`. // pub struct float16x8_t(f16, f16, f16, f16, f16, f16, f16); /// ARM-specific 128-bit wide vector of eight packed `p16`. - pub struct poly16x8_t(p16, p16, p16, p16, p16, p16, p16, p16); + pub struct poly16x8_t(pub(crate) p16, pub(crate) p16, pub(crate) p16, pub(crate) p16, pub(crate) p16, pub(crate) p16, pub(crate) p16, pub(crate) p16); /// ARM-specific 128-bit wide vector of four packed `i32`. - pub struct int32x4_t(i32, i32, i32, i32); + pub struct int32x4_t(pub(crate) i32, pub(crate) i32, pub(crate) i32, pub(crate) i32); /// ARM-specific 128-bit wide vector of four packed `u32`. - pub struct uint32x4_t(u32, u32, u32, u32); + pub struct uint32x4_t(pub(crate) u32, pub(crate) u32, pub(crate) u32, pub(crate) u32); /// ARM-specific 128-bit wide vector of four packed `f32`. - pub struct float32x4_t(f32, f32, f32, f32); + pub struct float32x4_t(pub(crate) f32, pub(crate) f32, pub(crate) f32, pub(crate) f32); /// ARM-specific 128-bit wide vector of two packed `i64`. - pub struct int64x2_t(i64, i64); + pub struct int64x2_t(pub(crate) i64, pub(crate) i64); /// ARM-specific 128-bit wide vector of two packed `u64`. - pub struct uint64x2_t(u64, u64); + pub struct uint64x2_t(pub(crate) u64, pub(crate) u64); /// ARM-specific 128-bit wide vector of two packed `p64`. - pub struct poly64x2_t(p64, p64); + pub struct poly64x2_t(pub(crate) p64, pub(crate) p64); } /// ARM-specific type containing two `int8x8_t` vectors. 
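// Illustrative sketch, not part of this patch: the pub(crate) fields added in the
// hunk above let sibling modules inside core_arch build these vectors by tuple
// construction (e.g. int32x2_t(n, n)), while code outside the crate keeps going
// through the intrinsics, as below. Assumes an AArch64 target, where NEON is baseline.
#[cfg(target_arch = "aarch64")]
fn splat_seven() -> core::arch::aarch64::int32x2_t {
    // vdup_n_s32 broadcasts the scalar into both 32-bit lanes; NEON is always
    // available on AArch64, so the call is sound here.
    unsafe { core::arch::aarch64::vdup_n_s32(7) }
}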
@@ -214,74 +212,74 @@ extern "C" { target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlp.v4i16.v8i8" )] - fn vpaddl_s8_(a: int8x8_t) -> int16x4_t; + pub(crate) fn vpaddl_s8_(a: int8x8_t) -> int16x4_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i32.v4i16")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlp.v2i32.v4i16" )] - fn vpaddl_s16_(a: int16x4_t) -> int32x2_t; + pub(crate) fn vpaddl_s16_(a: int16x4_t) -> int32x2_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v1i64.v2i32")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlp.v1i64.v2i32" )] - fn vpaddl_s32_(a: int32x2_t) -> int64x1_t; + pub(crate) fn vpaddl_s32_(a: int32x2_t) -> int64x1_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v8i16.v16i8")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlp.v8i16.v16i8" )] - fn vpaddlq_s8_(a: int8x16_t) -> int16x8_t; + pub(crate) fn vpaddlq_s8_(a: int8x16_t) -> int16x8_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i32.v8i16")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlp.v4i32.v8i16" )] - fn vpaddlq_s16_(a: int16x8_t) -> int32x4_t; + pub(crate) fn vpaddlq_s16_(a: int16x8_t) -> int32x4_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i64.v4i32")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlp.v2i64.v4i32" )] - fn vpaddlq_s32_(a: int32x4_t) -> int64x2_t; + pub(crate) fn vpaddlq_s32_(a: int32x4_t) -> int64x2_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i16.v8i8")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlp.v4i16.v8i8" )] - fn vpaddl_u8_(a: uint8x8_t) -> uint16x4_t; + pub(crate) fn vpaddl_u8_(a: uint8x8_t) -> uint16x4_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i32.v4i16")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlp.v2i32.v4i16" )] - fn vpaddl_u16_(a: uint16x4_t) -> uint32x2_t; + pub(crate) fn vpaddl_u16_(a: uint16x4_t) -> uint32x2_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v1i64.v2i32")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlp.v1i64.v2i32" )] - fn vpaddl_u32_(a: uint32x2_t) -> uint64x1_t; + pub(crate) fn vpaddl_u32_(a: uint32x2_t) -> uint64x1_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v8i16.v16i8")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlp.v8i16.v16i8" )] - fn vpaddlq_u8_(a: uint8x16_t) -> uint16x8_t; + pub(crate) fn vpaddlq_u8_(a: uint8x16_t) -> uint16x8_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i32.v8i16")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlp.v4i32.v8i16" )] - fn vpaddlq_u16_(a: uint16x8_t) -> uint32x4_t; + pub(crate) fn vpaddlq_u16_(a: uint16x8_t) -> uint32x4_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i64.v4i32")] #[cfg_attr( target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlp.v2i64.v4i32" )] - fn vpaddlq_u32_(a: uint32x4_t) -> uint64x2_t; + pub(crate) fn vpaddlq_u32_(a: uint32x4_t) -> uint64x2_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.ctpop.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctpop.v8i8")] @@ -310,301 +308,6 @@ extern "C" { fn vclzq_s32_(a: int32x4_t) -> int32x4_t; } -#[cfg(target_arch = "arm")] -#[allow(improper_ctypes)] -extern "C" { - #[link_name = 
"llvm.arm.neon.vbsl.v8i8"] - fn vbsl_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vbsl.v16i8"] - fn vbslq_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; - #[link_name = "llvm.arm.neon.vpadals.v4i16.v8i8"] - fn vpadal_s8_(a: int16x4_t, b: int8x8_t) -> int16x4_t; - #[link_name = "llvm.arm.neon.vpadals.v2i32.v4i16"] - fn vpadal_s16_(a: int32x2_t, b: int16x4_t) -> int32x2_t; - #[link_name = "llvm.arm.neon.vpadals.v1i64.v2i32"] - fn vpadal_s32_(a: int64x1_t, b: int32x2_t) -> int64x1_t; - #[link_name = "llvm.arm.neon.vpadals.v8i16.v16i8"] - fn vpadalq_s8_(a: int16x8_t, b: int8x16_t) -> int16x8_t; - #[link_name = "llvm.arm.neon.vpadals.v4i32.v8i16"] - fn vpadalq_s16_(a: int32x4_t, b: int16x8_t) -> int32x4_t; - #[link_name = "llvm.arm.neon.vpadals.v2i64.v4i32"] - fn vpadalq_s32_(a: int64x2_t, b: int32x4_t) -> int64x2_t; - - #[link_name = "llvm.arm.neon.vpadalu.v4i16.v8i8"] - fn vpadal_u8_(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t; - #[link_name = "llvm.arm.neon.vpadalu.v2i32.v4i16"] - fn vpadal_u16_(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t; - #[link_name = "llvm.arm.neon.vpadalu.v1i64.v2i32"] - fn vpadal_u32_(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t; - #[link_name = "llvm.arm.neon.vpadalu.v8i16.v16i8"] - fn vpadalq_u8_(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t; - #[link_name = "llvm.arm.neon.vpadalu.v4i32.v8i16"] - fn vpadalq_u16_(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t; - #[link_name = "llvm.arm.neon.vpadalu.v2i64.v4i32"] - fn vpadalq_u32_(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t; - - #[link_name = "llvm.arm.neon.vtbl1"] - fn vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vtbl2"] - fn vtbl2(a: int8x8_t, b: int8x8_t, b: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vtbl3"] - fn vtbl3(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vtbl4"] - fn vtbl4(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; - - #[link_name = "llvm.arm.neon.vtbx1"] - fn vtbx1(a: int8x8_t, b: int8x8_t, b: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vtbx2"] - fn vtbx2(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vtbx3"] - fn vtbx3(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vtbx4"] - fn vtbx4( - a: int8x8_t, - b: int8x8_t, - b: int8x8_t, - c: int8x8_t, - d: int8x8_t, - e: int8x8_t, - ) -> int8x8_t; - - #[link_name = "llvm.arm.neon.vshiftins.v8i8"] - fn vshiftins_v8i8(a: int8x8_t, b: int8x8_t, shift: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vshiftins.v16i8"] - fn vshiftins_v16i8(a: int8x16_t, b: int8x16_t, shift: int8x16_t) -> int8x16_t; - #[link_name = "llvm.arm.neon.vshiftins.v4i16"] - fn vshiftins_v4i16(a: int16x4_t, b: int16x4_t, shift: int16x4_t) -> int16x4_t; - #[link_name = "llvm.arm.neon.vshiftins.v8i16"] - fn vshiftins_v8i16(a: int16x8_t, b: int16x8_t, shift: int16x8_t) -> int16x8_t; - #[link_name = "llvm.arm.neon.vshiftins.v2i32"] - fn vshiftins_v2i32(a: int32x2_t, b: int32x2_t, shift: int32x2_t) -> int32x2_t; - #[link_name = "llvm.arm.neon.vshiftins.v4i32"] - fn vshiftins_v4i32(a: int32x4_t, b: int32x4_t, shift: int32x4_t) -> int32x4_t; - #[link_name = "llvm.arm.neon.vshiftins.v1i64"] - fn vshiftins_v1i64(a: int64x1_t, b: int64x1_t, shift: int64x1_t) -> int64x1_t; - #[link_name = "llvm.arm.neon.vshiftins.v2i64"] - fn vshiftins_v2i64(a: int64x2_t, b: int64x2_t, shift: int64x2_t) -> int64x2_t; - - 
#[link_name = "llvm.arm.neon.vld1.v8i8.p0i8"] - fn vld1_v8i8(addr: *const i8, align: i32) -> int8x8_t; - #[link_name = "llvm.arm.neon.vld1.v16i8.p0i8"] - fn vld1q_v16i8(addr: *const i8, align: i32) -> int8x16_t; - #[link_name = "llvm.arm.neon.vld1.v4i16.p0i8"] - fn vld1_v4i16(addr: *const i8, align: i32) -> int16x4_t; - #[link_name = "llvm.arm.neon.vld1.v8i16.p0i8"] - fn vld1q_v8i16(addr: *const i8, align: i32) -> int16x8_t; - #[link_name = "llvm.arm.neon.vld1.v2i32.p0i8"] - fn vld1_v2i32(addr: *const i8, align: i32) -> int32x2_t; - #[link_name = "llvm.arm.neon.vld1.v4i32.p0i8"] - fn vld1q_v4i32(addr: *const i8, align: i32) -> int32x4_t; - #[link_name = "llvm.arm.neon.vld1.v1i64.p0i8"] - fn vld1_v1i64(addr: *const i8, align: i32) -> int64x1_t; - #[link_name = "llvm.arm.neon.vld1.v2i64.p0i8"] - fn vld1q_v2i64(addr: *const i8, align: i32) -> int64x2_t; - #[link_name = "llvm.arm.neon.vld1.v2f32.p0i8"] - fn vld1_v2f32(addr: *const i8, align: i32) -> float32x2_t; - #[link_name = "llvm.arm.neon.vld1.v4f32.p0i8"] - fn vld1q_v4f32(addr: *const i8, align: i32) -> float32x4_t; -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.8"))] -pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { - vld1_v8i8(ptr as *const i8, align_of::() as i32) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.8"))] -pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { - vld1q_v16i8(ptr as *const i8, align_of::() as i32) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.16"))] -pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { - vld1_v4i16(ptr as *const i8, align_of::() as i32) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.16"))] -pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { - vld1q_v8i16(ptr as *const i8, align_of::() as i32) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vldr))] -pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { - vld1_v2i32(ptr as *const i8, align_of::() as i32) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.32"))] -pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { - vld1q_v4i32(ptr as *const i8, align_of::() as i32) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vldr))] -pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { - vld1_v1i64(ptr as *const i8, align_of::() as i32) -} - -/// Load multiple single-element structures to one, two, three, or four registers. 
-#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.64"))] -pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { - vld1q_v2i64(ptr as *const i8, align_of::() as i32) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.8"))] -pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { - transmute(vld1_v8i8(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.8"))] -pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { - transmute(vld1q_v16i8(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.16"))] -pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { - transmute(vld1_v4i16(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.16"))] -pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { - transmute(vld1q_v8i16(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vldr))] -pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { - transmute(vld1_v2i32(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.32"))] -pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { - transmute(vld1q_v4i32(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vldr))] -pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { - transmute(vld1_v1i64(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.64"))] -pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { - transmute(vld1q_v2i64(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.8"))] -pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { - transmute(vld1_v8i8(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. 
-#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.8"))] -pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { - transmute(vld1q_v16i8(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.16"))] -pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { - transmute(vld1_v4i16(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.16"))] -pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { - transmute(vld1q_v8i16(ptr as *const i8, align_of::() as i32)) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vldr))] -pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { - vld1_v2f32(ptr as *const i8, align_of::() as i32) -} - -/// Load multiple single-element structures to one, two, three, or four registers. -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vld1.32"))] -pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { - vld1q_v4f32(ptr as *const i8, align_of::() as i32) -} - /// Load one single-element structure to one lane of one register. #[inline] #[target_feature(enable = "neon")] @@ -943,8 +646,13 @@ pub unsafe fn vld1q_dup_s32(ptr: *const i32) -> int32x4_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))] pub unsafe fn vld1_dup_s64(ptr: *const i64) -> int64x1_t { #[cfg(target_arch = "aarch64")] - use crate::core_arch::aarch64::vld1_s64; - vld1_s64(ptr) + { + crate::core_arch::aarch64::vld1_s64(ptr) + } + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::vld1_s64(ptr) + } } /// Load one single-element structure and Replicate to all lanes (of one register). @@ -1032,8 +740,13 @@ pub unsafe fn vld1q_dup_u32(ptr: *const u32) -> uint32x4_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))] pub unsafe fn vld1_dup_u64(ptr: *const u64) -> uint64x1_t { #[cfg(target_arch = "aarch64")] - use crate::core_arch::aarch64::vld1_u64; - vld1_u64(ptr) + { + crate::core_arch::aarch64::vld1_u64(ptr) + } + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::vld1_u64(ptr) + } } /// Load one single-element structure and Replicate to all lanes (of one register). @@ -2158,321 +1871,117 @@ pub unsafe fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t { vpaddlq_u32_(a) } -/// Signed Add and Accumulate Long Pairwise. +/// Vector narrow integer. 
#[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s8))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] -pub unsafe fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t { - #[cfg(target_arch = "arm")] - { - vpadal_s8_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddl_s8_(b), a) - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] +pub unsafe fn vmovn_s16(a: int16x8_t) -> int8x8_t { + simd_cast(a) } -/// Signed Add and Accumulate Long Pairwise. +/// Vector narrow integer. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s16))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] -pub unsafe fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t { - #[cfg(target_arch = "arm")] - { - vpadal_s16_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddl_s16_(b), a) - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] +pub unsafe fn vmovn_s32(a: int32x4_t) -> int16x4_t { + simd_cast(a) } -/// Signed Add and Accumulate Long Pairwise. +/// Vector narrow integer. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s32))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] -pub unsafe fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t { - #[cfg(target_arch = "arm")] - { - vpadal_s32_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddl_s32_(b), a) - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] +pub unsafe fn vmovn_s64(a: int64x2_t) -> int32x2_t { + simd_cast(a) } -/// Signed Add and Accumulate Long Pairwise. +/// Vector narrow integer. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s8))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] -pub unsafe fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { - #[cfg(target_arch = "arm")] - { - vpadalq_s8_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddlq_s8_(b), a) - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] +pub unsafe fn vmovn_u16(a: uint16x8_t) -> uint8x8_t { + simd_cast(a) } -/// Signed Add and Accumulate Long Pairwise. +/// Vector narrow integer. 
#[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s16))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] -pub unsafe fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { - #[cfg(target_arch = "arm")] - { - vpadalq_s16_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddlq_s16_(b), a) - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] +pub unsafe fn vmovn_u32(a: uint32x4_t) -> uint16x4_t { + simd_cast(a) } -/// Signed Add and Accumulate Long Pairwise. +/// Vector narrow integer. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s32))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] -pub unsafe fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { - #[cfg(target_arch = "arm")] - { - vpadalq_s32_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddlq_s32_(b), a) - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] +pub unsafe fn vmovn_u64(a: uint64x2_t) -> uint32x2_t { + simd_cast(a) } -/// Unsigned Add and Accumulate Long Pairwise. +/// Vector long move. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u8))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] -pub unsafe fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t { - #[cfg(target_arch = "arm")] - { - vpadal_u8_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddl_u8_(b), a) - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] +pub unsafe fn vmovl_s8(a: int8x8_t) -> int16x8_t { + simd_cast(a) } -/// Unsigned Add and Accumulate Long Pairwise. +/// Vector long move. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u16))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] -pub unsafe fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t { - #[cfg(target_arch = "arm")] - { - vpadal_u16_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddl_u16_(b), a) - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] +pub unsafe fn vmovl_s16(a: int16x4_t) -> int32x4_t { + simd_cast(a) } -/// Unsigned Add and Accumulate Long Pairwise. +/// Vector long move. 
#[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u32))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] -pub unsafe fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t { - #[cfg(target_arch = "arm")] - { - vpadal_u32_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddl_u32_(b), a) - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] +pub unsafe fn vmovl_s32(a: int32x2_t) -> int64x2_t { + simd_cast(a) } -/// Unsigned Add and Accumulate Long Pairwise. +/// Vector long move. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u8))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] -pub unsafe fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { - #[cfg(target_arch = "arm")] - { - vpadalq_u8_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddlq_u8_(b), a) - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))] +pub unsafe fn vmovl_u8(a: uint8x8_t) -> uint16x8_t { + simd_cast(a) } -/// Unsigned Add and Accumulate Long Pairwise. +/// Vector long move. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u16))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] -pub unsafe fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { - #[cfg(target_arch = "arm")] - { - vpadalq_u16_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddlq_u16_(b), a) - } +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))] +pub unsafe fn vmovl_u16(a: uint16x4_t) -> uint32x4_t { + simd_cast(a) } -/// Unsigned Add and Accumulate Long Pairwise. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u32))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] -pub unsafe fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { - #[cfg(target_arch = "arm")] - { - vpadalq_u32_(a, b) - } - #[cfg(target_arch = "aarch64")] - { - simd_add(vpaddlq_u32_(b), a) - } -} - -/// Vector narrow integer. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] -pub unsafe fn vmovn_s16(a: int16x8_t) -> int8x8_t { - simd_cast(a) -} - -/// Vector narrow integer. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] -pub unsafe fn vmovn_s32(a: int32x4_t) -> int16x4_t { - simd_cast(a) -} - -/// Vector narrow integer. 
-#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] -pub unsafe fn vmovn_s64(a: int64x2_t) -> int32x2_t { - simd_cast(a) -} - -/// Vector narrow integer. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] -pub unsafe fn vmovn_u16(a: uint16x8_t) -> uint8x8_t { - simd_cast(a) -} - -/// Vector narrow integer. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] -pub unsafe fn vmovn_u32(a: uint32x4_t) -> uint16x4_t { - simd_cast(a) -} - -/// Vector narrow integer. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] -pub unsafe fn vmovn_u64(a: uint64x2_t) -> uint32x2_t { - simd_cast(a) -} - -/// Vector long move. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] -pub unsafe fn vmovl_s8(a: int8x8_t) -> int16x8_t { - simd_cast(a) -} - -/// Vector long move. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] -pub unsafe fn vmovl_s16(a: int16x4_t) -> int32x4_t { - simd_cast(a) -} - -/// Vector long move. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] -pub unsafe fn vmovl_s32(a: int32x2_t) -> int64x2_t { - simd_cast(a) -} - -/// Vector long move. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))] -pub unsafe fn vmovl_u8(a: uint8x8_t) -> uint16x8_t { - simd_cast(a) -} - -/// Vector long move. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))] -pub unsafe fn vmovl_u16(a: uint16x4_t) -> uint32x4_t { - simd_cast(a) -} - -/// Vector long move. +/// Vector long move. 
#[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] @@ -3368,304 +2877,6 @@ pub unsafe fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { vpmaxf_v2f32(a, b) } -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - vtbl1(a, b) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - transmute(vtbl1(transmute(a), transmute(b))) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { - transmute(vtbl1(transmute(a), transmute(b))) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t { - vtbl2(a.0, a.1, b) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t { - transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { - transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t { - vtbl3(a.0, a.1, a.2, b) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { - transmute(vtbl3( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(b), - )) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { - transmute(vtbl3( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(b), - )) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t { - vtbl4(a.0, a.1, a.2, a.3, b) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { - transmute(vtbl4( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(a.3), - transmute(b), - )) -} - -/// Table look-up -#[inline] 
-#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { - transmute(vtbl4( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(a.3), - transmute(b), - )) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { - vtbx1(a, b, c) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { - transmute(vtbx1(transmute(a), transmute(b), transmute(c))) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { - transmute(vtbx1(transmute(a), transmute(b), transmute(c))) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t { - vtbx2(a, b.0, b.1, c) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t { - transmute(vtbx2( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(c), - )) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t { - transmute(vtbx2( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(c), - )) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t { - vtbx3(a, b.0, b.1, b.2, c) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { - transmute(vtbx3( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(c), - )) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { - transmute(vtbx3( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(c), - )) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: 
int8x8_t) -> int8x8_t { - vtbx4(a, b.0, b.1, b.2, b.3, c) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { - transmute(vtbx4( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(b.3), - transmute(c), - )) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t { - transmute(vtbx4( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(b.3), - transmute(c), - )) -} - /// Move vector element to general-purpose register #[inline] #[target_feature(enable = "neon")] @@ -4440,29 +3651,6 @@ pub unsafe fn vext_u64(a: uint64x1_t, _b: uint64x1_t) -> uint64x1_ a } -// These float-to-int implementations have undefined behaviour when `a` overflows -// the destination type. Clang has the same problem: https://llvm.org/PR47510 - -/// Floating-point Convert to Signed fixed-point, rounding toward Zero (vector) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon")] -#[target_feature(enable = "v7")] -#[cfg_attr(test, assert_instr("vcvt.s32.f32"))] -pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t { - transmute(simd_cast::<_, i32x4>(transmute::<_, f32x4>(a))) -} - -/// Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon")] -#[target_feature(enable = "v7")] -#[cfg_attr(test, assert_instr("vcvt.u32.f32"))] -pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { - transmute(simd_cast::<_, u32x4>(transmute::<_, f32x4>(a))) -} - /// Population count per byte. 
#[inline] #[target_feature(enable = "neon")] @@ -4518,532 +3706,6 @@ pub unsafe fn vcntq_p8(a: poly8x16_t) -> poly8x16_t { transmute(vcntq_s8_(transmute(a))) } -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - static_assert_imm3!(N); - let n = N as i8; - vshiftins_v8i8(a, b, int8x8_t(n, n, n, n, n, n, n, n)) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert_imm3!(N); - let n = N as i8; - vshiftins_v16i8( - a, - b, - int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), - ) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert_imm4!(N); - let n = N as i16; - vshiftins_v4i16(a, b, int16x4_t(n, n, n, n)) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert_imm4!(N); - let n = N as i16; - vshiftins_v8i16(a, b, int16x8_t(n, n, n, n, n, n, n, n)) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert!(N: i32 where N >= 0 && N <= 31); - vshiftins_v2i32(a, b, int32x2_t(N, N)) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert!(N: i32 where N >= 0 && N <= 31); - vshiftins_v4i32(a, b, int32x4_t(N, N, N, N)) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - static_assert!(N : i32 where 0 <= N && N <= 63); - vshiftins_v1i64(a, b, int64x1_t(N as i64)) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert!(N : i32 where 0 <= N && N <= 63); - vshiftins_v2i64(a, b, int64x2_t(N as i64, N as i64)) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert_imm3!(N); - let n = N as i8; - transmute(vshiftins_v8i8( - transmute(a), 
- transmute(b), - int8x8_t(n, n, n, n, n, n, n, n), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert_imm3!(N); - let n = N as i8; - transmute(vshiftins_v16i8( - transmute(a), - transmute(b), - int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert_imm4!(N); - let n = N as i16; - transmute(vshiftins_v4i16( - transmute(a), - transmute(b), - int16x4_t(n, n, n, n), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert_imm4!(N); - let n = N as i16; - transmute(vshiftins_v8i16( - transmute(a), - transmute(b), - int16x8_t(n, n, n, n, n, n, n, n), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert!(N: i32 where N >= 0 && N <= 31); - transmute(vshiftins_v2i32(transmute(a), transmute(b), int32x2_t(N, N))) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert!(N: i32 where N >= 0 && N <= 31); - transmute(vshiftins_v4i32( - transmute(a), - transmute(b), - int32x4_t(N, N, N, N), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - static_assert!(N : i32 where 0 <= N && N <= 63); - transmute(vshiftins_v1i64( - transmute(a), - transmute(b), - int64x1_t(N as i64), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert!(N : i32 where 0 <= N && N <= 63); - transmute(vshiftins_v2i64( - transmute(a), - transmute(b), - int64x2_t(N as i64, N as i64), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - static_assert_imm3!(N); - let n = N as i8; - transmute(vshiftins_v8i8( - transmute(a), - transmute(b), - int8x8_t(n, n, n, n, n, n, n, n), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = 
"neon,v7")] -#[cfg_attr(test, assert_instr("vsli.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - static_assert_imm3!(N); - let n = N as i8; - transmute(vshiftins_v16i8( - transmute(a), - transmute(b), - int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsli_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - static_assert_imm4!(N); - let n = N as i16; - transmute(vshiftins_v4i16( - transmute(a), - transmute(b), - int16x4_t(n, n, n, n), - )) -} -/// Shift Left and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsli.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsliq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - static_assert_imm4!(N); - let n = N as i16; - transmute(vshiftins_v8i16( - transmute(a), - transmute(b), - int16x8_t(n, n, n, n, n, n, n, n), - )) -} - -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - static_assert!(N : i32 where 1 <= N && N <= 8); - let n = -N as i8; - vshiftins_v8i8(a, b, int8x8_t(n, n, n, n, n, n, n, n)) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert!(N : i32 where 1 <= N && N <= 8); - let n = -N as i8; - vshiftins_v16i8( - a, - b, - int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), - ) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert!(N : i32 where 1 <= N && N <= 16); - let n = -N as i16; - vshiftins_v4i16(a, b, int16x4_t(n, n, n, n)) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert!(N : i32 where 1 <= N && N <= 16); - let n = -N as i16; - vshiftins_v8i16(a, b, int16x8_t(n, n, n, n, n, n, n, n)) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert!(N : i32 where 1 <= N && N <= 32); - vshiftins_v2i32(a, b, int32x2_t(-N, -N)) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert!(N : i32 where 1 <= N && N <= 32); - vshiftins_v4i32(a, 
b, int32x4_t(-N, -N, -N, -N)) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - static_assert!(N : i32 where 1 <= N && N <= 64); - vshiftins_v1i64(a, b, int64x1_t(-N as i64)) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert!(N : i32 where 1 <= N && N <= 64); - vshiftins_v2i64(a, b, int64x2_t(-N as i64, -N as i64)) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert!(N : i32 where 1 <= N && N <= 8); - let n = -N as i8; - transmute(vshiftins_v8i8( - transmute(a), - transmute(b), - int8x8_t(n, n, n, n, n, n, n, n), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert!(N : i32 where 1 <= N && N <= 8); - let n = -N as i8; - transmute(vshiftins_v16i8( - transmute(a), - transmute(b), - int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert!(N : i32 where 1 <= N && N <= 16); - let n = -N as i16; - transmute(vshiftins_v4i16( - transmute(a), - transmute(b), - int16x4_t(n, n, n, n), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - static_assert!(N : i32 where 1 <= N && N <= 16); - let n = -N as i16; - transmute(vshiftins_v8i16( - transmute(a), - transmute(b), - int16x8_t(n, n, n, n, n, n, n, n), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - static_assert!(N : i32 where 1 <= N && N <= 32); - transmute(vshiftins_v2i32( - transmute(a), - transmute(b), - int32x2_t(-N, -N), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.32", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - static_assert!(N : i32 where 1 <= N && N <= 32); - transmute(vshiftins_v4i32( - transmute(a), - transmute(b), - int32x4_t(-N, -N, -N, -N), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] 
-#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - static_assert!(N : i32 where 1 <= N && N <= 64); - transmute(vshiftins_v1i64( - transmute(a), - transmute(b), - int64x1_t(-N as i64), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.64", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - static_assert!(N : i32 where 1 <= N && N <= 64); - transmute(vshiftins_v2i64( - transmute(a), - transmute(b), - int64x2_t(-N as i64, -N as i64), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { - static_assert!(N : i32 where 1 <= N && N <= 8); - let n = -N as i8; - transmute(vshiftins_v8i8( - transmute(a), - transmute(b), - int8x8_t(n, n, n, n, n, n, n, n), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.8", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { - static_assert!(N : i32 where 1 <= N && N <= 8); - let n = -N as i8; - transmute(vshiftins_v16i8( - transmute(a), - transmute(b), - int8x16_t(n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsri_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { - static_assert!(N : i32 where 1 <= N && N <= 16); - let n = -N as i16; - transmute(vshiftins_v4i16( - transmute(a), - transmute(b), - int16x4_t(n, n, n, n), - )) -} -/// Shift Right and Insert (immediate) -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr("vsri.16", N = 1))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { - static_assert!(N : i32 where 1 <= N && N <= 16); - let n = -N as i16; - transmute(vshiftins_v8i16( - transmute(a), - transmute(b), - int16x8_t(n, n, n, n, n, n, n, n), - )) -} - /// Reversing vector elements (swap endianness) #[inline] #[target_feature(enable = "neon")] @@ -5404,11 +4066,219 @@ pub unsafe fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t { simd_shuffle8(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) } +/// Signed Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +pub unsafe fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadal_s8_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_s8_(b), a) + } +} + +/// Signed Add and Accumulate Long Pairwise. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +pub unsafe fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadal_s16_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_s16_(b), a) + } +} + +/// Signed Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +pub unsafe fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadal_s32_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_s32_(b), a) + } +} + +/// Signed Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +pub unsafe fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadalq_s8_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_s8_(b), a) + } +} + +/// Signed Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +pub unsafe fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadalq_s16_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_s16_(b), a) + } +} + +/// Signed Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +pub unsafe fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadalq_s32_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_s32_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +pub unsafe fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadal_u8_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_u8_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +pub unsafe fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadal_u16_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_u16_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +pub unsafe fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadal_u32_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_u32_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +pub unsafe fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadalq_u8_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_u8_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +pub unsafe fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadalq_u16_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_u16_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +pub unsafe fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadalq_u32_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_u32_(b), a) + } +} + #[cfg(test)] mod tests { use super::*; - use crate::core_arch::arm::test_support::*; - use crate::core_arch::{arm::*, simd::*}; + #[cfg(target_arch = "aarch64")] + use crate::core_arch::aarch64::*; + #[cfg(target_arch = "arm")] + use crate::core_arch::arm::*; + use crate::core_arch::arm_shared::test_support::*; + use crate::core_arch::simd::*; use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec}; use stdarch_test::simd_test; @@ -5792,24 +4662,6 @@ mod tests { assert_eq!(r, e) } - #[cfg(target_arch = "arm")] - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_s32_f32() { - let f = f32x4::new(-1., 2., 3., 4.); - let e = i32x4::new(-1, 2, 3, 4); - let r: i32x4 = transmute(vcvtq_s32_f32(transmute(f))); - assert_eq!(r, e); - } - - #[cfg(target_arch = "arm")] - #[simd_test(enable = "neon")] - unsafe fn test_vcvtq_u32_f32() { - let f = f32x4::new(1., 2., 3., 4.); - let e = u32x4::new(1, 2, 3, 4); - let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f))); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] unsafe fn test_vget_lane_u8() { let v = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); diff --git a/crates/core_arch/src/arm/neon/shift_and_insert_tests.rs b/crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs similarity index 100% rename from crates/core_arch/src/arm/neon/shift_and_insert_tests.rs rename to crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs diff --git a/crates/core_arch/src/arm/neon/table_lookup_tests.rs b/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs similarity index 100% rename from crates/core_arch/src/arm/neon/table_lookup_tests.rs rename to crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs diff --git a/crates/core_arch/src/acle/registers/aarch32.rs b/crates/core_arch/src/arm_shared/registers/aarch32.rs similarity index 100% rename from crates/core_arch/src/acle/registers/aarch32.rs rename to crates/core_arch/src/arm_shared/registers/aarch32.rs diff --git a/crates/core_arch/src/acle/registers/mod.rs b/crates/core_arch/src/arm_shared/registers/mod.rs similarity index 100% rename from crates/core_arch/src/acle/registers/mod.rs rename to crates/core_arch/src/arm_shared/registers/mod.rs diff --git a/crates/core_arch/src/acle/registers/v6m.rs b/crates/core_arch/src/arm_shared/registers/v6m.rs similarity index 100% rename from crates/core_arch/src/acle/registers/v6m.rs rename to crates/core_arch/src/arm_shared/registers/v6m.rs diff --git a/crates/core_arch/src/acle/registers/v7m.rs b/crates/core_arch/src/arm_shared/registers/v7m.rs similarity index 100% rename from crates/core_arch/src/acle/registers/v7m.rs rename to crates/core_arch/src/arm_shared/registers/v7m.rs diff --git a/crates/core_arch/src/arm/test_support.rs b/crates/core_arch/src/arm_shared/test_support.rs similarity index 98% rename from crates/core_arch/src/arm/test_support.rs rename to crates/core_arch/src/arm_shared/test_support.rs index 337a270e40..ff752f25b3 100644 --- a/crates/core_arch/src/arm/test_support.rs +++ b/crates/core_arch/src/arm_shared/test_support.rs @@ -1,4 +1,10 @@ 
-use crate::core_arch::{arm::*, simd::*}; +#[cfg(target_arch = "arm")] +use crate::core_arch::arm::*; + +#[cfg(target_arch = "aarch64")] +use crate::core_arch::aarch64::*; + +use crate::core_arch::simd::*; use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec}; macro_rules! V_u8 { diff --git a/crates/core_arch/src/mod.rs b/crates/core_arch/src/mod.rs index 5b25687c3d..4e6dcb7dc4 100644 --- a/crates/core_arch/src/mod.rs +++ b/crates/core_arch/src/mod.rs @@ -5,7 +5,7 @@ mod macros; #[cfg(any(target_arch = "arm", target_arch = "aarch64", doc))] -mod acle; +mod arm_shared; mod simd; @@ -53,7 +53,7 @@ pub mod arch { #[doc(cfg(target_arch = "aarch64"))] #[unstable(feature = "stdsimd", issue = "27731")] pub mod aarch64 { - pub use crate::core_arch::{aarch64::*, arm::*}; + pub use crate::core_arch::aarch64::*; } /// Platform-specific intrinsics for the `wasm32` platform. @@ -234,8 +234,8 @@ mod x86_64; #[cfg(any(target_arch = "aarch64", doc))] #[doc(cfg(target_arch = "aarch64"))] mod aarch64; -#[cfg(any(target_arch = "arm", target_arch = "aarch64", doc))] -#[doc(cfg(any(target_arch = "arm", target_arch = "aarch64")))] +#[cfg(any(target_arch = "arm", doc))] +#[doc(cfg(any(target_arch = "arm")))] mod arm; #[cfg(any(target_arch = "wasm32", doc))] diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs index 155c898a41..1babd33744 100644 --- a/crates/stdarch-gen/src/main.rs +++ b/crates/stdarch-gen/src/main.rs @@ -2253,7 +2253,7 @@ mod test { let arm_out_path: PathBuf = PathBuf::from(env::var("OUT_DIR").unwrap()) .join("src") - .join("arm") + .join("arm_shared") .join("neon"); std::fs::create_dir_all(&arm_out_path)?; diff --git a/crates/stdarch-test/src/lib.rs b/crates/stdarch-test/src/lib.rs index 70797e17c8..408d7190e9 100644 --- a/crates/stdarch-test/src/lib.rs +++ b/crates/stdarch-test/src/lib.rs @@ -121,7 +121,7 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) { // in some cases exceed the limit. "cvtpi2ps" => 25, - // core_arch/src/acle/simd32 + // core_arch/src/arm_shared/simd32 "usad8" => 27, "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29, diff --git a/crates/stdarch-verify/src/lib.rs b/crates/stdarch-verify/src/lib.rs index 7c947d8ac7..e85f0489a8 100644 --- a/crates/stdarch-verify/src/lib.rs +++ b/crates/stdarch-verify/src/lib.rs @@ -224,6 +224,7 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream { "int64x1_t" => quote! { &I64X1 }, "int64x2_t" => quote! { &I64X2 }, "uint8x8_t" => quote! { &U8X8 }, + "uint8x4_t" => quote! { &U8X4 }, "uint8x8x2_t" => quote! { &U8X8X2 }, "uint8x16x2_t" => quote! { &U8X16X2 }, "uint8x16x3_t" => quote! 
{ &U8X16X3 }, diff --git a/crates/stdarch-verify/tests/arm.rs b/crates/stdarch-verify/tests/arm.rs index 9f0d981485..8c2cf47855 100644 --- a/crates/stdarch-verify/tests/arm.rs +++ b/crates/stdarch-verify/tests/arm.rs @@ -149,6 +149,7 @@ static U8X16X2: Type = Type::U(8, 16, 2); static U8X16X3: Type = Type::U(8, 16, 3); static U8X16X4: Type = Type::U(8, 16, 4); static U8X8: Type = Type::U(8, 8, 1); +static U8X4: Type = Type::U(8, 4, 1); static U8X8X2: Type = Type::U(8, 8, 2); static U8X8X3: Type = Type::U(8, 8, 3); static U8X8X4: Type = Type::U(8, 8, 4); @@ -371,6 +372,90 @@ fn verify_all_signatures() { "vsriq_n_p8", "vsri_n_p16", "vsriq_n_p16", + "__smulbb", + "__smultb", + "__smulbt", + "__smultt", + "__smulwb", + "__smulwt", + "__qadd", + "__qsub", + "__qdbl", + "__smlabb", + "__smlabt", + "__smlatb", + "__smlatt", + "__smlawb", + "__smlawt", + "__qadd8", + "__qsub8", + "__qsub16", + "__qadd16", + "__qasx", + "__qsax", + "__sadd16", + "__sadd8", + "__smlad", + "__smlsd", + "__sasx", + "__sel", + "__shadd8", + "__shadd16", + "__shsub8", + "__usub8", + "__ssub8", + "__shsub16", + "__smuad", + "__smuadx", + "__smusd", + "__smusdx", + "__usad8", + "__usada8", + "vld1_s8", + "vld1q_s8", + "vld1q_s8", + "vld1_s16", + "vld1q_s16", + "vld1_s32", + "vld1q_s32", + "vld1_s64", + "vld1q_s64", + "vld1_u8", + "vld1q_u8", + "vld1_u16", + "vld1q_u16", + "vld1_u32", + "vld1q_u32", + "vld1_u64", + "vld1q_u64", + "vld1_p8", + "vld1q_p8", + "vld1_p16", + "vld1q_p16", + "vld1_f32", + "vld1q_f32", + "vld1_f64", + "vld1q_f64", + "vpadal_s8", + "vpadal_s16", + "vpadal_s32", + "vpadalq_s8", + "vpadalq_s16", + "vpadalq_s32", + "vpadal_u8", + "vpadal_u16", + "vpadal_u32", + "vpadalq_u8", + "vpadalq_u16", + "vpadalq_u32", + "__ldrex", + "__strex", + "__ldrexb", + "__strexb", + "__ldrexh", + "__strexh", + "__clrex", + "__dbg", ]; if !skip.contains(&rust.name) { println!( @@ -402,6 +487,7 @@ fn verify_all_signatures() { "vreinterpret_p64_s64", "vreinterpret_f32_p64", "vreinterpretq_f32_p64", + "__dbg", ]; let arm = match map.get(rust.name) { Some(i) => i, @@ -412,11 +498,13 @@ fn verify_all_signatures() { // TODO: we still need to verify these intrinsics or find a // reference for them, need to figure out where though! if !rust.file.ends_with("dsp.rs\"") + && !rust.file.ends_with("simd32.rs\"") && !rust.file.ends_with("cmsis.rs\"") && !rust.file.ends_with("v6.rs\"") && !rust.file.ends_with("v7.rs\"") && !rust.file.ends_with("v8.rs\"") && !rust.file.ends_with("tme.rs\"") + && !rust.file.ends_with("ex.rs\"") && !skip_intrinsic_verify.contains(&rust.name) { println!(