From 69e2e59cb222c264b6738d3b7bc0b1e9dac443d1 Mon Sep 17 00:00:00 2001 From: Caio Date: Tue, 7 Nov 2017 23:20:55 -0200 Subject: [PATCH 1/3] Add _mm_unpackhi_pd and _mm_unpacklo_pd --- src/x86/sse2.rs | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs index 9d2ee6f47e..7afd9bd90f 100644 --- a/src/x86/sse2.rs +++ b/src/x86/sse2.rs @@ -2052,6 +2052,30 @@ pub unsafe fn _mm_undefined_si128() -> __m128i { mem::transmute(i32x4::splat(mem::uninitialized())) } +/// The resulting `f64x2` element is composed by the low-order values of +/// the two `f64x2` interleaved input elements, i.e.: +/// +/// * The [127:64] bits are copied from the [63:0] bits of the second input +/// * The [63:0] bits are copied from the [63:0] bits of the first input +#[inline(always)] +#[target_feature = "+sse2"] +#[cfg_attr(test, assert_instr(unpckhpd))] +pub unsafe fn _mm_unpackhi_pd(a: f64x2, b: f64x2) -> f64x2 { + simd_shuffle2(a, b, [2, 0]) +} + +/// The resulting `f64x2` element is composed by the high-order values of +/// the two `f64x2` interleaved input elements, i.e.: +/// +/// * The [127:64] bits are copied from the [127:64] bits of the second input +/// * The [63:0] bits are copied from the [127:64] bits of the first input +#[inline(always)] +#[target_feature = "+sse2"] +#[cfg_attr(test, assert_instr(unpcklpd))] +pub unsafe fn _mm_unpacklo_pd(a: f64x2, b: f64x2) -> f64x2 { + simd_shuffle2(a, b, [3, 1]) +} + #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.sse2.pause"] @@ -4170,4 +4194,20 @@ mod tests { let r = sse2::_mm_load_pd1(&d); assert_eq!(r, f64x2::new(d, d)); } + + #[simd_test = "sse2"] + unsafe fn _mm_unpackhi_pd() { + let a = f64x2::new(1.0, 2.0); + let b = f64x2::new(3.0, 4.0); + let r = sse2::_mm_unpackhi_pd(a, b); + assert_eq!(r, f64x2::new(3.0, 1.0)); + } + + #[simd_test = "sse2"] + unsafe fn _mm_unpacklo_pd() { + let a = f64x2::new(1.0, 2.0); + let b = f64x2::new(3.0, 4.0); + let r = sse2::_mm_unpacklo_pd(a, b); + assert_eq!(r, f64x2::new(4.0, 2.0)); + } } From 4a9a522ad393c48815f71e4f5839889f5aed74e3 Mon Sep 17 00:00:00 2001 From: Caio Date: Wed, 8 Nov 2017 00:48:47 -0200 Subject: [PATCH 2/3] Fix tests --- src/x86/sse2.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs index 7afd9bd90f..b0d5a3b097 100644 --- a/src/x86/sse2.rs +++ b/src/x86/sse2.rs @@ -2061,7 +2061,7 @@ pub unsafe fn _mm_undefined_si128() -> __m128i { #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(unpckhpd))] pub unsafe fn _mm_unpackhi_pd(a: f64x2, b: f64x2) -> f64x2 { - simd_shuffle2(a, b, [2, 0]) + simd_shuffle2(a, b, [1, 3]) } /// The resulting `f64x2` element is composed by the high-order values of @@ -2073,7 +2073,7 @@ pub unsafe fn _mm_unpackhi_pd(a: f64x2, b: f64x2) -> f64x2 { #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(unpcklpd))] pub unsafe fn _mm_unpacklo_pd(a: f64x2, b: f64x2) -> f64x2 { - simd_shuffle2(a, b, [3, 1]) + simd_shuffle2(a, b, [0, 2]) } #[allow(improper_ctypes)] @@ -4200,7 +4200,7 @@ mod tests { let a = f64x2::new(1.0, 2.0); let b = f64x2::new(3.0, 4.0); let r = sse2::_mm_unpackhi_pd(a, b); - assert_eq!(r, f64x2::new(3.0, 1.0)); + assert_eq!(r, f64x2::new(2.0, 4.0)); } #[simd_test = "sse2"] @@ -4208,6 +4208,6 @@ mod tests { let a = f64x2::new(1.0, 2.0); let b = f64x2::new(3.0, 4.0); let r = sse2::_mm_unpacklo_pd(a, b); - assert_eq!(r, f64x2::new(4.0, 2.0)); + assert_eq!(r, f64x2::new(1.0, 3.0)); } } From eefdb48c58080cbb6b3f3fa5f1d5366dc861e399 Mon Sep 17 00:00:00 2001 From: Caio Date: Wed, 8 Nov 2017 01:00:03 -0200 Subject: [PATCH 3/3] Fix docs --- src/x86/sse2.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs index b0d5a3b097..d178d7080c 100644 --- a/src/x86/sse2.rs +++ b/src/x86/sse2.rs @@ -2055,8 +2055,8 @@ pub unsafe fn _mm_undefined_si128() -> __m128i { /// The resulting `f64x2` element is composed by the low-order values of /// the two `f64x2` interleaved input elements, i.e.: /// -/// * The [127:64] bits are copied from the [63:0] bits of the second input -/// * The [63:0] bits are copied from the [63:0] bits of the first input +/// * The [127:64] bits are copied from the [127:64] bits of the second input +/// * The [63:0] bits are copied from the [127:64] bits of the first input #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(unpckhpd))] @@ -2067,8 +2067,8 @@ pub unsafe fn _mm_unpackhi_pd(a: f64x2, b: f64x2) -> f64x2 { /// The resulting `f64x2` element is composed by the high-order values of /// the two `f64x2` interleaved input elements, i.e.: /// -/// * The [127:64] bits are copied from the [127:64] bits of the second input -/// * The [63:0] bits are copied from the [127:64] bits of the first input +/// * The [127:64] bits are copied from the [63:0] bits of the second input +/// * The [63:0] bits are copied from the [63:0] bits of the first input #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(unpcklpd))]