From 3d4ed5bf922fe6008188658fffd5b42a04448f72 Mon Sep 17 00:00:00 2001 From: Caio Date: Tue, 7 Nov 2017 10:30:56 -0200 Subject: [PATCH 1/2] Add _mm_cvtepu(8|16|32)_epi(16|32|64) --- src/x86/sse41.rs | 113 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/src/x86/sse41.rs b/src/x86/sse41.rs index 2c680f5efc..6515336391 100644 --- a/src/x86/sse41.rs +++ b/src/x86/sse41.rs @@ -542,6 +542,65 @@ pub unsafe fn _mm_round_ss(a: f32x4, b: f32x4, rounding: i32) -> f32x4 { constify_imm4!(rounding, call) } +/// Zero-extends each of the lower eight 8-bit integer values of an `i8x16` +/// element to 16-bit values and returns them in an `i16x8` element. The +/// upper eight values of the input are unused. +#[inline(always)] +#[target_feature = "+sse4.1"] +#[cfg_attr(test, assert_instr(pmovzxbw))] +pub unsafe fn _mm_cvtepu8_epi16(a: i8x16) -> i16x8 { + pmovzxbw(a) +} + +/// Zero-extends each of the lower four 8-bit integer values of an `i8x16` +/// element to 32-bit values and returns them in an `i32x4` element. The +/// upper twelve values of the input are unused. +#[inline(always)] +#[target_feature = "+sse4.1"] +#[cfg_attr(test, assert_instr(pmovzxbd))] +pub unsafe fn _mm_cvtepu8_epi32(a: i8x16) -> i32x4 { + pmovzxbd(a) +} + +/// Zero-extends each of the lower two 8-bit integer values of an `i8x16` +/// element to 64-bit values and returns them in an `i64x2` element. The +/// upper fourteen values of the input are unused. +#[inline(always)] +#[target_feature = "+sse4.1"] +#[cfg_attr(test, assert_instr(pmovzxbq))] +pub unsafe fn _mm_cvtepu8_epi64(a: i8x16) -> i64x2 { + pmovzxbq(a) +} + +/// Zero-extends each of the lower four 16-bit integer values of an `i16x8` +/// element to 32-bit values and returns them in an `i32x4` element. The +/// upper four values of the input are unused. 
+#[inline(always)] +#[target_feature = "+sse4.1"] +#[cfg_attr(test, assert_instr(pmovzxwd))] +pub unsafe fn _mm_cvtepu16_epi32(a: i16x8) -> i32x4 { + pmovzxwd(a) +} + +/// Zero-extends each of the lower two 16-bit integer values of an `i16x8` +/// element to 64-bit values and returns them in an `i64x2` element. The +/// upper six values of the input are unused. +#[inline(always)] +#[target_feature = "+sse4.1"] +#[cfg_attr(test, assert_instr(pmovzxwq))] +pub unsafe fn _mm_cvtepu16_epi64(a: i16x8) -> i64x2 { + pmovzxwq(a) +} + +/// Zero-extends each of the lower two 32-bit integer values of an `i32x4` +/// element to 64-bit values and returns them in an `i64x2` element. The +/// upper two values of the input are unused. +#[inline(always)] +#[target_feature = "+sse4.1"] +#[cfg_attr(test, assert_instr(pmovzxdq))] +pub unsafe fn _mm_cvtepu32_epi64(a: i32x4) -> i64x2 { + pmovzxdq(a) +} #[allow(improper_ctypes)] extern "C" { @@ -581,6 +640,18 @@ extern "C" { fn roundsd(a: f64x2, b: f64x2, rounding: i32) -> f64x2; #[link_name = "llvm.x86.sse41.round.ss"] fn roundss(a: f32x4, b: f32x4, rounding: i32) -> f32x4; + #[link_name = "llvm.x86.sse41.pmovzxbw"] + fn pmovzxbw(a: i8x16) -> i16x8; + #[link_name = "llvm.x86.sse41.pmovzxbd"] + fn pmovzxbd(a: i8x16) -> i32x4; + #[link_name = "llvm.x86.sse41.pmovzxbq"] + fn pmovzxbq(a: i8x16) -> i64x2; + #[link_name = "llvm.x86.sse41.pmovzxwd"] + fn pmovzxwd(a: i16x8) -> i32x4; + #[link_name = "llvm.x86.sse41.pmovzxwq"] + fn pmovzxwq(a: i16x8) -> i64x2; + #[link_name = "llvm.x86.sse41.pmovzxdq"] + fn pmovzxdq(a: i32x4) -> i64x2; } #[cfg(test)] @@ -985,4 +1056,46 @@ mod tests { let e = f32x4::new(-2.0, 3.5, 7.5, 15.5); assert_eq!(r, e); } + + #[simd_test = "sse4.1"] + unsafe fn _mm_cvtepu8_epi16() { + let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = sse41::_mm_cvtepu8_epi16(a); + assert_eq!(r, i16x8::new(1, 2, 3, 4, 5, 6, 7, 8)); + } + + #[simd_test = "sse4.1"] + unsafe fn _mm_cvtepu8_epi32() { + let a = 
i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = sse41::_mm_cvtepu8_epi32(a); + assert_eq!(r, i32x4::new(1, 2, 3, 4)); + } + + #[simd_test = "sse4.1"] + unsafe fn _mm_cvtepu8_epi64() { + let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = sse41::_mm_cvtepu8_epi64(a); + assert_eq!(r, i64x2::new(1, 2)); + } + + #[simd_test = "sse4.1"] + unsafe fn _mm_cvtepu16_epi32() { + let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r = sse41::_mm_cvtepu16_epi32(a); + assert_eq!(r, i32x4::new(1, 2, 3, 4)); + } + + #[simd_test = "sse4.1"] + unsafe fn _mm_cvtepu16_epi64() { + let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r = sse41::_mm_cvtepu16_epi64(a); + assert_eq!(r, i64x2::new(1, 2)); + } + + #[simd_test = "sse4.1"] + unsafe fn _mm_cvtepu32_epi64() { + let a = i32x4::new(1, 2, 3, 4); + let r = sse41::_mm_cvtepu32_epi64(a); + assert_eq!(r, i64x2::new(1, 2)); + } } From 34855d1246188933a335e88976900ac8e32784f2 Mon Sep 17 00:00:00 2001 From: Caio Date: Tue, 7 Nov 2017 11:46:55 -0200 Subject: [PATCH 2/2] 80 characters limit --- src/x86/sse41.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/x86/sse41.rs b/src/x86/sse41.rs index 6515336391..dc67cee869 100644 --- a/src/x86/sse41.rs +++ b/src/x86/sse41.rs @@ -1059,21 +1059,24 @@ mod tests { #[simd_test = "sse4.1"] unsafe fn _mm_cvtepu8_epi16() { - let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let a = + i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); let r = sse41::_mm_cvtepu8_epi16(a); assert_eq!(r, i16x8::new(1, 2, 3, 4, 5, 6, 7, 8)); } #[simd_test = "sse4.1"] unsafe fn _mm_cvtepu8_epi32() { - let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let a = + i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); let r = sse41::_mm_cvtepu8_epi32(a); assert_eq!(r, i32x4::new(1, 2, 3, 4)); } #[simd_test = "sse4.1"] unsafe fn _mm_cvtepu8_epi64() { - let a = 
i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let a = + i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); let r = sse41::_mm_cvtepu8_epi64(a); assert_eq!(r, i64x2::new(1, 2)); }