//! `i586` MMX instruction set. //! //! The intrinsics here roughly correspond to those in the `mmintrin.h` C //! header. //! //! The reference is [Intel 64 and IA-32 Architectures Software Developer's //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. //! //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf use coresimd::simd::*; use coresimd::x86::*; use mem; #[cfg(test)] use stdsimd_test::assert_instr; /// Constructs a 64-bit integer vector initialized to zero. #[inline] #[target_feature(enable = "mmx")] // FIXME: this produces a movl instead of xorps on x86 // FIXME: this produces a xor intrinsic instead of xorps on x86_64 #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))] pub unsafe fn _mm_setzero_si64() -> __m64 { mem::transmute(0_i64) } /// Add packed 8-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddb))] pub unsafe fn _mm_add_pi8(a: __m64, b: __m64) -> __m64 { paddb(a, b) } /// Add packed 8-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddb))] pub unsafe fn _m_paddb(a: __m64, b: __m64) -> __m64 { _mm_add_pi8(a, b) } /// Add packed 16-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddw))] pub unsafe fn _mm_add_pi16(a: __m64, b: __m64) -> __m64 { paddw(a, b) } /// Add packed 16-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddw))] pub unsafe fn _m_paddw(a: __m64, b: __m64) -> __m64 { _mm_add_pi16(a, b) } /// Add packed 32-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddd))] pub unsafe fn _mm_add_pi32(a: __m64, b: __m64) -> __m64 { paddd(a, b) } /// Add packed 32-bit integers in `a` and `b`. 
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddd))]
pub unsafe fn _m_paddd(a: __m64, b: __m64) -> __m64 {
    // Alias for `_mm_add_pi32` under the classic `_m_paddd` name.
    _mm_add_pi32(a, b)
}

/// Add packed 8-bit integers in `a` and `b` using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddsb))]
pub unsafe fn _mm_adds_pi8(a: __m64, b: __m64) -> __m64 {
    paddsb(a, b)
}

/// Add packed 8-bit integers in `a` and `b` using saturation.
///
/// Alias for `_mm_adds_pi8`, provided under the classic `_m_paddsb` name.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddsb))]
pub unsafe fn _m_paddsb(a: __m64, b: __m64) -> __m64 {
    _mm_adds_pi8(a, b)
}

/// Add packed 16-bit integers in `a` and `b` using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddsw))]
pub unsafe fn _mm_adds_pi16(a: __m64, b: __m64) -> __m64 {
    paddsw(a, b)
}

/// Add packed 16-bit integers in `a` and `b` using saturation.
///
/// Alias for `_mm_adds_pi16`, provided under the classic `_m_paddsw` name.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddsw))]
pub unsafe fn _m_paddsw(a: __m64, b: __m64) -> __m64 {
    _mm_adds_pi16(a, b)
}

/// Add packed unsigned 8-bit integers in `a` and `b` using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddusb))]
pub unsafe fn _mm_adds_pu8(a: __m64, b: __m64) -> __m64 {
    paddusb(a, b)
}

/// Add packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// Alias for `_mm_adds_pu8`, provided under the classic `_m_paddusb` name.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddusb))]
pub unsafe fn _m_paddusb(a: __m64, b: __m64) -> __m64 {
    _mm_adds_pu8(a, b)
}

/// Add packed unsigned 16-bit integers in `a` and `b` using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddusw))]
pub unsafe fn _mm_adds_pu16(a: __m64, b: __m64) -> __m64 {
    paddusw(a, b)
}

/// Add packed unsigned 16-bit integers in `a` and `b` using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddusw))]
pub unsafe fn _m_paddusw(a: __m64, b: __m64) -> __m64 {
    // Alias for `_mm_adds_pu16` under the classic `_m_paddusw` name.
    _mm_adds_pu16(a, b)
}

/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubb))]
pub unsafe fn _mm_sub_pi8(a: __m64, b: __m64) -> __m64 {
    psubb(a, b)
}

/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`.
///
/// Alias for `_mm_sub_pi8`, provided under the classic `_m_psubb` name.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubb))]
pub unsafe fn _m_psubb(a: __m64, b: __m64) -> __m64 {
    _mm_sub_pi8(a, b)
}

/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubw))]
pub unsafe fn _mm_sub_pi16(a: __m64, b: __m64) -> __m64 {
    psubw(a, b)
}

/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`.
///
/// Alias for `_mm_sub_pi16`, provided under the classic `_m_psubw` name.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubw))]
pub unsafe fn _m_psubw(a: __m64, b: __m64) -> __m64 {
    _mm_sub_pi16(a, b)
}

/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubd))]
pub unsafe fn _mm_sub_pi32(a: __m64, b: __m64) -> __m64 {
    psubd(a, b)
}

/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
///
/// Alias for `_mm_sub_pi32`, provided under the classic `_m_psubd` name.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubd))]
pub unsafe fn _m_psubd(a: __m64, b: __m64) -> __m64 {
    _mm_sub_pi32(a, b)
}

/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubsb))]
pub unsafe fn _mm_subs_pi8(a: __m64, b: __m64) -> __m64 {
    psubsb(a, b)
}

/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubsb))]
pub unsafe fn _m_psubsb(a: __m64, b: __m64) -> __m64 {
    // Alias for `_mm_subs_pi8` under the classic `_m_psubsb` name.
    _mm_subs_pi8(a, b)
}

/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubsw))]
pub unsafe fn _mm_subs_pi16(a: __m64, b: __m64) -> __m64 {
    psubsw(a, b)
}

/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
///
/// Alias for `_mm_subs_pi16`, provided under the classic `_m_psubsw` name.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubsw))]
pub unsafe fn _m_psubsw(a: __m64, b: __m64) -> __m64 {
    _mm_subs_pi16(a, b)
}

/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubusb))]
pub unsafe fn _mm_subs_pu8(a: __m64, b: __m64) -> __m64 {
    psubusb(a, b)
}

/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
///
/// Alias for `_mm_subs_pu8`, provided under the classic `_m_psubusb` name.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubusb))]
pub unsafe fn _m_psubusb(a: __m64, b: __m64) -> __m64 {
    _mm_subs_pu8(a, b)
}

/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned
/// 16-bit integers in `a` using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubusw))]
pub unsafe fn _mm_subs_pu16(a: __m64, b: __m64) -> __m64 {
    psubusw(a, b)
}

/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned
/// 16-bit integers in `a` using saturation.
///
/// Alias for `_mm_subs_pu16`, provided under the classic `_m_psubusw` name.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubusw))]
pub unsafe fn _m_psubusw(a: __m64, b: __m64) -> __m64 {
    _mm_subs_pu16(a, b)
}

/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation.
///
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
/// less than 0x80 are saturated to 0x80.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(packsswb))]
pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 {
    packsswb(a, b)
}

/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using signed saturation.
///
/// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
/// values less than 0x8000 are saturated to 0x8000.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(packssdw))]
pub unsafe fn _mm_packs_pi32(a: __m64, b: __m64) -> __m64 {
    packssdw(a, b)
}

/// Compares whether each element of `a` is greater than the corresponding
/// element of `b` returning `0` for `false` and `-1` for `true`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 {
    pcmpgtb(a, b)
}

/// Compares whether each element of `a` is greater than the corresponding
/// element of `b` returning `0` for `false` and `-1` for `true`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
pub unsafe fn _mm_cmpgt_pi16(a: __m64, b: __m64) -> __m64 {
    pcmpgtw(a, b)
}

/// Compares whether each element of `a` is greater than the corresponding
/// element of `b` returning `0` for `false` and `-1` for `true`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
pub unsafe fn _mm_cmpgt_pi32(a: __m64, b: __m64) -> __m64 {
    pcmpgtd(a, b)
}

/// Unpacks the upper two elements from two `i16x4` vectors and interleaves
/// them into the result: `[a.2, b.2, a.3, b.3]`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected
pub unsafe fn _mm_unpackhi_pi16(a: __m64, b: __m64) -> __m64 {
    punpckhwd(a, b)
}

/// Unpacks the upper four elements from two `i8x8` vectors and interleaves
/// them into the result: `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(punpckhbw))]
pub unsafe fn _mm_unpackhi_pi8(a: __m64, b: __m64) -> __m64 {
    punpckhbw(a, b)
}

/// Unpacks the lower four elements from two `i8x8` vectors and interleaves
/// them into the result: `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(punpcklbw))]
pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 {
    punpcklbw(a, b)
}

/// Unpacks the lower two elements from two `i16x4` vectors and interleaves
/// them into the result: `[a.0 b.0 a.1 b.1]`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(punpcklwd))]
pub unsafe fn _mm_unpacklo_pi16(a: __m64, b: __m64) -> __m64 {
    punpcklwd(a, b)
}

/// Unpacks the upper element from two `i32x2` vectors and interleaves them
/// into the result: `[a.1, b.1]`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(punpckhdq))]
pub unsafe fn _mm_unpackhi_pi32(a: __m64, b: __m64) -> __m64 {
    punpckhdq(a, b)
}

/// Unpacks the lower element from two `i32x2` vectors and interleaves them
/// into the result: `[a.0, b.0]`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(punpckldq))]
pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 {
    punpckldq(a, b)
}

/// Set packed 16-bit integers in dst with the supplied values.
///
/// `e3` is the most significant element, `e0` the least significant.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_set_pi16(e3: i16, e2: i16, e1: i16, e0: i16) -> __m64 {
    // `_mm_set_*` lists elements from most to least significant, so delegate
    // to `_mm_setr_*` with the argument order reversed.
    _mm_setr_pi16(e0, e1, e2, e3)
}

/// Set packed 32-bit integers in dst with the supplied values.
///
/// `e1` is the most significant element, `e0` the least significant.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_set_pi32(e1: i32, e0: i32) -> __m64 {
    // Delegate to `_mm_setr_pi32` with the argument order reversed.
    _mm_setr_pi32(e0, e1)
}

/// Set packed 8-bit integers in dst with the supplied values.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_set_pi8(
    e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8,
) -> __m64 {
    // `_mm_set_*` lists elements from most to least significant, so delegate
    // to `_mm_setr_*` with the argument order reversed.
    _mm_setr_pi8(e0, e1, e2, e3, e4, e5, e6, e7)
}

/// Broadcast 16-bit integer a to all elements of dst.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_set1_pi16(a: i16) -> __m64 {
    _mm_setr_pi16(a, a, a, a)
}

/// Broadcast 32-bit integer a to all elements of dst.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_set1_pi32(a: i32) -> __m64 {
    _mm_setr_pi32(a, a)
}

/// Broadcast 8-bit integer a to all elements of dst.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_set1_pi8(a: i8) -> __m64 {
    _mm_setr_pi8(a, a, a, a, a, a, a, a)
}

/// Set packed 16-bit integers in dst with the supplied values in reverse
/// order.
///
/// `e0` is the least significant element.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_setr_pi16(e0: i16, e1: i16, e2: i16, e3: i16) -> __m64 {
    // Build a portable `i16x4` and reinterpret it as `__m64`; both are 64-bit
    // plain-data vectors, so the transmute is layout-safe.
    mem::transmute(i16x4::new(e0, e1, e2, e3))
}

/// Set packed 32-bit integers in dst with the supplied values in reverse
/// order.
///
/// `e0` is the least significant element.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 {
    // Build a portable `i32x2` and reinterpret it as `__m64`.
    mem::transmute(i32x2::new(e0, e1))
}

/// Set packed 8-bit integers in dst with the supplied values in reverse order.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_setr_pi8(
    e0: i8, e1: i8, e2: i8, e3: i8, e4: i8, e5: i8, e6: i8, e7: i8,
) -> __m64 {
    // Build a portable `i8x8` and reinterpret it as `__m64`.
    mem::transmute(i8x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
}

// Raw LLVM intrinsics backing the wrappers above. `improper_ctypes` is
// allowed because `__m64` is not FFI-safe by the usual rules, but these
// "extern" symbols are LLVM intrinsics, not real C functions.
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.mmx.padd.b"]
    fn paddb(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.padd.w"]
    fn paddw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.padd.d"]
    fn paddd(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.padds.b"]
    fn paddsb(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.padds.w"]
    fn paddsw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.paddus.b"]
    fn paddusb(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.paddus.w"]
    fn paddusw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.psub.b"]
    fn psubb(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.psub.w"]
    fn psubw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.psub.d"]
    fn psubd(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.psubs.b"]
    fn psubsb(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.psubs.w"]
    fn psubsw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.psubus.b"]
    fn psubusb(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.psubus.w"]
    fn psubusw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.packsswb"]
    fn packsswb(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.packssdw"]
    fn packssdw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.pcmpgt.b"]
    fn pcmpgtb(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.pcmpgt.w"]
    fn pcmpgtw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.pcmpgt.d"]
    fn pcmpgtd(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.punpckhwd"]
    fn punpckhwd(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.punpcklwd"]
    fn punpcklwd(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.punpckhbw"]
    fn punpckhbw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.punpcklbw"]
    fn punpcklbw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.punpckhdq"]
    fn punpckhdq(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.punpckldq"]
    fn punpckldq(a: __m64, b: __m64) -> __m64;
}

#[cfg(test)]
mod tests {
    use coresimd::x86::*;
    use stdsimd_test::simd_test;

    // Each `simd_test` runs only when the host CPU supports MMX; tests check
    // both the `_mm_*` intrinsic and (where present) its `_m_*` alias.

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_setzero_si64() {
        let r: __m64 = ::std::mem::transmute(0_i64);
        assert_eq_m64(r, _mm_setzero_si64());
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_add_pi8() {
        let a = _mm_setr_pi8(-1, -1, 1, 1, -1, 0, 1, 0);
        let b = _mm_setr_pi8(-127, 101, 99, 126, 0, -1, 0, 1);
        let e = _mm_setr_pi8(-128, 100, 100, 127, -1, -1, 1, 1);
        assert_eq_m64(e, _mm_add_pi8(a, b));
        assert_eq_m64(e, _m_paddb(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_add_pi16() {
        let a = _mm_setr_pi16(-1, -1, 1, 1);
        let b = _mm_setr_pi16(
            i16::min_value() + 1,
            30001,
            -30001,
            i16::max_value() - 1,
        );
        let e =
            _mm_setr_pi16(i16::min_value(), 30000, -30000, i16::max_value());
        assert_eq_m64(e, _mm_add_pi16(a, b));
        assert_eq_m64(e, _m_paddw(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_add_pi32() {
        let a = _mm_setr_pi32(1, -1);
        let b = _mm_setr_pi32(i32::max_value() - 1, i32::min_value() + 1);
        let e = _mm_setr_pi32(i32::max_value(), i32::min_value());
        assert_eq_m64(e, _mm_add_pi32(a, b));
        assert_eq_m64(e, _m_paddd(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_adds_pi8() {
        let a = _mm_setr_pi8(-100, -1, 1, 100, -1, 0, 1, 0);
        let b = _mm_setr_pi8(-100, 1, -1, 100, 0, -1, 0, 1);
        let e =
            _mm_setr_pi8(i8::min_value(), 0, 0, i8::max_value(), -1, -1, 1, 1);
        assert_eq_m64(e, _mm_adds_pi8(a, b));
        assert_eq_m64(e, _m_paddsb(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_adds_pi16() {
        let a = _mm_setr_pi16(-32000, 32000, 4, 0);
        let b = _mm_setr_pi16(-32000, 32000, -5, 1);
        let e = _mm_setr_pi16(i16::min_value(), i16::max_value(), -1, 1);
        assert_eq_m64(e, _mm_adds_pi16(a, b));
        assert_eq_m64(e, _m_paddsw(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_adds_pu8() {
        let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 200u8 as i8);
        let b = _mm_setr_pi8(0, 10, 20, 30, 40, 50, 60, 200u8 as i8);
        let e = _mm_setr_pi8(0, 11, 22, 33, 44, 55, 66, u8::max_value() as i8);
        assert_eq_m64(e, _mm_adds_pu8(a, b));
        assert_eq_m64(e, _m_paddusb(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_adds_pu16() {
        let a = _mm_setr_pi16(0, 1, 2, 60000u16 as i16);
        let b = _mm_setr_pi16(0, 10, 20, 60000u16 as i16);
        let e = _mm_setr_pi16(0, 11, 22, u16::max_value() as i16);
        assert_eq_m64(e, _mm_adds_pu16(a, b));
        assert_eq_m64(e, _m_paddusw(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_sub_pi8() {
        let a = _mm_setr_pi8(0, 0, 1, 1, -1, -1, 0, 0);
        let b = _mm_setr_pi8(-1, 1, -2, 2, 100, -100, -127, 127);
        let e = _mm_setr_pi8(1, -1, 3, -1, -101, 99, 127, -127);
        assert_eq_m64(e, _mm_sub_pi8(a, b));
        assert_eq_m64(e, _m_psubb(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_sub_pi16() {
        let a = _mm_setr_pi16(-20000, -20000, 20000, 30000);
        let b = _mm_setr_pi16(-10000, 10000, -10000, 30000);
        let e = _mm_setr_pi16(-10000, -30000, 30000, 0);
        assert_eq_m64(e, _mm_sub_pi16(a, b));
        assert_eq_m64(e, _m_psubw(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_sub_pi32() {
        let a = _mm_setr_pi32(500_000, -500_000);
        let b = _mm_setr_pi32(500_000, 500_000);
        let e = _mm_setr_pi32(0, -1_000_000);
        assert_eq_m64(e, _mm_sub_pi32(a, b));
        assert_eq_m64(e, _m_psubd(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_subs_pi8() {
        let a = _mm_setr_pi8(-100, 100, 0, 0, 0, 0, -5, 5);
        let b = _mm_setr_pi8(100, -100, i8::min_value(), 127, -1, 1, 3, -3);
        let e = _mm_setr_pi8(
            i8::min_value(),
            i8::max_value(),
            i8::max_value(),
            -127,
            1,
            -1,
            -8,
            8,
        );
        assert_eq_m64(e, _mm_subs_pi8(a, b));
        assert_eq_m64(e, _m_psubsb(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_subs_pi16() {
        let a = _mm_setr_pi16(-20000, 20000, 0, 0);
        let b = _mm_setr_pi16(20000, -20000, -1, 1);
        let e = _mm_setr_pi16(i16::min_value(), i16::max_value(), 1, -1);
        assert_eq_m64(e, _mm_subs_pi16(a, b));
        assert_eq_m64(e, _m_psubsw(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_subs_pu8() {
        let a = _mm_setr_pi8(50, 10, 20, 30, 40, 60, 70, 80);
        let b = _mm_setr_pi8(60, 20, 30, 40, 30, 20, 10, 0);
        let e = _mm_setr_pi8(0, 0, 0, 0, 10, 40, 60, 80);
        assert_eq_m64(e, _mm_subs_pu8(a, b));
        assert_eq_m64(e, _m_psubusb(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_subs_pu16() {
        let a = _mm_setr_pi16(10000, 200, 0, 44444u16 as i16);
        let b = _mm_setr_pi16(20000, 300, 1, 11111);
        let e = _mm_setr_pi16(0, 0, 0, 33333u16 as i16);
        assert_eq_m64(e, _mm_subs_pu16(a, b));
        assert_eq_m64(e, _m_psubusw(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_packs_pi16() {
        let a = _mm_setr_pi16(-1, 2, -3, 4);
        let b = _mm_setr_pi16(-5, 6, -7, 8);
        let r = _mm_setr_pi8(-1, 2, -3, 4, -5, 6, -7, 8);
        assert_eq_m64(r, _mm_packs_pi16(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_packs_pi32() {
        let a = _mm_setr_pi32(-1, 2);
        let b = _mm_setr_pi32(-5, 6);
        let r = _mm_setr_pi16(-1, 2, -5, 6);
        assert_eq_m64(r, _mm_packs_pi32(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_cmpgt_pi8() {
        let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_pi8(8, 7, 6, 5, 4, 3, 2, 1);
        let r = _mm_setr_pi8(0, 0, 0, 0, 0, -1, -1, -1);
        assert_eq_m64(r, _mm_cmpgt_pi8(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_cmpgt_pi16() {
        let a = _mm_setr_pi16(0, 1, 2, 3);
        let b = _mm_setr_pi16(4, 3, 2, 1);
        let r = _mm_setr_pi16(0, 0, 0, -1);
        assert_eq_m64(r, _mm_cmpgt_pi16(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_cmpgt_pi32() {
        let a = _mm_setr_pi32(0, 3);
        let b = _mm_setr_pi32(1, 2);
        let r0 = _mm_setr_pi32(0, -1);
        let r1 = _mm_setr_pi32(-1, 0);
        assert_eq_m64(r0, _mm_cmpgt_pi32(a, b));
        assert_eq_m64(r1, _mm_cmpgt_pi32(b, a));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_unpackhi_pi8() {
        let a = _mm_setr_pi8(0, 3, 4, 7, 8, 11, 12, 15);
        let b = _mm_setr_pi8(1, 2, 5, 6, 9, 10, 13, 14);
        let r = _mm_setr_pi8(8, 9, 11, 10, 12, 13, 15, 14);
        assert_eq_m64(r, _mm_unpackhi_pi8(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_unpacklo_pi8() {
        let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_pi8(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_setr_pi8(0, 8, 1, 9, 2, 10, 3, 11);
        assert_eq_m64(r, _mm_unpacklo_pi8(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_unpackhi_pi16() {
        let a = _mm_setr_pi16(0, 1, 2, 3);
        let b = _mm_setr_pi16(4, 5, 6, 7);
        let r = _mm_setr_pi16(2, 6, 3, 7);
        assert_eq_m64(r, _mm_unpackhi_pi16(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_unpacklo_pi16() {
        let a = _mm_setr_pi16(0, 1, 2, 3);
        let b = _mm_setr_pi16(4, 5, 6, 7);
        let r = _mm_setr_pi16(0, 4, 1, 5);
        assert_eq_m64(r, _mm_unpacklo_pi16(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_unpackhi_pi32() {
        let a = _mm_setr_pi32(0, 3);
        let b = _mm_setr_pi32(1, 2);
        let r = _mm_setr_pi32(3, 2);
        assert_eq_m64(r, _mm_unpackhi_pi32(a, b));
    }

    #[simd_test(enable = "mmx")]
    unsafe fn test_mm_unpacklo_pi32() {
        let a = _mm_setr_pi32(0, 3);
        let b = _mm_setr_pi32(1, 2);
        let r = _mm_setr_pi32(0, 1);
        assert_eq_m64(r, _mm_unpacklo_pi32(a, b));
    }
}