This commit is contained in:
Andrew Gallant 2017-06-19 17:03:29 -04:00
parent 1450e641a5
commit e975e22c20
GPG Key ID: B2E3A4923F8B0D44
14 changed files with 256 additions and 75 deletions

View File

@ -24,3 +24,8 @@ type is probably `i64x2`.
What is the deal with the ucomi f64 comparison functions in SSE2? Clang's
headers indicate some specific behavior with NaN, but I can't seem to reproduce
it. Intel's official docs are very vague.
---
`_mm256_blendv_pd` takes a mask parameter with type `f64x4`, but the
documentation seems to indicate that it is a bit vector. What's going on?
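Intel's pseudocode appears to consult only the most significant bit of each 64-bit mask lane, which would explain the `f64x4` type: a lane whose sign bit is set (any negative value, including -0.0) selects from `b`. A scalar sketch of that reading, using a hypothetical helper that is not part of the crate:
fn blendv_pd_model(a: [f64; 4], b: [f64; 4], mask: [f64; 4]) -> [f64; 4] {
    let mut r = [0.0f64; 4];
    for i in 0..4 {
        // only bit 63 of the mask lane matters
        r[i] = if mask[i].to_bits() >> 63 == 1 { b[i] } else { a[i] };
    }
    r
}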

View File

@ -456,7 +456,7 @@ sse4.1
* [ ] `_mm_blend_ps`
* [ ] `_mm_blendv_pd`
* [ ] `_mm_blendv_ps`
* [ ] `_mm_blendv_epi8`
* [x] `_mm_blendv_epi8`
* [ ] `_mm_blend_epi16`
* [ ] `_mm_dp_pd`
* [ ] `_mm_dp_ps`

View File

@ -3,7 +3,8 @@
extern crate stdsimd;
use std::env;
use stdsimd as s;
use stdsimd::simd as s;
use stdsimd::vendor;
#[inline(never)]
#[target_feature = "+sse4.2"]
@ -14,15 +15,15 @@ fn index(needle: &str, haystack: &str) -> usize {
let mut needle = needle.to_string().into_bytes();
needle.resize(16, 0);
let vneedle = s::__m128i::from(s::u8x16::load(&needle, 0));
let vneedle = vendor::__m128i::from(s::u8x16::load(&needle, 0));
let mut haystack = haystack.to_string().into_bytes();
haystack.resize(16, 0);
let vhaystack = s::__m128i::from(s::u8x16::load(&haystack, 0));
let vhaystack = vendor::__m128i::from(s::u8x16::load(&haystack, 0));
s::_mm_cmpestri(
vendor::_mm_cmpestri(
vneedle, needle_len as i32, vhaystack, hay_len as i32,
s::_SIDD_CMP_EQUAL_ORDERED) as usize
vendor::_SIDD_CMP_EQUAL_ORDERED) as usize
}
fn main() {

View File

@ -4,17 +4,26 @@
target_feature,
)]
pub use v128::*;
pub use v256::*;
pub use v64::*;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub use x86::*;
/// Platform independent SIMD vector types and operations.
pub mod simd {
pub use v128::*;
pub use v256::*;
pub use v512::*;
pub use v64::*;
}
/// Platform dependent vendor intrinsics.
pub mod vendor {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub use x86::*;
}
#[macro_use]
mod macros;
mod simd;
mod simd_llvm;
mod v128;
mod v256;
mod v512;
mod v64;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod x86;
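A minimal sketch of how a caller addresses the reorganized crate, assuming `u8x16`, `splat`, the lane-wise operators, and the `__m128i` conversion keep the behavior they have elsewhere in this commit:
extern crate stdsimd;
use stdsimd::simd::u8x16;     // portable vector types now live under `simd`
use stdsimd::vendor::__m128i; // vendor-specific types and intrinsics under `vendor`
fn main() {
    let a = u8x16::splat(1);    // constructors come from define_impl!
    let b = a + a;              // lane-wise arithmetic from define_common_ops!
    let raw = __m128i::from(b); // reinterpret for use with vendor intrinsics
    let _ = raw;
}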

View File

@ -7,6 +7,16 @@ macro_rules! define_ty {
}
}
macro_rules! define_ty_doc {
($name:ident, $($elty:ident),+ | $(#[$doc:meta])*) => {
$(#[$doc])*
#[repr(simd)]
#[derive(Clone, Copy, Debug, PartialEq)]
#[allow(non_camel_case_types)]
pub struct $name($($elty),*);
}
}
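For reference, the invocation `define_ty_doc! { f32x2, f32, f32 | ... }` shown later in this commit expands to roughly:
/// A 64-bit vector with 2 `f32` lanes.
#[repr(simd)]
#[derive(Clone, Copy, Debug, PartialEq)]
#[allow(non_camel_case_types)]
pub struct f32x2(f32, f32);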
macro_rules! define_impl {
(
$name:ident, $elemty:ident, $nelems:expr, $boolname:ident,
@ -246,11 +256,11 @@ macro_rules! define_integer_ops {
}
macro_rules! define_casts {
($(($ty:ident, $floatty:ident, $floatcast:ident)),+) => {
($(($fromty:ident, $toty:ident, $cast:ident)),+) => {
$(
impl $ty {
impl $fromty {
#[inline]
pub fn $floatcast(self) -> ::$floatty {
pub fn $cast(self) -> ::simd::$toty {
unsafe { simd_cast(self) }
}
}
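With the generalized parameters, an entry such as `(u8x64, i8x64, as_i8x64)` from the new v512.rs below expands to roughly:
impl u8x64 {
    #[inline]
    pub fn as_i8x64(self) -> ::simd::i8x64 {
        unsafe { simd_cast(self) }
    }
}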

View File

@ -1,4 +1,4 @@
use simd::*;
use simd_llvm::*;
define_ty! { f64x2, f64, f64 }
define_impl! { f64x2, f64, 2, i64x2, x0, x1 }

View File

@ -1,4 +1,4 @@
use simd::*;
use simd_llvm::*;
define_ty! { f64x4, f64, f64, f64, f64 }
define_impl! { f64x4, f64, 4, i64x4, x0, x1, x2, x3 }

library/stdarch/src/v512.rs (new file, 152 lines)
View File

@ -0,0 +1,152 @@
use simd_llvm::*;
define_ty! { f64x8, f64, f64, f64, f64, f64, f64, f64, f64 }
define_impl! { f64x8, f64, 8, i64x8, x0, x1, x2, x3, x4, x5, x6, x7 }
define_ty! {
f32x16,
f32, f32, f32, f32, f32, f32, f32, f32,
f32, f32, f32, f32, f32, f32, f32, f32
}
define_impl! {
f32x16, f32, 16, i32x16,
x0, x1, x2, x3, x4, x5, x6, x7,
x8, x9, x10, x11, x12, x13, x14, x15
}
define_ty! { u64x8, u64, u64, u64, u64, u64, u64, u64, u64 }
define_impl! { u64x8, u64, 8, i64x8, x0, x1, x2, x3, x4, x5, x6, x7 }
define_ty! { i64x8, i64, i64, i64, i64, i64, i64, i64, i64 }
define_impl! { i64x8, i64, 8, i64x8, x0, x1, x2, x3, x4, x5, x6, x7 }
define_ty! {
u32x16,
u32, u32, u32, u32, u32, u32, u32, u32,
u32, u32, u32, u32, u32, u32, u32, u32
}
define_impl! {
u32x16, u32, 16, i32x16,
x0, x1, x2, x3, x4, x5, x6, x7,
x8, x9, x10, x11, x12, x13, x14, x15
}
define_ty! {
i32x16,
i32, i32, i32, i32, i32, i32, i32, i32,
i32, i32, i32, i32, i32, i32, i32, i32
}
define_impl! {
i32x16, i32, 16, i32x16,
x0, x1, x2, x3, x4, x5, x6, x7,
x8, x9, x10, x11, x12, x13, x14, x15
}
define_ty! {
u16x32,
u16, u16, u16, u16, u16, u16, u16, u16,
u16, u16, u16, u16, u16, u16, u16, u16,
u16, u16, u16, u16, u16, u16, u16, u16,
u16, u16, u16, u16, u16, u16, u16, u16
}
define_impl! {
u16x32, u16, 32, i16x32,
x0, x1, x2, x3, x4, x5, x6, x7,
x8, x9, x10, x11, x12, x13, x14, x15,
x16, x17, x18, x19, x20, x21, x22, x23,
x24, x25, x26, x27, x28, x29, x30, x31
}
define_ty! {
i16x32,
i16, i16, i16, i16, i16, i16, i16, i16,
i16, i16, i16, i16, i16, i16, i16, i16,
i16, i16, i16, i16, i16, i16, i16, i16,
i16, i16, i16, i16, i16, i16, i16, i16
}
define_impl! {
i16x32, i16, 32, i16x32,
x0, x1, x2, x3, x4, x5, x6, x7,
x8, x9, x10, x11, x12, x13, x14, x15,
x16, x17, x18, x19, x20, x21, x22, x23,
x24, x25, x26, x27, x28, x29, x30, x31
}
define_ty! {
u8x64,
u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8,
u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8,
u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8,
u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8
}
define_impl! {
u8x64, u8, 64, i8x64,
x0, x1, x2, x3, x4, x5, x6, x7,
x8, x9, x10, x11, x12, x13, x14, x15,
x16, x17, x18, x19, x20, x21, x22, x23,
x24, x25, x26, x27, x28, x29, x30, x31,
x32, x33, x34, x35, x36, x37, x38, x39,
x40, x41, x42, x43, x44, x45, x46, x47,
x48, x49, x50, x51, x52, x53, x54, x55,
x56, x57, x58, x59, x60, x61, x62, x63
}
define_ty! {
i8x64,
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8
}
define_impl! {
i8x64, i8, 64, i8x64,
x0, x1, x2, x3, x4, x5, x6, x7,
x8, x9, x10, x11, x12, x13, x14, x15,
x16, x17, x18, x19, x20, x21, x22, x23,
x24, x25, x26, x27, x28, x29, x30, x31,
x32, x33, x34, x35, x36, x37, x38, x39,
x40, x41, x42, x43, x44, x45, x46, x47,
x48, x49, x50, x51, x52, x53, x54, x55,
x56, x57, x58, x59, x60, x61, x62, x63
}
define_from!(u64x8, i64x8, u32x16, i32x16, u16x32, i16x32, u8x64, i8x64);
define_from!(i64x8, u64x8, u32x16, i32x16, u16x32, i16x32, u8x64, i8x64);
define_from!(u32x16, u64x8, i64x8, i32x16, u16x32, i16x32, u8x64, i8x64);
define_from!(i32x16, u64x8, i64x8, u32x16, u16x32, i16x32, u8x64, i8x64);
define_from!(u16x32, u64x8, i64x8, u32x16, i32x16, i16x32, u8x64, i8x64);
define_from!(i16x32, u64x8, i64x8, u32x16, i32x16, u16x32, u8x64, i8x64);
define_from!(u8x64, u64x8, i64x8, u32x16, i32x16, u16x32, i16x32, i8x64);
define_from!(i8x64, u64x8, i64x8, u32x16, i32x16, u16x32, i16x32, u8x64);
define_common_ops!(
f64x8, f32x16, u64x8, i64x8, u32x16, i32x16, u16x32, i16x32, u8x64, i8x64);
define_float_ops!(f64x8, f32x16);
define_integer_ops!(
(u64x8, u64),
(i64x8, i64),
(u32x16, u32),
(i32x16, i32),
(u16x32, u16),
(i16x32, i16),
(u8x64, u8),
(i8x64, i8));
define_casts!(
(f64x8, f32x8, as_f32x8),
(f64x8, u64x8, as_u64x8),
(f64x8, i64x8, as_i64x8),
(f32x16, u32x16, as_u32x16),
(f32x16, i32x16, as_i32x16),
(u64x8, f64x8, as_f64x8),
(u64x8, i64x8, as_i64x8),
(i64x8, f64x8, as_f64x8),
(i64x8, u64x8, as_u64x8),
(u32x16, f32x16, as_f32x16),
(u32x16, i32x16, as_i32x16),
(i32x16, f32x16, as_f32x16),
(i32x16, u32x16, as_u32x16),
(u16x32, i16x32, as_i16x32),
(i16x32, u16x32, as_u16x32),
(u8x64, i8x64, as_i8x64),
(i8x64, u8x64, as_u8x64));
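A short usage sketch for the new 512-bit types, assuming `splat` and the arithmetic operators behave as they do for the existing 64/128/256-bit widths:
use stdsimd::simd::{f64x8, i64x8};
fn doubled_as_ints(x: f64x8) -> i64x8 {
    let y = x + x; // lane-wise add from define_common_ops!
    y.as_i64x8()   // lane-wise value cast (simd_cast) from the define_casts! list above
}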

View File

@ -1,9 +1,15 @@
use simd::*;
use simd_llvm::*;
define_ty! { f32x2, f32, f32 }
define_ty_doc! {
f32x2, f32, f32 |
/// A 64-bit vector with 2 `f32` lanes.
}
define_impl! { f32x2, f32, 2, i32x2, x0, x1 }
define_ty! { u32x2, u32, u32 }
define_ty_doc! {
u32x2, u32, u32 |
/// A 64-bit vector with 2 `u32` lanes.
}
define_impl! { u32x2, u32, 2, i32x2, x0, x1 }
define_ty! { i32x2, i32, i32 }

View File

@ -423,12 +423,12 @@ pub fn _mm256_movemask_epi8(a: i8x32) -> i32 {
unsafe { pmovmskb(a) }
}
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned
/// 8-bit integers in `a` compared to those in `b`, and store the 16-bit
/// results in dst. Eight SADs are performed for each 128-bit lane using one
/// quadruplet from `b` and eight quadruplets from `a`. One quadruplet is
/// selected from `b` starting at the offset specified in `imm8`. Eight
/// quadruplets are formed from sequential 8-bit integers selected from `a`
/// starting at the offset specified in `imm8`.
#[inline(always)]
#[target_feature = "+avx2"]
@ -438,9 +438,9 @@ pub fn _mm256_mpsadbw_epu8(a: u8x32, b: u8x32, imm8: i32) -> u16x16 {
***/
/// Multiply the low 32-bit integers from each packed 64-bit element in
/// `a` and `b`
///
/// Return the 64-bit results.
#[inline(always)]
#[target_feature = "+avx2"]
@ -448,7 +448,7 @@ pub fn _mm256_mul_epi32(a: i32x8, b: i32x8) -> i64x4 {
unsafe { pmuldq(a, b) }
}
/// Multiply the low unsigned 32-bit integers from each packed 64-bit
/// element in `a` and `b`
///
/// Return the unsigned 64-bit results.
@ -458,7 +458,7 @@ pub fn _mm256_mul_epu32(a: u32x8, b: u32x8) -> u64x4 {
unsafe { pmuludq(a, b) }
}
/// Multiply the packed 16-bit integers in `a` and `b`, producing
/// intermediate 32-bit integers and returning the high 16 bits of the
/// intermediate integers.
#[inline(always)]
@ -476,7 +476,7 @@ pub fn _mm256_mulhi_epu16(a: u16x16, b: u16x16) -> u16x16 {
unsafe { pmulhuw(a, b) }
}
/// Multiply the packed 16-bit integers in `a` and `b`, producing
/// intermediate 32-bit integers, and return the low 16 bits of the
/// intermediate integers
#[inline(always)]
@ -486,7 +486,7 @@ pub fn _mm256_mullo_epi16(a: i16x16, b:i16x16) -> i16x16 {
}
/// Multiply the packed 32-bit integers in `a` and `b`, producing
/// intermediate 64-bit integers, and return the low 32 bits of the
/// intermediate integers
#[inline(always)]
@ -495,7 +495,7 @@ pub fn _mm256_mullo_epi32(a: i32x8, b:i32x8) -> i32x8 {
a * b
}
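Per lane this is simply a wrapping 32-bit multiply; a scalar model (illustrative only, not part of the crate):
fn mullo_epi32_lane(a: i32, b: i32) -> i32 {
    // keep the low 32 bits of the 64-bit product
    ((a as i64) * (b as i64)) as i32
}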
/// Multiply packed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Truncate each intermediate
/// integer to the 18 most significant bits, round by adding 1, and
/// return bits [16:1]
@ -505,7 +505,7 @@ pub fn _mm256_mulhrs_epi16(a: i16x16, b:i16x16) -> i16x16 {
unsafe { pmulhrsw(a, b) }
}
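A scalar model of the per-lane rounding described above (illustrative only, not part of the crate):
fn mulhrs_epi16_lane(a: i16, b: i16) -> i16 {
    let prod = (a as i32) * (b as i32); // 32-bit intermediate
    (((prod >> 14) + 1) >> 1) as i16    // keep the 18 MSBs, round by adding 1, take bits [16:1]
}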
/// Compute the bitwise OR of 256 bits (representing integer data) in `a`
/// and `b`
#[inline(always)]
#[target_feature = "+avx2"]
@ -513,7 +513,7 @@ pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
a | b
}
/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation
#[inline(always)]
#[target_feature = "+avx2"]
@ -521,7 +521,7 @@ pub fn _mm256_packs_epi16(a: i16x16, b: i16x16) -> i8x32 {
unsafe { packsswb(a, b) }
}
/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using signed saturation
#[inline(always)]
#[target_feature = "+avx2"]
@ -552,8 +552,8 @@ pub fn _mm256_packus_epi32(a: i32x8, b: i32x8) -> u16x16 {
// TODO _mm256_permutevar8x32_ps (__m256 a, __m256i idx)
/// Compute the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sum each consecutive 8 differences to
/// produce four unsigned 16-bit integers, and pack these unsigned 16-bit
/// integers in the low 16 bits of the 64-bit return value
#[inline(always)]
#[target_feature = "+avx2"]
@ -593,7 +593,7 @@ pub fn _mm256_sign_epi8(a: i8x32, b: i8x32) -> i8x32 {
unsafe { psignb(a, b) }
}
/// Shift packed 16-bit integers in `a` left by `count` while
/// shifting in zeros, and return the result
#[inline(always)]
#[target_feature = "+avx2"]
@ -601,7 +601,7 @@ pub fn _mm256_sll_epi16(a: i16x16, count: i16x8) -> i16x16 {
unsafe { psllw(a, count) }
}
/// Shift packed 32-bit integers in `a` left by `count` while
/// shifting in zeros, and return the result
#[inline(always)]
#[target_feature = "+avx2"]
@ -609,7 +609,7 @@ pub fn _mm256_sll_epi32(a: i32x8, count: i32x4) -> i32x8 {
unsafe { pslld(a, count) }
}
/// Shift packed 64-bit integers in `a` left by `count` while
/// shifting in zeros, and return the result
#[inline(always)]
#[target_feature = "+avx2"]
@ -622,7 +622,7 @@ pub fn _mm256_sll_epi64(a: i64x4, count: i64x2) -> i64x4 {
#[inline(always)]
#[target_feature = "+avx2"]
pub fn _mm256_slli_epi16(a: i16x16, imm8: i32) -> i16x16 {
unsafe { pslliw(a, imm8) }
}
/// Shift packed 32-bit integers in `a` left by `imm8` while
@ -630,7 +630,7 @@ pub fn _mm256_slli_epi16(a: i16x16, imm8: i32) -> i16x16 {
#[inline(always)]
#[target_feature = "+avx2"]
pub fn _mm256_slli_epi32(a: i32x8, imm8: i32) -> i32x8 {
unsafe { psllid(a, imm8) }
}
/// Shift packed 64-bit integers in `a` left by `imm8` while
@ -638,7 +638,7 @@ pub fn _mm256_slli_epi32(a: i32x8, imm8: i32) -> i32x8 {
#[inline(always)]
#[target_feature = "+avx2"]
pub fn _mm256_slli_epi64(a: i64x4, imm8: i32) -> i64x4 {
unsafe { pslliq(a, imm8) }
}
// TODO _mm256_slli_si256 (__m256i a, const int imm8)
@ -695,7 +695,7 @@ pub fn _mm256_sra_epi32(a: i32x8, count: i32x4) -> i32x8 {
unsafe { psrad(a, count) }
}
/// Shift packed 16-bit integers in `a` right by `imm8` while
/// shifting in sign bits.
#[inline(always)]
#[target_feature = "+avx2"]
@ -703,7 +703,7 @@ pub fn _mm256_srai_epi16(a: i16x16, imm8: i32) -> i16x16 {
unsafe { psraiw(a, imm8) }
}
/// Shift packed 32-bit integers in `a` right by `imm8` while
/// shifting in sign bits.
#[inline(always)]
#[target_feature = "+avx2"]
@ -733,7 +733,7 @@ pub fn _mm256_srav_epi32(a: i32x8, count: i32x8) -> i32x8 {
#[inline(always)]
#[target_feature = "+avx2"]
pub fn _mm256_srl_epi16(a: i16x16, count: i16x8) -> i16x16 {
unsafe { psrlw(a, count) }
}
/// Shift packed 32-bit integers in `a` right by `count` while shifting in
@ -741,7 +741,7 @@ pub fn _mm256_srl_epi16(a: i16x16, count: i16x8) -> i16x16 {
#[inline(always)]
#[target_feature = "+avx2"]
pub fn _mm256_srl_epi32(a: i32x8, count: i32x4) -> i32x8 {
unsafe { psrld(a, count) }
}
/// Shift packed 64-bit integers in `a` right by `count` while shifting in
@ -749,10 +749,10 @@ pub fn _mm256_srl_epi32(a: i32x8, count: i32x4) -> i32x8 {
#[inline(always)]
#[target_feature = "+avx2"]
pub fn _mm256_srl_epi64(a: i64x4, count: i64x2) -> i64x4 {
unsafe { psrlq(a, count) }
}
/// Shift packed 16-bit integers in `a` right by `imm8` while shifting in
/// zeros
#[inline(always)]
#[target_feature = "+avx2"]
@ -760,7 +760,7 @@ pub fn _mm256_srli_epi16(a: i16x16, imm8: i32) -> i16x16 {
unsafe { psrliw(a, imm8) }
}
/// Shift packed 32-bit integers in `a` right by `imm8` while shifting in
/// zeros
#[inline(always)]
#[target_feature = "+avx2"]
@ -768,7 +768,7 @@ pub fn _mm256_srli_epi32(a: i32x8, imm8: i32) -> i32x8 {
unsafe { psrlid(a, imm8) }
}
/// Shift packed 64-bit integers in `a` right by `imm8` while shifting in
/// zeros
#[inline(always)]
#[target_feature = "+avx2"]
@ -879,7 +879,7 @@ pub fn _mm256_subs_epu8(a: u8x32, b: u8x32) -> u8x32 {
// TODO __m256i _mm256_unpacklo_epi64 (__m256i a, __m256i b)
// TODO __m256i _mm256_unpacklo_epi8 (__m256i a, __m256i b)
/// Compute the bitwise XOR of 256 bits (representing integer data)
/// in `a` and `b`
#[inline(always)]
#[target_feature = "+avx2"]
@ -903,7 +903,7 @@ extern "C" {
#[link_name = "llvm.x86.avx2.paddus.b"]
fn paddusb(a: u8x32, b: u8x32) -> u8x32;
#[link_name = "llvm.x86.avx2.paddus.w"]
fn paddusw(a: u16x16, b: u16x16) -> u16x16;
#[link_name = "llvm.x86.avx2.pavg.b"]
fn pavgb(a: u8x32, b: u8x32) -> u8x32;
#[link_name = "llvm.x86.avx2.pavg.w"]
@ -949,7 +949,7 @@ extern "C" {
#[link_name = "llvm.x86.avx2.pminu.d"]
fn pminud(a: u32x8, b: u32x8) -> u32x8;
#[link_name = "llvm.x86.avx2.pminu.b"]
fn pminub(a: u8x32, b: u8x32) -> u8x32;
#[link_name = "llvm.x86.avx2.pmovmskb"] //fails in debug
fn pmovmskb(a: i8x32) -> i32;
#[link_name = "llvm.x86.avx2.mpsadbw"] //fails in debug
@ -1031,7 +1031,7 @@ extern "C" {
#[link_name = "llvm.x86.avx2.psrlv.q"]
fn psrlvq(a: i64x2, count: i64x2) -> i64x2;
#[link_name = "llvm.x86.avx2.psrlv.q.256"]
fn psrlvq256(a: i64x4, count: i64x4) -> i64x4;
#[link_name = "llvm.x86.avx2.psubs.b"]
fn psubsb(a: i8x32, b: i8x32) -> i8x32;
#[link_name = "llvm.x86.avx2.psubs.w"]
@ -1580,15 +1580,15 @@ mod tests {
}
/**
// TODO this fails in debug but not release, why?
#[test]
#[target_feature ="+avx2"]
fn _mm256_movemask_epi8() {
let a = i8x32::splat(-1);
let r = avx2::_mm256_movemask_epi8(a);
let e : i32 = -1;
assert_eq!(r, e);
}
// TODO This fails in debug but not in release, why?
@ -1604,7 +1604,7 @@ mod tests {
**/
#[test]
#[target_feature = "+avx2"]
fn _mm256_mul_epi32() {
let a = i32x8::new(0, 0, 0, 0, 2, 2, 2, 2);
let b = i32x8::new(1, 2, 3, 4, 5, 6, 7, 8);
@ -1693,7 +1693,7 @@ mod tests {
4, 4, 4, 4, 4, 4, 4, 4,
2, 2, 2, 2, 2, 2, 2, 2,
4, 4, 4, 4, 4, 4, 4, 4);
assert_eq!(r, e);
}
@ -1708,7 +1708,7 @@ mod tests {
4, 4, 4, 4,
2, 2, 2, 2,
4, 4, 4, 4);
assert_eq!(r, e);
}
@ -1723,7 +1723,7 @@ mod tests {
4, 4, 4, 4, 4, 4, 4, 4,
2, 2, 2, 2, 2, 2, 2, 2,
4, 4, 4, 4, 4, 4, 4, 4);
assert_eq!(r, e);
}
@ -1738,7 +1738,7 @@ mod tests {
4, 4, 4, 4,
2, 2, 2, 2,
4, 4, 4, 4);
assert_eq!(r, e);
}
@ -1756,7 +1756,7 @@ mod tests {
#[target_feature = "+avx2"]
fn _mm256_sign_epi16() {
let a = i16x16::splat(2);
let b = i16x16::splat(-1);
let r = avx2::_mm256_sign_epi16(a, b);
let e = i16x16::splat(-2);
assert_eq!(r, e);
@ -1766,7 +1766,7 @@ mod tests {
#[target_feature = "+avx2"]
fn _mm256_sign_epi32() {
let a = i32x8::splat(2);
let b = i32x8::splat(-1);
let r = avx2::_mm256_sign_epi32(a, b);
let e = i32x8::splat(-2);
assert_eq!(r, e);
@ -1776,7 +1776,7 @@ mod tests {
#[target_feature = "+avx2"]
fn _mm256_sign_epi8() {
let a = i8x32::splat(2);
let b = i8x32::splat(-1);
let r = avx2::_mm256_sign_epi8(a, b);
let e = i8x32::splat(-2);
assert_eq!(r, e);
@ -1816,7 +1816,7 @@ mod tests {
avx2::_mm256_slli_epi16(i16x16::splat(0xFF), 4),
i16x16::splat(0xFF0));
}
#[test]
#[target_feature = "+avx2"]
fn _mm256_slli_epi32() {
@ -1840,7 +1840,7 @@ mod tests {
let b = i32x4::splat(1);
let r = avx2::_mm_sllv_epi32(a, b);
let e = i32x4::splat(4);
assert_eq!(r, e);
}
#[test]
@ -1850,7 +1850,7 @@ mod tests {
let b = i32x8::splat(1);
let r = avx2::_mm256_sllv_epi32(a, b);
let e = i32x8::splat(4);
assert_eq!(r, e);
}
#[test]
#[target_feature = "+avx2"]
@ -1859,7 +1859,7 @@ mod tests {
let b = i64x2::splat(1);
let r = avx2::_mm_sllv_epi64(a, b);
let e = i64x2::splat(4);
assert_eq!(r, e);
}
#[test]
#[target_feature = "+avx2"]
@ -1868,7 +1868,7 @@ mod tests {
let b = i64x4::splat(1);
let r = avx2::_mm256_sllv_epi64(a, b);
let e = i64x4::splat(4);
assert_eq!(r, e);
}
#[test]
@ -2097,5 +2097,5 @@ mod tests {
__m256i::splat(6));
}
}

View File

@ -2,7 +2,7 @@ use std::mem;
use std::os::raw::c_void;
use std::ptr;
use simd::{
use simd_llvm::{
simd_cast, simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16,
};
use x86::__m128i;

View File

@ -1,4 +1,3 @@
// use v128::*;
use x86::__m128i;
#[inline(always)]

View File

@ -1,4 +1,3 @@
// use v128::*;
use x86::__m128i;
pub const _SIDD_UBYTE_OPS: i8 = 0b00000000;