add vqshl, vqshrn, vqshrun neon instructions (#1120)
parent 0073d6ace3
commit e792dfd02c
@@ -3435,6 +3435,338 @@ pub unsafe fn vqrshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) ->
    simd_shuffle4(a, vqrshrun_n_s64::<N>(b), [0, 1, 2, 3])
}

/// Signed saturating shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshl))]
pub unsafe fn vqshlb_s8(a: i8, b: i8) -> i8 {
    let c: int8x8_t = vqshl_s8(vdup_n_s8(a), vdup_n_s8(b));
    simd_extract(c, 0)
}

/// Signed saturating shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshl))]
pub unsafe fn vqshlh_s16(a: i16, b: i16) -> i16 {
    let c: int16x4_t = vqshl_s16(vdup_n_s16(a), vdup_n_s16(b));
    simd_extract(c, 0)
}

/// Signed saturating shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshl))]
pub unsafe fn vqshls_s32(a: i32, b: i32) -> i32 {
    let c: int32x2_t = vqshl_s32(vdup_n_s32(a), vdup_n_s32(b));
    simd_extract(c, 0)
}

/// Signed saturating shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshl))]
pub unsafe fn vqshld_s64(a: i64, b: i64) -> i64 {
    let c: int64x1_t = vqshl_s64(vdup_n_s64(a), vdup_n_s64(b));
    simd_extract(c, 0)
}

/// Unsigned saturating shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqshl))]
pub unsafe fn vqshlb_u8(a: u8, b: i8) -> u8 {
    let c: uint8x8_t = vqshl_u8(vdup_n_u8(a), vdup_n_s8(b));
    simd_extract(c, 0)
}

/// Unsigned saturating shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqshl))]
pub unsafe fn vqshlh_u16(a: u16, b: i16) -> u16 {
    let c: uint16x4_t = vqshl_u16(vdup_n_u16(a), vdup_n_s16(b));
    simd_extract(c, 0)
}

/// Unsigned saturating shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqshl))]
pub unsafe fn vqshls_u32(a: u32, b: i32) -> u32 {
    let c: uint32x2_t = vqshl_u32(vdup_n_u32(a), vdup_n_s32(b));
    simd_extract(c, 0)
}

/// Unsigned saturating shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqshl))]
pub unsafe fn vqshld_u64(a: u64, b: i64) -> u64 {
    let c: uint64x1_t = vqshl_u64(vdup_n_u64(a), vdup_n_s64(b));
    simd_extract(c, 0)
}
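
For context, a minimal usage sketch of the scalar saturating-shift-left intrinsics above, assuming they end up exported from std::arch::aarch64 like the rest of the NEON API (the values mirror the tests added further down in this commit):

#[cfg(target_arch = "aarch64")]
fn scalar_vqshl_sketch() {
    use std::arch::aarch64::{vqshlb_s8, vqshlb_u8};
    // Safety: the intrinsics only require the `neon` target feature,
    // which is enabled by default on aarch64 targets.
    unsafe {
        assert_eq!(vqshlb_s8(1, 2), 4);             // 1 << 2
        assert_eq!(vqshlb_s8(i8::MAX, 1), i8::MAX); // saturates instead of wrapping
        assert_eq!(vqshlb_u8(1, 2), 4);             // unsigned value, signed shift count
    }
}
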
/// Signed saturating shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshl, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshlb_n_s8<const N: i32>(a: i8) -> i8 {
    static_assert_imm3!(N);
    simd_extract(vqshl_n_s8::<N>(vdup_n_s8(a)), 0)
}

/// Signed saturating shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshl, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshlh_n_s16<const N: i32>(a: i16) -> i16 {
    static_assert_imm4!(N);
    simd_extract(vqshl_n_s16::<N>(vdup_n_s16(a)), 0)
}

/// Signed saturating shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshl, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshls_n_s32<const N: i32>(a: i32) -> i32 {
    static_assert_imm5!(N);
    simd_extract(vqshl_n_s32::<N>(vdup_n_s32(a)), 0)
}

/// Signed saturating shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshl, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshld_n_s64<const N: i32>(a: i64) -> i64 {
    static_assert_imm6!(N);
    simd_extract(vqshl_n_s64::<N>(vdup_n_s64(a)), 0)
}

/// Unsigned saturating shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqshl, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshlb_n_u8<const N: i32>(a: u8) -> u8 {
    static_assert_imm3!(N);
    simd_extract(vqshl_n_u8::<N>(vdup_n_u8(a)), 0)
}

/// Unsigned saturating shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqshl, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshlh_n_u16<const N: i32>(a: u16) -> u16 {
    static_assert_imm4!(N);
    simd_extract(vqshl_n_u16::<N>(vdup_n_u16(a)), 0)
}

/// Unsigned saturating shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqshl, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshls_n_u32<const N: i32>(a: u32) -> u32 {
    static_assert_imm5!(N);
    simd_extract(vqshl_n_u32::<N>(vdup_n_u32(a)), 0)
}

/// Unsigned saturating shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqshl, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshld_n_u64<const N: i32>(a: u64) -> u64 {
    static_assert_imm6!(N);
    simd_extract(vqshl_n_u64::<N>(vdup_n_u64(a)), 0)
}
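
Similarly, a hedged sketch of the const-generic _n_ forms above, whose shift amount is checked at compile time by the static_assert_imm* macros (3 bits for the byte forms up to 6 bits for the doubleword forms):

#[cfg(target_arch = "aarch64")]
fn scalar_vqshl_n_sketch() {
    use std::arch::aarch64::{vqshlb_n_s8, vqshld_n_u64};
    unsafe {
        // N is a const generic: 0..=7 for the b forms, 0..=63 for the d forms;
        // out-of-range shift amounts are rejected at compile time.
        assert_eq!(vqshlb_n_s8::<2>(1), 4);
        assert_eq!(vqshld_n_u64::<2>(1), 4);
    }
}
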
/// Signed saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshrnh_n_s16<const N: i32>(a: i16) -> i8 {
    static_assert!(N : i32 where N >= 1 && N <= 8);
    simd_extract(vqshrn_n_s16::<N>(vdupq_n_s16(a)), 0)
}

/// Signed saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshrns_n_s32<const N: i32>(a: i32) -> i16 {
    static_assert!(N : i32 where N >= 1 && N <= 16);
    simd_extract(vqshrn_n_s32::<N>(vdupq_n_s32(a)), 0)
}

/// Signed saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshrnd_n_s64<const N: i32>(a: i64) -> i32 {
    static_assert!(N : i32 where N >= 1 && N <= 32);
    simd_extract(vqshrn_n_s64::<N>(vdupq_n_s64(a)), 0)
}

/// Signed saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vqshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
    static_assert!(N : i32 where N >= 1 && N <= 8);
    simd_shuffle16(a, vqshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
}

/// Signed saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vqshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
    static_assert!(N : i32 where N >= 1 && N <= 16);
    simd_shuffle8(a, vqshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
}

/// Signed saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vqshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
    static_assert!(N : i32 where N >= 1 && N <= 32);
    simd_shuffle4(a, vqshrn_n_s64::<N>(b), [0, 1, 2, 3])
}

/// Unsigned saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshrnh_n_u16<const N: i32>(a: u16) -> u8 {
    static_assert!(N : i32 where N >= 1 && N <= 8);
    simd_extract(vqshrn_n_u16::<N>(vdupq_n_u16(a)), 0)
}

/// Unsigned saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshrns_n_u32<const N: i32>(a: u32) -> u16 {
    static_assert!(N : i32 where N >= 1 && N <= 16);
    simd_extract(vqshrn_n_u32::<N>(vdupq_n_u32(a)), 0)
}

/// Unsigned saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshrnd_n_u64<const N: i32>(a: u64) -> u32 {
    static_assert!(N : i32 where N >= 1 && N <= 32);
    simd_extract(vqshrn_n_u64::<N>(vdupq_n_u64(a)), 0)
}

/// Unsigned saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vqshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
    static_assert!(N : i32 where N >= 1 && N <= 8);
    simd_shuffle16(a, vqshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
}

/// Unsigned saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vqshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
    static_assert!(N : i32 where N >= 1 && N <= 16);
    simd_shuffle8(a, vqshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
}

/// Unsigned saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vqshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
    static_assert!(N : i32 where N >= 1 && N <= 32);
    simd_shuffle4(a, vqshrn_n_u64::<N>(b), [0, 1, 2, 3])
}
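
A sketch of the narrowing forms added above, again assuming the usual std::arch::aarch64 export: the scalar vqshrn*_n_* intrinsics shift right and saturate into the half-width type, and the *_high_* forms narrow b into the upper half of a 128-bit result while keeping a as the lower half:

#[cfg(target_arch = "aarch64")]
fn vqshrn_sketch() {
    use std::arch::aarch64::{
        vdup_n_s8, vdupq_n_s16, vgetq_lane_s8, vqshrn_high_n_s16, vqshrnh_n_s16,
    };
    unsafe {
        // 4 >> 2 == 1, saturated into i8.
        assert_eq!(vqshrnh_n_s16::<2>(4), 1);

        // Lanes 0..8 come from `a`, lanes 8..16 from the narrowed `b`.
        let a = vdup_n_s8(1);    // eight lanes of 1
        let b = vdupq_n_s16(32); // 32 >> 2 == 8 per lane after narrowing
        let r = vqshrn_high_n_s16::<2>(a, b);
        assert_eq!(vgetq_lane_s8::<0>(r), 1);
        assert_eq!(vgetq_lane_s8::<8>(r), 8);
    }
}
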
/// Signed saturating shift right unsigned narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshrunh_n_s16<const N: i32>(a: i16) -> u8 {
    static_assert!(N : i32 where N >= 1 && N <= 8);
    simd_extract(vqshrun_n_s16::<N>(vdupq_n_s16(a)), 0)
}

/// Signed saturating shift right unsigned narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshruns_n_s32<const N: i32>(a: i32) -> u16 {
    static_assert!(N : i32 where N >= 1 && N <= 16);
    simd_extract(vqshrun_n_s32::<N>(vdupq_n_s32(a)), 0)
}

/// Signed saturating shift right unsigned narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshrund_n_s64<const N: i32>(a: i64) -> u32 {
    static_assert!(N : i32 where N >= 1 && N <= 32);
    simd_extract(vqshrun_n_s64::<N>(vdupq_n_s64(a)), 0)
}

/// Signed saturating shift right unsigned narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vqshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
    static_assert!(N : i32 where N >= 1 && N <= 8);
    simd_shuffle16(a, vqshrun_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
}

/// Signed saturating shift right unsigned narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vqshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
    static_assert!(N : i32 where N >= 1 && N <= 16);
    simd_shuffle8(a, vqshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
}

/// Signed saturating shift right unsigned narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vqshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
    static_assert!(N : i32 where N >= 1 && N <= 32);
    simd_shuffle4(a, vqshrun_n_s64::<N>(b), [0, 1, 2, 3])
}
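
Finally for this hunk, a sketch of the vqshrun* family above, which narrows a signed input into an unsigned result, with negative values saturating to zero:

#[cfg(target_arch = "aarch64")]
fn vqshrun_sketch() {
    use std::arch::aarch64::vqshrunh_n_s16;
    unsafe {
        assert_eq!(vqshrunh_n_s16::<2>(4), 1u8);  // 4 >> 2, result is unsigned
        assert_eq!(vqshrunh_n_s16::<2>(-4), 0u8); // negative inputs saturate to 0
    }
}
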
/// Calculates the square root of each lane.
#[inline]
#[target_feature(enable = "neon")]

@@ -8413,6 +8745,295 @@ mod test {
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshlb_s8() {
        let a: i8 = 1;
        let b: i8 = 2;
        let e: i8 = 4;
        let r: i8 = transmute(vqshlb_s8(transmute(a), transmute(b)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshlh_s16() {
        let a: i16 = 1;
        let b: i16 = 2;
        let e: i16 = 4;
        let r: i16 = transmute(vqshlh_s16(transmute(a), transmute(b)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshls_s32() {
        let a: i32 = 1;
        let b: i32 = 2;
        let e: i32 = 4;
        let r: i32 = transmute(vqshls_s32(transmute(a), transmute(b)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshld_s64() {
        let a: i64 = 1;
        let b: i64 = 2;
        let e: i64 = 4;
        let r: i64 = transmute(vqshld_s64(transmute(a), transmute(b)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshlb_u8() {
        let a: u8 = 1;
        let b: i8 = 2;
        let e: u8 = 4;
        let r: u8 = transmute(vqshlb_u8(transmute(a), transmute(b)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshlh_u16() {
        let a: u16 = 1;
        let b: i16 = 2;
        let e: u16 = 4;
        let r: u16 = transmute(vqshlh_u16(transmute(a), transmute(b)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshls_u32() {
        let a: u32 = 1;
        let b: i32 = 2;
        let e: u32 = 4;
        let r: u32 = transmute(vqshls_u32(transmute(a), transmute(b)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshld_u64() {
        let a: u64 = 1;
        let b: i64 = 2;
        let e: u64 = 4;
        let r: u64 = transmute(vqshld_u64(transmute(a), transmute(b)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshlb_n_s8() {
        let a: i8 = 1;
        let e: i8 = 4;
        let r: i8 = transmute(vqshlb_n_s8::<2>(transmute(a)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshlh_n_s16() {
        let a: i16 = 1;
        let e: i16 = 4;
        let r: i16 = transmute(vqshlh_n_s16::<2>(transmute(a)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshls_n_s32() {
        let a: i32 = 1;
        let e: i32 = 4;
        let r: i32 = transmute(vqshls_n_s32::<2>(transmute(a)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshld_n_s64() {
        let a: i64 = 1;
        let e: i64 = 4;
        let r: i64 = transmute(vqshld_n_s64::<2>(transmute(a)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshlb_n_u8() {
        let a: u8 = 1;
        let e: u8 = 4;
        let r: u8 = transmute(vqshlb_n_u8::<2>(transmute(a)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshlh_n_u16() {
        let a: u16 = 1;
        let e: u16 = 4;
        let r: u16 = transmute(vqshlh_n_u16::<2>(transmute(a)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshls_n_u32() {
        let a: u32 = 1;
        let e: u32 = 4;
        let r: u32 = transmute(vqshls_n_u32::<2>(transmute(a)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshld_n_u64() {
        let a: u64 = 1;
        let e: u64 = 4;
        let r: u64 = transmute(vqshld_n_u64::<2>(transmute(a)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshrnh_n_s16() {
        let a: i16 = 4;
        let e: i8 = 1;
        let r: i8 = transmute(vqshrnh_n_s16::<2>(transmute(a)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshrns_n_s32() {
        let a: i32 = 4;
        let e: i16 = 1;
        let r: i16 = transmute(vqshrns_n_s32::<2>(transmute(a)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshrnd_n_s64() {
        let a: i64 = 4;
        let e: i32 = 1;
        let r: i32 = transmute(vqshrnd_n_s64::<2>(transmute(a)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshrn_high_n_s16() {
        let a: i8x8 = i8x8::new(0, 1, 8, 9, 8, 9, 10, 11);
        let b: i16x8 = i16x8::new(32, 36, 40, 44, 48, 52, 56, 60);
        let e: i8x16 = i8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15);
        let r: i8x16 = transmute(vqshrn_high_n_s16::<2>(transmute(a), transmute(b)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshrn_high_n_s32() {
        let a: i16x4 = i16x4::new(0, 1, 8, 9);
        let b: i32x4 = i32x4::new(32, 36, 40, 44);
        let e: i16x8 = i16x8::new(0, 1, 8, 9, 8, 9, 10, 11);
        let r: i16x8 = transmute(vqshrn_high_n_s32::<2>(transmute(a), transmute(b)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshrn_high_n_s64() {
        let a: i32x2 = i32x2::new(0, 1);
        let b: i64x2 = i64x2::new(32, 36);
        let e: i32x4 = i32x4::new(0, 1, 8, 9);
        let r: i32x4 = transmute(vqshrn_high_n_s64::<2>(transmute(a), transmute(b)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshrnh_n_u16() {
        let a: u16 = 4;
        let e: u8 = 1;
        let r: u8 = transmute(vqshrnh_n_u16::<2>(transmute(a)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshrns_n_u32() {
        let a: u32 = 4;
        let e: u16 = 1;
        let r: u16 = transmute(vqshrns_n_u32::<2>(transmute(a)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshrnd_n_u64() {
        let a: u64 = 4;
        let e: u32 = 1;
        let r: u32 = transmute(vqshrnd_n_u64::<2>(transmute(a)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshrn_high_n_u16() {
        let a: u8x8 = u8x8::new(0, 1, 8, 9, 8, 9, 10, 11);
        let b: u16x8 = u16x8::new(32, 36, 40, 44, 48, 52, 56, 60);
        let e: u8x16 = u8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15);
        let r: u8x16 = transmute(vqshrn_high_n_u16::<2>(transmute(a), transmute(b)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshrn_high_n_u32() {
        let a: u16x4 = u16x4::new(0, 1, 8, 9);
        let b: u32x4 = u32x4::new(32, 36, 40, 44);
        let e: u16x8 = u16x8::new(0, 1, 8, 9, 8, 9, 10, 11);
        let r: u16x8 = transmute(vqshrn_high_n_u32::<2>(transmute(a), transmute(b)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshrn_high_n_u64() {
        let a: u32x2 = u32x2::new(0, 1);
        let b: u64x2 = u64x2::new(32, 36);
        let e: u32x4 = u32x4::new(0, 1, 8, 9);
        let r: u32x4 = transmute(vqshrn_high_n_u64::<2>(transmute(a), transmute(b)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshrunh_n_s16() {
        let a: i16 = 4;
        let e: u8 = 1;
        let r: u8 = transmute(vqshrunh_n_s16::<2>(transmute(a)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshruns_n_s32() {
        let a: i32 = 4;
        let e: u16 = 1;
        let r: u16 = transmute(vqshruns_n_s32::<2>(transmute(a)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshrund_n_s64() {
        let a: i64 = 4;
        let e: u32 = 1;
        let r: u32 = transmute(vqshrund_n_s64::<2>(transmute(a)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshrun_high_n_s16() {
        let a: u8x8 = u8x8::new(0, 1, 8, 9, 8, 9, 10, 11);
        let b: i16x8 = i16x8::new(32, 36, 40, 44, 48, 52, 56, 60);
        let e: u8x16 = u8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15);
        let r: u8x16 = transmute(vqshrun_high_n_s16::<2>(transmute(a), transmute(b)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshrun_high_n_s32() {
        let a: u16x4 = u16x4::new(0, 1, 8, 9);
        let b: i32x4 = i32x4::new(32, 36, 40, 44);
        let e: u16x8 = u16x8::new(0, 1, 8, 9, 8, 9, 10, 11);
        let r: u16x8 = transmute(vqshrun_high_n_s32::<2>(transmute(a), transmute(b)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vqshrun_high_n_s64() {
        let a: u32x2 = u32x2::new(0, 1);
        let b: i64x2 = i64x2::new(32, 36);
        let e: u32x4 = u32x4::new(0, 1, 8, 9);
        let r: u32x4 = transmute(vqshrun_high_n_s64::<2>(transmute(a), transmute(b)));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    unsafe fn test_vsqrt_f32() {
        let a: f32x2 = f32x2::new(4.0, 9.0);

File diff suppressed because it is too large
@@ -2198,6 +2198,246 @@ validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15
aarch64 = sqrshrun2
generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t

/// Signed saturating shift left
name = vqshl
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60

aarch64 = sqshl
link-aarch64 = sqshl._EXT_

arm = vqshl
link-arm = vqshifts._EXT_
generate int*_t, int64x*_t

/// Signed saturating shift left
name = vqshl
multi_fn = vqshl-in_ntt-noext, c:in_ntt, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}
multi_fn = simd_extract, c, 0
a = 1
b = 2
validate 4

aarch64 = sqshl
generate i8, i16, i32, i64

/// Unsigned saturating shift left
name = vqshl
out-suffix
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60

aarch64 = uqshl
link-aarch64 = uqshl._EXT_

arm = vqshl
link-arm = vqshiftu._EXT_
generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t

/// Unsigned saturating shift left
name = vqshl
out-suffix
multi_fn = vqshl-out_ntt-noext, c:out_ntt, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}
multi_fn = simd_extract, c, 0
a = 1
b = 2
validate 4

aarch64 = uqshl
generate u8:i8:u8, u16:i16:u16, u32:i32:u32, u64:i64:u64

/// Signed saturating shift left
name = vqshl
n-suffix
constn = N
multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = vqshl-self-noext, a, {vdup-nself-noext, N.try_into().unwrap()}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
n = 2
validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60

aarch64 = sqshl
arm = vqshl
generate int*_t, int64x*_t

/// Signed saturating shift left
name = vqshl
n-suffix
constn = N
multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = simd_extract, {vqshl_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}, 0
a = 1
n = 2
validate 4

aarch64 = sqshl
generate i8, i16, i32, i64
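
For reference, the scalar n-suffix entry above is what produces the vqshlb_n_s8 .. vqshld_n_s64 functions in the generated.rs hunk earlier in this commit; the i8 case expands roughly to the following (an excerpt of the generated code shown above, not new code):

pub unsafe fn vqshlb_n_s8<const N: i32>(a: i8) -> i8 {
    static_assert_imm3!(N); // out_bits_exp_len("i8") == 3, so N is limited to 0..=7
    simd_extract(vqshl_n_s8::<N>(vdup_n_s8(a)), 0)
}
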
/// Unsigned saturating shift left
name = vqshl
n-suffix
constn = N
multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = vqshl-self-noext, a, {vdup-nsigned-noext, N.try_into().unwrap()}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
n = 2
validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60

aarch64 = uqshl
arm = vqshl
generate uint*_t, uint64x*_t

/// Unsigned saturating shift left
name = vqshl
n-suffix
constn = N
multi_fn = static_assert_imm-out_bits_exp_len-N
multi_fn = simd_extract, {vqshl_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}, 0
a = 1
n = 2
validate 4

aarch64 = uqshl
generate u8, u16, u32, u64

/// Signed saturating shift right narrow
name = vqshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
a = 0, 4, 8, 12, 16, 20, 24, 28
n = 2
validate 0, 1, 2, 3, 4, 5, 6, 7

aarch64 = sqshrn
link-aarch64 = sqshrn._EXT2_
const-aarch64 = N

arm = vqshrn
link-arm = vqshiftns._EXT2_
const-arm = -N as ttn
generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t

/// Signed saturating shift right narrow
name = vqshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_extract, {vqshrn_n-in_ntt-::<N>, {vdupq_n-in_ntt-noext, a}}, 0
a = 4
n = 2
validate 1

aarch64 = sqshrn
generate i16:i8, i32:i16, i64:i32

/// Signed saturating shift right narrow
name = vqshrn_high
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle-out_len-noext, a, {vqshrn_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 8, 9, 8, 9, 10, 11
b = 32, 36, 40, 44, 48, 52, 56, 60
n = 2
validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15

aarch64 = sqshrn2
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t

/// Unsigned saturating shift right narrow
name = vqshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
a = 0, 4, 8, 12, 16, 20, 24, 28
n = 2
validate 0, 1, 2, 3, 4, 5, 6, 7

aarch64 = uqshrn
link-aarch64 = uqshrn._EXT2_
const-aarch64 = N

arm = vqshrn
link-arm = vqshiftnu._EXT2_
const-arm = -N as ttn
generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t

/// Unsigned saturating shift right narrow
name = vqshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_extract, {vqshrn_n-in_ntt-::<N>, {vdupq_n-in_ntt-noext, a}}, 0
a = 4
n = 2
validate 1

aarch64 = uqshrn
generate u16:u8, u32:u16, u64:u32

/// Unsigned saturating shift right narrow
name = vqshrn_high
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle-out_len-noext, a, {vqshrn_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 8, 9, 8, 9, 10, 11
b = 32, 36, 40, 44, 48, 52, 56, 60
n = 2
validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15

aarch64 = uqshrn2
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t

/// Signed saturating shift right unsigned narrow
name = vqshrun
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
a = 0, 4, 8, 12, 16, 20, 24, 28
n = 2
validate 0, 1, 2, 3, 4, 5, 6, 7

aarch64 = sqshrun
link-aarch64 = sqshrun._EXT2_
const-aarch64 = N

arm = vqshrun
link-arm = vqshiftnsu._EXT2_
const-arm = -N as ttn
generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t

/// Signed saturating shift right unsigned narrow
name = vqshrun
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_extract, {vqshrun_n-in_ntt-::<N>, {vdupq_n-in_ntt-noext, a}}, 0
a = 4
n = 2
validate 1

aarch64 = sqshrun
generate i16:u8, i32:u16, i64:u32

/// Signed saturating shift right unsigned narrow
name = vqshrun_high
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle-out_len-noext, a, {vqshrun_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 8, 9, 8, 9, 10, 11
b = 32, 36, 40, 44, 48, 52, 56, 60
n = 2
validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15

aarch64 = sqshrun2
generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t

/// Calculates the square root of each lane.
name = vsqrt
fn = simd_fsqrt
@@ -135,10 +135,13 @@ fn type_exp_len(t: &str) -> usize {

fn type_bits_exp_len(t: &str) -> usize {
    match t {
        "int8x8_t" | "int8x16_t" | "uint8x8_t" | "uint8x16_t" | "poly8x8_t" | "poly8x16_t" => 3,
        "int16x4_t" | "int16x8_t" | "uint16x4_t" | "uint16x8_t" | "poly16x4_t" | "poly16x8_t" => 4,
        "int32x2_t" | "int32x4_t" | "uint32x2_t" | "uint32x4_t" => 5,
        "int64x1_t" | "int64x2_t" | "uint64x1_t" | "uint64x2_t" | "poly64x1_t" | "poly64x2_t" => 6,
        "int8x8_t" | "int8x16_t" | "uint8x8_t" | "uint8x16_t" | "poly8x8_t" | "poly8x16_t"
        | "i8" | "u8" => 3,
        "int16x4_t" | "int16x8_t" | "uint16x4_t" | "uint16x8_t" | "poly16x4_t" | "poly16x8_t"
        | "i16" | "u16" => 4,
        "int32x2_t" | "int32x4_t" | "uint32x2_t" | "uint32x4_t" | "i32" | "u32" => 5,
        "int64x1_t" | "int64x2_t" | "uint64x1_t" | "uint64x2_t" | "poly64x1_t" | "poly64x2_t"
        | "i64" | "u64" => 6,
        _ => panic!("unknown type: {}", t),
    }
}
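
A small illustrative check of the mapping extended above (a hypothetical test against main.rs, not part of the commit): the returned exponent picks the static_assert_imm* macro used by the generated scalar intrinsics.

#[test]
fn bits_exp_len_for_scalar_types() {
    assert_eq!(type_bits_exp_len("i8"), 3);  // -> static_assert_imm3!(N), N in 0..=7
    assert_eq!(type_bits_exp_len("u32"), 5); // -> static_assert_imm5!(N), N in 0..=31
    assert_eq!(type_bits_exp_len("u64"), 6); // -> static_assert_imm6!(N), N in 0..=63
}
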
@@ -219,6 +222,14 @@ fn type_to_n_suffix(t: &str) -> &str {
        "poly16x8_t" => "q_n_p16",
        "poly64x1_t" => "_n_p64",
        "poly64x2_t" => "q_n_p64",
        "i8" => "b_n_s8",
        "i16" => "h_n_s16",
        "i32" => "s_n_s32",
        "i64" => "d_n_s64",
        "u8" => "b_n_u8",
        "u16" => "h_n_u16",
        "u32" => "s_n_u32",
        "u64" => "d_n_u64",
        _ => panic!("unknown type: {}", t),
    }
}
@@ -262,50 +273,30 @@ fn type_to_lane_suffixes<'a>(out_t: &'a str, in_t: &'a str) -> String {
    str
}

fn type_to_signed_suffix(t: &str) -> &str {
fn type_to_signed(t: &str) -> &str {
    match t {
        "int8x8_t" | "uint8x8_t" | "poly8x8_t" => "_s8",
        "int8x16_t" | "uint8x16_t" | "poly8x16_t" => "q_s8",
        "int16x4_t" | "uint16x4_t" | "poly16x4_t" => "_s16",
        "int16x8_t" | "uint16x8_t" | "poly16x8_t" => "q_s16",
        "int32x2_t" | "uint32x2_t" => "_s32",
        "int32x4_t" | "uint32x4_t" => "q_s32",
        "int64x1_t" | "uint64x1_t" | "poly64x1_t" => "_s64",
        "int64x2_t" | "uint64x2_t" | "poly64x2_t" => "q_s64",
        /*
        "float16x4_t" => "_f16",
        "float16x8_t" => "q_f16",
        "float32x2_t" => "_f32",
        "float32x4_t" => "q_f32",
        "float64x1_t" => "_f64",
        "float64x2_t" => "q_f64",
        "poly64x1_t" => "_p64",
        "poly64x2_t" => "q_p64",
        */
        "int8x8_t" | "uint8x8_t" | "poly8x8_t" => "int8x8_t",
        "int8x16_t" | "uint8x16_t" | "poly8x16_t" => "int8x16_t",
        "int16x4_t" | "uint16x4_t" | "poly16x4_t" => "int16x4_t",
        "int16x8_t" | "uint16x8_t" | "poly16x8_t" => "int16x8_t",
        "int32x2_t" | "uint32x2_t" => "int32x2_t",
        "int32x4_t" | "uint32x4_t" => "int32x4_t",
        "int64x1_t" | "uint64x1_t" | "poly64x1_t" => "int64x1_t",
        "int64x2_t" | "uint64x2_t" | "poly64x2_t" => "int64x2_t",
        _ => panic!("unknown type: {}", t),
    }
}

fn type_to_unsigned_suffix(t: &str) -> &str {
fn type_to_unsigned(t: &str) -> &str {
    match t {
        "int8x8_t" | "uint8x8_t" => "_u8",
        "int8x16_t" | "uint8x16_t" => "q_u8",
        "int16x4_t" | "uint16x4_t" => "_u16",
        "int16x8_t" | "uint16x8_t" => "q_u16",
        "int32x2_t" | "uint32x2_t" => "_u32",
        "int32x4_t" | "uint32x4_t" => "q_u32",
        "int64x1_t" | "uint64x1_t" => "_u64",
        "int64x2_t" | "uint64x2_t" => "q_u64",
        /*
        "float16x4_t" => "_f16",
        "float16x8_t" => "q_f16",
        "float32x2_t" => "_f32",
        "float32x4_t" => "q_f32",
        "float64x1_t" => "_f64",
        "float64x2_t" => "q_f64",
        "poly64x1_t" => "_p64",
        "poly64x2_t" => "q_p64",
        */
        "int8x8_t" | "uint8x8_t" | "poly8x8_t" => "uint8x8_t",
        "int8x16_t" | "uint8x16_t" | "poly8x16_t" => "uint8x16_t",
        "int16x4_t" | "uint16x4_t" | "poly16x4_t" => "uint16x4_t",
        "int16x8_t" | "uint16x8_t" | "poly16x8_t" => "uint16x8_t",
        "int32x2_t" | "uint32x2_t" => "uint32x2_t",
        "int32x4_t" | "uint32x4_t" => "uint32x4_t",
        "int64x1_t" | "uint64x1_t" | "poly64x1_t" => "uint64x1_t",
        "int64x2_t" | "uint64x2_t" | "poly64x2_t" => "uint64x2_t",
        _ => panic!("unknown type: {}", t),
    }
}
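
Renaming the *_suffix helpers into plain type mappers lets later code compose them with type_to_suffix and type_to_n_suffix (see the signed, unsigned, and nsigned cases below). A hypothetical check of the new behaviour, not part of the commit:

#[test]
fn signed_unsigned_type_mapping() {
    // uqshl shifts an unsigned value by a *signed* shift vector, so the
    // generator needs the signed counterpart of an unsigned vector type.
    assert_eq!(type_to_signed("uint8x8_t"), "int8x8_t");
    assert_eq!(type_to_unsigned("int32x4_t"), "uint32x4_t");
}
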
@@ -1834,15 +1825,24 @@ fn get_call(
sub_fn.push_str(", ");
}
sub_fn.push_str(&params[i]);
if params[i].starts_with('{') {
paranthes += 1;
}
if params[i].ends_with('}') {
paranthes -= 1;
if paranthes == 0 {
let l = params[i].len();
for j in 0..l {
if &params[i][j..j + 1] == "{" {
paranthes += 1;
} else {
break;
}
}
for j in 0..l {
if &params[i][l - j - 1..l - j] == "}" {
paranthes -= 1;
} else {
break;
}
}
if paranthes == 0 {
break;
}
i += 1;
}
let sub_call = get_call(
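
The brace bookkeeping above changed because spec parameters can nest sub-calls, e.g. {vqshl_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}, so a single comma-separated token may close more braces than it opens. A standalone sketch of the leading/trailing counting (hypothetical helpers, not part of main.rs):

fn leading(s: &str, c: char) -> usize {
    s.chars().take_while(|&x| x == c).count()
}

fn trailing(s: &str, c: char) -> usize {
    s.chars().rev().take_while(|&x| x == c).count()
}

fn main() {
    // Last token of `{vqshl_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}` after
    // splitting on ", ":
    let tok = "a}}";
    assert_eq!(leading(tok, '{'), 0);
    assert_eq!(trailing(tok, '}'), 2); // closes the inner and the outer sub-call at once
}
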
@@ -1923,9 +1923,9 @@ fn get_call(
    } else if fn_format[1] == "nself" {
        fn_name.push_str(type_to_n_suffix(in_t[1]));
    } else if fn_format[1] == "signed" {
        fn_name.push_str(type_to_signed_suffix(in_t[1]));
        fn_name.push_str(type_to_suffix(type_to_signed(in_t[1])));
    } else if fn_format[1] == "unsigned" {
        fn_name.push_str(type_to_unsigned_suffix(in_t[1]));
        fn_name.push_str(type_to_suffix(type_to_unsigned(in_t[1])));
    } else if fn_format[1] == "doubleself" {
        fn_name.push_str(&type_to_double_suffixes(out_t, in_t[1]));
    } else if fn_format[1] == "noq_doubleself" {
@@ -1941,6 +1941,8 @@ fn get_call(
        fn_name.push_str(&(type_len(in_t[1]) / 2).to_string());
    } else if fn_format[1] == "nout" {
        fn_name.push_str(type_to_n_suffix(out_t));
    } else if fn_format[1] == "nsigned" {
        fn_name.push_str(type_to_n_suffix(type_to_signed(in_t[1])));
    } else if fn_format[1] == "in_ntt" {
        fn_name.push_str(type_to_suffix(native_type_to_type(in_t[1])));
    } else if fn_format[1] == "out_ntt" {