From e792dfd02cdc979d7178981ca330e3c56a9988c1 Mon Sep 17 00:00:00 2001 From: Sparrow Li Date: Fri, 16 Apr 2021 20:22:39 +0800 Subject: [PATCH] add vqshl, vqshrn, vqshrun neon instructions (#1120) --- .../core_arch/src/aarch64/neon/generated.rs | 621 ++++++++++ .../core_arch/src/arm/neon/generated.rs | 1080 +++++++++++++++++ library/stdarch/crates/stdarch-gen/neon.spec | 240 ++++ .../stdarch/crates/stdarch-gen/src/main.rs | 102 +- 4 files changed, 1993 insertions(+), 50 deletions(-) diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index 4e10639b902e..78e11e691577 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -3435,6 +3435,338 @@ pub unsafe fn vqrshrun_high_n_s64(a: uint32x2_t, b: int64x2_t) -> simd_shuffle4(a, vqrshrun_n_s64::(b), [0, 1, 2, 3]) } +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshl))] +pub unsafe fn vqshlb_s8(a: i8, b: i8) -> i8 { + let c: int8x8_t = vqshl_s8(vdup_n_s8(a), vdup_n_s8(b)); + simd_extract(c, 0) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshl))] +pub unsafe fn vqshlh_s16(a: i16, b: i16) -> i16 { + let c: int16x4_t = vqshl_s16(vdup_n_s16(a), vdup_n_s16(b)); + simd_extract(c, 0) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshl))] +pub unsafe fn vqshls_s32(a: i32, b: i32) -> i32 { + let c: int32x2_t = vqshl_s32(vdup_n_s32(a), vdup_n_s32(b)); + simd_extract(c, 0) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshl))] +pub unsafe fn vqshld_s64(a: i64, b: i64) -> i64 { + let c: int64x1_t = vqshl_s64(vdup_n_s64(a), vdup_n_s64(b)); + simd_extract(c, 0) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uqshl))] +pub unsafe fn vqshlb_u8(a: u8, b: i8) -> u8 { + let c: uint8x8_t = vqshl_u8(vdup_n_u8(a), vdup_n_s8(b)); + simd_extract(c, 0) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uqshl))] +pub unsafe fn vqshlh_u16(a: u16, b: i16) -> u16 { + let c: uint16x4_t = vqshl_u16(vdup_n_u16(a), vdup_n_s16(b)); + simd_extract(c, 0) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uqshl))] +pub unsafe fn vqshls_u32(a: u32, b: i32) -> u32 { + let c: uint32x2_t = vqshl_u32(vdup_n_u32(a), vdup_n_s32(b)); + simd_extract(c, 0) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uqshl))] +pub unsafe fn vqshld_u64(a: u64, b: i64) -> u64 { + let c: uint64x1_t = vqshl_u64(vdup_n_u64(a), vdup_n_s64(b)); + simd_extract(c, 0) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlb_n_s8(a: i8) -> i8 { + static_assert_imm3!(N); + simd_extract(vqshl_n_s8::(vdup_n_s8(a)), 0) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlh_n_s16(a: i16) -> i16 { + static_assert_imm4!(N); + 
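The scalar forms above (vqshlb_s8 through vqshld_u64) reuse the vector intrinsics: the scalar is broadcast into a one-register vector with vdup, shifted, and lane 0 is extracted. As a reading aid, here is a minimal pure-Rust model of the saturating shift-left semantics one lane implements; sqshl_i8_model is a hypothetical helper, not part of this patch, and it covers non-negative shift counts only.

// Hypothetical reference model of one 8-bit lane of SQSHL (assumption: this
// mirrors the instruction for non-negative counts; it is not stdarch code).
fn sqshl_i8_model(a: i8, n: u32) -> i8 {
    // Widen first so the shift itself cannot wrap, then clamp to the lane range.
    let wide = (a as i32) << n;
    wide.clamp(i8::MIN as i32, i8::MAX as i32) as i8
}

fn main() {
    assert_eq!(sqshl_i8_model(1, 2), 4);         // the value the tests below expect
    assert_eq!(sqshl_i8_model(100, 2), i8::MAX); // 400 saturates to 127
}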
simd_extract(vqshl_n_s16::(vdup_n_s16(a)), 0) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshls_n_s32(a: i32) -> i32 { + static_assert_imm5!(N); + simd_extract(vqshl_n_s32::(vdup_n_s32(a)), 0) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshld_n_s64(a: i64) -> i64 { + static_assert_imm6!(N); + simd_extract(vqshl_n_s64::(vdup_n_s64(a)), 0) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlb_n_u8(a: u8) -> u8 { + static_assert_imm3!(N); + simd_extract(vqshl_n_u8::(vdup_n_u8(a)), 0) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlh_n_u16(a: u16) -> u16 { + static_assert_imm4!(N); + simd_extract(vqshl_n_u16::(vdup_n_u16(a)), 0) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshls_n_u32(a: u32) -> u32 { + static_assert_imm5!(N); + simd_extract(vqshl_n_u32::(vdup_n_u32(a)), 0) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshld_n_u64(a: u64) -> u64 { + static_assert_imm6!(N); + simd_extract(vqshl_n_u64::(vdup_n_u64(a)), 0) +} + +/// Signed saturating shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrnh_n_s16(a: i16) -> i8 { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_extract(vqshrn_n_s16::(vdupq_n_s16(a)), 0) +} + +/// Signed saturating shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrns_n_s32(a: i32) -> i16 { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_extract(vqshrn_n_s32::(vdupq_n_s32(a)), 0) +} + +/// Signed saturating shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrnd_n_s64(a: i64) -> i32 { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_extract(vqshrn_n_s64::(vdupq_n_s64(a)), 0) +} + +/// Signed saturating shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_shuffle16(a, vqshrn_n_s16::(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +} + +/// Signed saturating shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_shuffle8(a, vqshrn_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) +} + +/// Signed saturating shift right narrow 
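The _n_ variants check their const shift amount at compile time: static_assert_imm3! through static_assert_imm6! bound N by the lane width (0..=7 for 8-bit lanes up to 0..=63 for 64-bit lanes), while the narrowing shifts require 1..=half the source lane width. A rough const-fn rendering of those checks, written for illustration only (the real macros live in stdarch's support code):

// Illustrative stand-ins for static_assert_imm*! and the narrowing-range
// static_assert! (assumption: same bounds, different mechanism).
const fn check_shift_imm(lane_bits: u32, n: i32) {
    // Left-shift immediates: 0 ..= lane_bits - 1 (imm3/imm4/imm5/imm6).
    assert!(n >= 0 && (n as u32) < lane_bits, "left-shift immediate out of range");
}

const fn check_narrow_imm(src_lane_bits: u32, n: i32) {
    // Narrowing right-shift immediates: 1 ..= half the source lane width.
    assert!(n >= 1 && (n as u32) <= src_lane_bits / 2, "narrowing immediate out of range");
}

const _: () = check_shift_imm(8, 2);   // vqshlb_n_s8 with N = 2 is accepted
const _: () = check_narrow_imm(16, 2); // vqshrnh_n_s16 with N = 2 is accepted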
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_shuffle4(a, vqshrn_n_s64::(b), [0, 1, 2, 3]) +} + +/// Unsigned saturating shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrnh_n_u16(a: u16) -> u8 { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_extract(vqshrn_n_u16::(vdupq_n_u16(a)), 0) +} + +/// Unsigned saturating shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrns_n_u32(a: u32) -> u16 { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_extract(vqshrn_n_u32::(vdupq_n_u32(a)), 0) +} + +/// Unsigned saturating shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrnd_n_u64(a: u64) -> u32 { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_extract(vqshrn_n_u64::(vdupq_n_u64(a)), 0) +} + +/// Unsigned saturating shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uqshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_shuffle16(a, vqshrn_n_u16::(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +} + +/// Unsigned saturating shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uqshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_shuffle8(a, vqshrn_n_u32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) +} + +/// Unsigned saturating shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uqshrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_shuffle4(a, vqshrn_n_u64::(b), [0, 1, 2, 3]) +} + +/// Signed saturating shift right unsigned narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrunh_n_s16(a: i16) -> u8 { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_extract(vqshrun_n_s16::(vdupq_n_s16(a)), 0) +} + +/// Signed saturating shift right unsigned narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshruns_n_s32(a: i32) -> u16 { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_extract(vqshrun_n_s32::(vdupq_n_s32(a)), 0) +} + +/// Signed saturating shift right unsigned narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrund_n_s64(a: i64) -> u32 { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_extract(vqshrun_n_s64::(vdupq_n_s64(a)), 0) +} + +/// Signed saturating shift right unsigned narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, 
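All the vqshrn/vqshrun forms share one shape: shift the wide lane right, then saturate the result into a type half as wide. A minimal model of the unsigned case (uqshrn_u16_model is an illustrative name, not stdarch code):

// Hypothetical reference model: unsigned saturating shift right narrow,
// one u16 lane down to a u8 lane.
fn uqshrn_u16_model(a: u16, n: u32) -> u8 {
    let shifted = a >> n;             // logical shift right
    shifted.min(u8::MAX as u16) as u8 // saturate into the narrower type
}

fn main() {
    assert_eq!(uqshrn_u16_model(4, 2), 1);        // matches the scalar tests below
    assert_eq!(uqshrn_u16_model(0xFFFF, 2), 255); // 0x3FFF saturates to u8::MAX
}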
assert_instr(sqshrun2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqshrun_high_n_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_shuffle16(a, vqshrun_n_s16::(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +} + +/// Signed saturating shift right unsigned narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshrun2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqshrun_high_n_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_shuffle8(a, vqshrun_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) +} + +/// Signed saturating shift right unsigned narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshrun2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqshrun_high_n_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_shuffle4(a, vqshrun_n_s64::(b), [0, 1, 2, 3]) +} + /// Calculates the square root of each lane. #[inline] #[target_feature(enable = "neon")] @@ -8413,6 +8745,295 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vqshlb_s8() { + let a: i8 = 1; + let b: i8 = 2; + let e: i8 = 4; + let r: i8 = transmute(vqshlb_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlh_s16() { + let a: i16 = 1; + let b: i16 = 2; + let e: i16 = 4; + let r: i16 = transmute(vqshlh_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshls_s32() { + let a: i32 = 1; + let b: i32 = 2; + let e: i32 = 4; + let r: i32 = transmute(vqshls_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshld_s64() { + let a: i64 = 1; + let b: i64 = 2; + let e: i64 = 4; + let r: i64 = transmute(vqshld_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlb_u8() { + let a: u8 = 1; + let b: i8 = 2; + let e: u8 = 4; + let r: u8 = transmute(vqshlb_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlh_u16() { + let a: u16 = 1; + let b: i16 = 2; + let e: u16 = 4; + let r: u16 = transmute(vqshlh_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshls_u32() { + let a: u32 = 1; + let b: i32 = 2; + let e: u32 = 4; + let r: u32 = transmute(vqshls_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshld_u64() { + let a: u64 = 1; + let b: i64 = 2; + let e: u64 = 4; + let r: u64 = transmute(vqshld_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlb_n_s8() { + let a: i8 = 1; + let e: i8 = 4; + let r: i8 = transmute(vqshlb_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlh_n_s16() { + let a: i16 = 1; + let e: i16 = 4; + let r: i16 = transmute(vqshlh_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshls_n_s32() { + let a: i32 = 1; + let e: i32 = 4; + let r: i32 = transmute(vqshls_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshld_n_s64() { + let a: i64 = 1; + let e: i64 = 4; + let r: i64 = 
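vqshrun is the mixed case: the input lanes are signed but the narrowed result is unsigned, so negative inputs clamp to zero and large positives clamp to the unsigned maximum. A sketch of one lane (sqshrun_i16_model is a hypothetical name, not stdarch code):

// Hypothetical reference model: signed saturating shift right *unsigned*
// narrow, i16 lane in, u8 lane out.
fn sqshrun_i16_model(a: i16, n: u32) -> u8 {
    let shifted = (a >> n) as i32;          // arithmetic shift right
    shifted.clamp(0, u8::MAX as i32) as u8  // negatives clamp to 0, overflow to 255
}

fn main() {
    assert_eq!(sqshrun_i16_model(4, 2), 1);
    assert_eq!(sqshrun_i16_model(-100, 2), 0); // negative input saturates to 0
}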
transmute(vqshld_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlb_n_u8() { + let a: u8 = 1; + let e: u8 = 4; + let r: u8 = transmute(vqshlb_n_u8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlh_n_u16() { + let a: u16 = 1; + let e: u16 = 4; + let r: u16 = transmute(vqshlh_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshls_n_u32() { + let a: u32 = 1; + let e: u32 = 4; + let r: u32 = transmute(vqshls_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshld_n_u64() { + let a: u64 = 1; + let e: u64 = 4; + let r: u64 = transmute(vqshld_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrnh_n_s16() { + let a: i16 = 4; + let e: i8 = 1; + let r: i8 = transmute(vqshrnh_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrns_n_s32() { + let a: i32 = 4; + let e: i16 = 1; + let r: i16 = transmute(vqshrns_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrnd_n_s64() { + let a: i64 = 4; + let e: i32 = 1; + let r: i32 = transmute(vqshrnd_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrn_high_n_s16() { + let a: i8x8 = i8x8::new(0, 1, 8, 9, 8, 9, 10, 11); + let b: i16x8 = i16x8::new(32, 36, 40, 44, 48, 52, 56, 60); + let e: i8x16 = i8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15); + let r: i8x16 = transmute(vqshrn_high_n_s16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrn_high_n_s32() { + let a: i16x4 = i16x4::new(0, 1, 8, 9); + let b: i32x4 = i32x4::new(32, 36, 40, 44); + let e: i16x8 = i16x8::new(0, 1, 8, 9, 8, 9, 10, 11); + let r: i16x8 = transmute(vqshrn_high_n_s32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrn_high_n_s64() { + let a: i32x2 = i32x2::new(0, 1); + let b: i64x2 = i64x2::new(32, 36); + let e: i32x4 = i32x4::new(0, 1, 8, 9); + let r: i32x4 = transmute(vqshrn_high_n_s64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrnh_n_u16() { + let a: u16 = 4; + let e: u8 = 1; + let r: u8 = transmute(vqshrnh_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrns_n_u32() { + let a: u32 = 4; + let e: u16 = 1; + let r: u16 = transmute(vqshrns_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrnd_n_u64() { + let a: u64 = 4; + let e: u32 = 1; + let r: u32 = transmute(vqshrnd_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrn_high_n_u16() { + let a: u8x8 = u8x8::new(0, 1, 8, 9, 8, 9, 10, 11); + let b: u16x8 = u16x8::new(32, 36, 40, 44, 48, 52, 56, 60); + let e: u8x16 = u8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15); + let r: u8x16 = transmute(vqshrn_high_n_u16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrn_high_n_u32() { + let a: u16x4 = u16x4::new(0, 1, 8, 9); + let b: u32x4 = u32x4::new(32, 36, 40, 44); + let e: u16x8 = u16x8::new(0, 1, 8, 9, 8, 9, 10, 11); + let r: u16x8 = 
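The _high_ tests here exercise the combining behaviour: a is kept verbatim as the low half of the result and the narrowed b becomes the high half, which the intrinsics express as a simd_shuffle over the concatenation. The same data flow in plain Rust (qshrn_high_model is illustrative only):

// Plain-Rust sketch (not stdarch code) of the `_high_` combine for the
// signed i16 -> i8 narrowing case.
fn qshrn_high_model(a: [i8; 8], b: [i16; 8], n: u32) -> [i8; 16] {
    let mut out = [0i8; 16];
    out[..8].copy_from_slice(&a); // low half: `a`, unchanged
    for (dst, &x) in out[8..].iter_mut().zip(b.iter()) {
        // high half: each wide lane shifted right and saturated down
        *dst = ((x >> n) as i32).clamp(i8::MIN as i32, i8::MAX as i32) as i8;
    }
    out
}

fn main() {
    // Same vectors as test_vqshrn_high_n_s16 above.
    let r = qshrn_high_model([0, 1, 8, 9, 8, 9, 10, 11], [32, 36, 40, 44, 48, 52, 56, 60], 2);
    assert_eq!(r[..8], [0, 1, 8, 9, 8, 9, 10, 11]);
    assert_eq!(r[8..], [8, 9, 10, 11, 12, 13, 14, 15]);
}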
transmute(vqshrn_high_n_u32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrn_high_n_u64() { + let a: u32x2 = u32x2::new(0, 1); + let b: u64x2 = u64x2::new(32, 36); + let e: u32x4 = u32x4::new(0, 1, 8, 9); + let r: u32x4 = transmute(vqshrn_high_n_u64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrunh_n_s16() { + let a: i16 = 4; + let e: u8 = 1; + let r: u8 = transmute(vqshrunh_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshruns_n_s32() { + let a: i32 = 4; + let e: u16 = 1; + let r: u16 = transmute(vqshruns_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrund_n_s64() { + let a: i64 = 4; + let e: u32 = 1; + let r: u32 = transmute(vqshrund_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrun_high_n_s16() { + let a: u8x8 = u8x8::new(0, 1, 8, 9, 8, 9, 10, 11); + let b: i16x8 = i16x8::new(32, 36, 40, 44, 48, 52, 56, 60); + let e: u8x16 = u8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15); + let r: u8x16 = transmute(vqshrun_high_n_s16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrun_high_n_s32() { + let a: u16x4 = u16x4::new(0, 1, 8, 9); + let b: i32x4 = i32x4::new(32, 36, 40, 44); + let e: u16x8 = u16x8::new(0, 1, 8, 9, 8, 9, 10, 11); + let r: u16x8 = transmute(vqshrun_high_n_s32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrun_high_n_s64() { + let a: u32x2 = u32x2::new(0, 1); + let b: i64x2 = i64x2::new(32, 36); + let e: u32x4 = u32x4::new(0, 1, 8, 9); + let r: u32x4 = transmute(vqshrun_high_n_s64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vsqrt_f32() { let a: f32x2 = f32x2::new(4.0, 9.0); diff --git a/library/stdarch/crates/core_arch/src/arm/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm/neon/generated.rs index 195a3121c3c9..bc98607e8b42 100644 --- a/library/stdarch/crates/core_arch/src/arm/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/arm/neon/generated.rs @@ -6430,6 +6430,742 @@ pub unsafe fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { vqrshrun_n_s64_(a, N) } +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v8i8")] + fn vqshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vqshl_s8_(a, b) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")] + 
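Each wrapper in this file is a thin cfg-gated binding to an LLVM intrinsic; the .v8i8/.v2i64 suffixes name the overloaded vector type (lane count, then element width), which is what the _EXT_ placeholder in neon.spec appears to expand to. An illustrative helper (not stdarch-gen code) for that suffix scheme:

// Illustrative only: how the LLVM overloaded-intrinsic type suffix is formed.
fn llvm_vec_suffix(lanes: u32, bits: u32) -> String {
    format!("v{lanes}i{bits}")
}

fn main() {
    assert_eq!(llvm_vec_suffix(8, 8), "v8i8");   // int8x8_t  -> llvm.aarch64.neon.sqshl.v8i8
    assert_eq!(llvm_vec_suffix(2, 64), "v2i64"); // int64x2_t -> llvm.arm.neon.vqshifts.v2i64
}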
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v16i8")] + fn vqshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vqshlq_s8_(a, b) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v4i16")] + fn vqshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vqshl_s16_(a, b) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v8i16")] + fn vqshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vqshlq_s16_(a, b) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v2i32")] + fn vqshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vqshl_s32_(a, b) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v4i32")] + fn vqshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vqshlq_s32_(a, b) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v1i64")] + fn vqshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } +vqshl_s64_(a, b) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v2i64")] + fn vqshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } +vqshlq_s64_(a, b) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v8i8")] + fn vqshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + } +vqshl_u8_(a, b) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v16i8")] + fn vqshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + } +vqshlq_u8_(a, b) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v4i16")] + fn vqshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + } +vqshl_u16_(a, b) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v8i16")] + fn vqshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + } +vqshlq_u16_(a, b) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")] + 
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v2i32")] + fn vqshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + } +vqshl_u32_(a, b) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v4i32")] + fn vqshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + } +vqshlq_u32_(a, b) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v1i64")] + fn vqshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + } +vqshl_u64_(a, b) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v2i64")] + fn vqshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } +vqshlq_u64_(a, b) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshl_n_s8(a: int8x8_t) -> int8x8_t { + static_assert_imm3!(N); + vqshl_s8(a, vdup_n_s8(N.try_into().unwrap())) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert_imm3!(N); + vqshlq_s8(a, vdupq_n_s8(N.try_into().unwrap())) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshl_n_s16(a: int16x4_t) -> int16x4_t { + static_assert_imm4!(N); + vqshl_s16(a, vdup_n_s16(N.try_into().unwrap())) +} + +/// Signed saturating shift left 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlq_n_s16(a: int16x8_t) -> int16x8_t { + static_assert_imm4!(N); + vqshlq_s16(a, vdupq_n_s16(N.try_into().unwrap())) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshl_n_s32(a: int32x2_t) -> int32x2_t { + static_assert_imm5!(N); + vqshl_s32(a, vdup_n_s32(N.try_into().unwrap())) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlq_n_s32(a: int32x4_t) -> int32x4_t { + static_assert_imm5!(N); + vqshlq_s32(a, vdupq_n_s32(N.try_into().unwrap())) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshl_n_s64(a: int64x1_t) -> int64x1_t { + static_assert_imm6!(N); + vqshl_s64(a, vdup_n_s64(N.try_into().unwrap())) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlq_n_s64(a: int64x2_t) -> int64x2_t { + static_assert_imm6!(N); + vqshlq_s64(a, vdupq_n_s64(N.try_into().unwrap())) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshl_n_u8(a: uint8x8_t) -> uint8x8_t { + static_assert_imm3!(N); + vqshl_u8(a, vdup_n_s8(N.try_into().unwrap())) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlq_n_u8(a: uint8x16_t) -> uint8x16_t { + static_assert_imm3!(N); + vqshlq_u8(a, vdupq_n_s8(N.try_into().unwrap())) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] 
+#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshl_n_u16(a: uint16x4_t) -> uint16x4_t { + static_assert_imm4!(N); + vqshl_u16(a, vdup_n_s16(N.try_into().unwrap())) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert_imm4!(N); + vqshlq_u16(a, vdupq_n_s16(N.try_into().unwrap())) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshl_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert_imm5!(N); + vqshl_u32(a, vdup_n_s32(N.try_into().unwrap())) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert_imm5!(N); + vqshlq_u32(a, vdupq_n_s32(N.try_into().unwrap())) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshl_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert_imm6!(N); + vqshl_u64(a, vdup_n_s64(N.try_into().unwrap())) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert_imm6!(N); + vqshlq_u64(a, vdupq_n_s64(N.try_into().unwrap())) +} + +/// Signed saturating shift right narrow +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")] + fn vqshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t; + } +vqshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16)) +} + +/// Signed saturating shift right narrow +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + 
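The ARM path for the narrowing right shifts has no immediate-taking intrinsic, so the wrappers reuse llvm.arm.neon.vqshiftn*, which shifts by a per-lane signed count, and encode "right by N" as a splat of -N (the const-arm = -N as ttn rule in neon.spec). The AArch64 intrinsics instead take N directly as an i32. The ARM-side operand, modelled in plain Rust (arm_narrow_shift_operand is illustrative):

// Illustrative only: the splatted count `int16x8_t(-N as i16, ...)` built by
// the ARM definition of vqshrn_n_s16.
fn arm_narrow_shift_operand<const N: i32>() -> [i16; 8] {
    [-(N as i16); 8]
}

fn main() {
    assert_eq!(arm_narrow_shift_operand::<2>(), [-2i16; 8]);
}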
#[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v8i8")] + fn vqshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t; + } +vqshrn_n_s16_(a, N) +} + +/// Signed saturating shift right narrow +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v4i16")] + fn vqshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t; + } +vqshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32)) +} + +/// Signed saturating shift right narrow +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v4i16")] + fn vqshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t; + } +vqshrn_n_s32_(a, N) +} + +/// Signed saturating shift right narrow +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v2i32")] + fn vqshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t; + } +vqshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64)) +} + +/// Signed saturating shift right narrow +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v2i32")] + fn vqshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t; + } +vqshrn_n_s64_(a, N) +} + +/// Unsigned saturating shift right narrow +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")] + fn vqshrn_n_u16_(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; + } +vqshrn_n_u16_(a, uint16x8_t(-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16)) +} + +/// Unsigned saturating shift right narrow +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + #[allow(improper_ctypes)] + extern "C" { + 
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v8i8")] + fn vqshrn_n_u16_(a: uint16x8_t, n: i32) -> uint8x8_t; + } +vqshrn_n_u16_(a, N) +} + +/// Unsigned saturating shift right narrow +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")] + fn vqshrn_n_u32_(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; + } +vqshrn_n_u32_(a, uint32x4_t(-N as u32, -N as u32, -N as u32, -N as u32)) +} + +/// Unsigned saturating shift right narrow +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v4i16")] + fn vqshrn_n_u32_(a: uint32x4_t, n: i32) -> uint16x4_t; + } +vqshrn_n_u32_(a, N) +} + +/// Unsigned saturating shift right narrow +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")] + fn vqshrn_n_u64_(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t; + } +vqshrn_n_u64_(a, uint64x2_t(-N as u64, -N as u64)) +} + +/// Unsigned saturating shift right narrow +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v2i32")] + fn vqshrn_n_u64_(a: uint64x2_t, n: i32) -> uint32x2_t; + } +vqshrn_n_u64_(a, N) +} + +/// Signed saturating shift right unsigned narrow +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrun_n_s16(a: int16x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")] + fn vqshrun_n_s16_(a: int16x8_t, n: int16x8_t) -> uint8x8_t; + } +vqshrun_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16)) +} + +/// Signed saturating shift right unsigned narrow +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrun_n_s16(a: int16x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + #[allow(improper_ctypes)] + extern "C" { + 
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v8i8")] + fn vqshrun_n_s16_(a: int16x8_t, n: i32) -> uint8x8_t; + } +vqshrun_n_s16_(a, N) +} + +/// Signed saturating shift right unsigned narrow +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrun_n_s32(a: int32x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v4i16")] + fn vqshrun_n_s32_(a: int32x4_t, n: int32x4_t) -> uint16x4_t; + } +vqshrun_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32)) +} + +/// Signed saturating shift right unsigned narrow +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrun_n_s32(a: int32x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v4i16")] + fn vqshrun_n_s32_(a: int32x4_t, n: i32) -> uint16x4_t; + } +vqshrun_n_s32_(a, N) +} + +/// Signed saturating shift right unsigned narrow +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")] + fn vqshrun_n_s64_(a: int64x2_t, n: int64x2_t) -> uint32x2_t; + } +vqshrun_n_s64_(a, int64x2_t(-N as i64, -N as i64)) +} + +/// Signed saturating shift right unsigned narrow +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v2i32")] + fn vqshrun_n_s64_(a: int64x2_t, n: i32) -> uint32x2_t; + } +vqshrun_n_s64_(a, N) +} + /// Reciprocal square-root estimate. 
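This completes the pattern used for every narrowing shift in this file: one public const-generic function per name, defined twice under #[cfg(target_arch = ...)], because the two backends want the shift count in different shapes. Reduced to a sketch (narrow_shift_count is a hypothetical name):

// Illustrative only: one public symbol, two cfg-gated bodies.
#[cfg(target_arch = "arm")]
fn narrow_shift_count<const N: i32>() -> [i16; 8] {
    [-(N as i16); 8] // splatted negative count for llvm.arm.neon.vqshiftn*
}

#[cfg(target_arch = "aarch64")]
fn narrow_shift_count<const N: i32>() -> i32 {
    N // plain immediate for llvm.aarch64.neon.{sqshrn,uqshrn,sqshrun}
}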
#[inline] #[target_feature(enable = "neon")] @@ -14234,6 +14970,350 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vqshl_s8() { + let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: i8x8 = i8x8::new(0, 4, 8, 12, 16, 20, 24, 28); + let r: i8x8 = transmute(vqshl_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlq_s8() { + let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let e: i8x16 = i8x16::new(0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60); + let r: i8x16 = transmute(vqshlq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshl_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let e: i16x4 = i16x4::new(0, 4, 8, 12); + let r: i16x4 = transmute(vqshl_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlq_s16() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: i16x8 = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28); + let r: i16x8 = transmute(vqshlq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshl_s32() { + let a: i32x2 = i32x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let e: i32x2 = i32x2::new(0, 4); + let r: i32x2 = transmute(vqshl_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlq_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let e: i32x4 = i32x4::new(0, 4, 8, 12); + let r: i32x4 = transmute(vqshlq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshl_s64() { + let a: i64x1 = i64x1::new(0); + let b: i64x1 = i64x1::new(2); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vqshl_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlq_s64() { + let a: i64x2 = i64x2::new(0, 1); + let b: i64x2 = i64x2::new(2, 2); + let e: i64x2 = i64x2::new(0, 4); + let r: i64x2 = transmute(vqshlq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshl_u8() { + let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: u8x8 = u8x8::new(0, 4, 8, 12, 16, 20, 24, 28); + let r: u8x8 = transmute(vqshl_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlq_u8() { + let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let e: u8x16 = u8x16::new(0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60); + let r: u8x16 = transmute(vqshlq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshl_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let e: u16x4 = u16x4::new(0, 4, 8, 12); + let r: u16x4 = transmute(vqshl_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlq_u16() { + let a: u16x8 
= u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: u16x8 = u16x8::new(0, 4, 8, 12, 16, 20, 24, 28); + let r: u16x8 = transmute(vqshlq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshl_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let e: u32x2 = u32x2::new(0, 4); + let r: u32x2 = transmute(vqshl_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlq_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let e: u32x4 = u32x4::new(0, 4, 8, 12); + let r: u32x4 = transmute(vqshlq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshl_u64() { + let a: u64x1 = u64x1::new(0); + let b: i64x1 = i64x1::new(2); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vqshl_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlq_u64() { + let a: u64x2 = u64x2::new(0, 1); + let b: i64x2 = i64x2::new(2, 2); + let e: u64x2 = u64x2::new(0, 4); + let r: u64x2 = transmute(vqshlq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshl_n_s8() { + let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: i8x8 = i8x8::new(0, 4, 8, 12, 16, 20, 24, 28); + let r: i8x8 = transmute(vqshl_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlq_n_s8() { + let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: i8x16 = i8x16::new(0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60); + let r: i8x16 = transmute(vqshlq_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshl_n_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let e: i16x4 = i16x4::new(0, 4, 8, 12); + let r: i16x4 = transmute(vqshl_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlq_n_s16() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: i16x8 = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28); + let r: i16x8 = transmute(vqshlq_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshl_n_s32() { + let a: i32x2 = i32x2::new(0, 1); + let e: i32x2 = i32x2::new(0, 4); + let r: i32x2 = transmute(vqshl_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlq_n_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let e: i32x4 = i32x4::new(0, 4, 8, 12); + let r: i32x4 = transmute(vqshlq_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshl_n_s64() { + let a: i64x1 = i64x1::new(0); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vqshl_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlq_n_s64() { + let a: i64x2 = i64x2::new(0, 1); + let e: i64x2 = i64x2::new(0, 4); + let r: i64x2 = transmute(vqshlq_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshl_n_u8() { + let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u8x8 = u8x8::new(0, 4, 8, 12, 16, 20, 24, 28); + let r: u8x8 = transmute(vqshl_n_u8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = 
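The generated tests shift small values (at most 7 << 2 = 28 for the 8-bit lanes), so they verify the shift itself but never trip saturation; the saturating behaviour only appears at the type bounds. A quick way to see which inputs would clamp (would_saturate_s8 is illustrative, derived from the saturating semantics rather than from this test suite):

// Illustrative only: does shifting `a` left by `n` leave the i8 range?
fn would_saturate_s8(a: i8, n: u32) -> bool {
    let wide = (a as i32) << n;
    wide > i8::MAX as i32 || wide < i8::MIN as i32
}

fn main() {
    assert!(!would_saturate_s8(7, 2));      // the tests' largest input, 28, fits
    assert!(would_saturate_s8(i8::MAX, 1)); // 254 would clamp to 127
}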
"neon")] + unsafe fn test_vqshlq_n_u8() { + let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u8x16 = u8x16::new(0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60); + let r: u8x16 = transmute(vqshlq_n_u8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshl_n_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let e: u16x4 = u16x4::new(0, 4, 8, 12); + let r: u16x4 = transmute(vqshl_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlq_n_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u16x8 = u16x8::new(0, 4, 8, 12, 16, 20, 24, 28); + let r: u16x8 = transmute(vqshlq_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshl_n_u32() { + let a: u32x2 = u32x2::new(0, 1); + let e: u32x2 = u32x2::new(0, 4); + let r: u32x2 = transmute(vqshl_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlq_n_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let e: u32x4 = u32x4::new(0, 4, 8, 12); + let r: u32x4 = transmute(vqshlq_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshl_n_u64() { + let a: u64x1 = u64x1::new(0); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vqshl_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshlq_n_u64() { + let a: u64x2 = u64x2::new(0, 1); + let e: u64x2 = u64x2::new(0, 4); + let r: u64x2 = transmute(vqshlq_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrn_n_s16() { + let a: i16x8 = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28); + let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: i8x8 = transmute(vqshrn_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrn_n_s32() { + let a: i32x4 = i32x4::new(0, 4, 8, 12); + let e: i16x4 = i16x4::new(0, 1, 2, 3); + let r: i16x4 = transmute(vqshrn_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrn_n_s64() { + let a: i64x2 = i64x2::new(0, 4); + let e: i32x2 = i32x2::new(0, 1); + let r: i32x2 = transmute(vqshrn_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrn_n_u16() { + let a: u16x8 = u16x8::new(0, 4, 8, 12, 16, 20, 24, 28); + let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: u8x8 = transmute(vqshrn_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrn_n_u32() { + let a: u32x4 = u32x4::new(0, 4, 8, 12); + let e: u16x4 = u16x4::new(0, 1, 2, 3); + let r: u16x4 = transmute(vqshrn_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrn_n_u64() { + let a: u64x2 = u64x2::new(0, 4); + let e: u32x2 = u32x2::new(0, 1); + let r: u32x2 = transmute(vqshrn_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrun_n_s16() { + let a: i16x8 = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28); + let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: u8x8 = transmute(vqshrun_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqshrun_n_s32() { + let a: i32x4 = i32x4::new(0, 4, 8, 12); + let e: u16x4 = u16x4::new(0, 1, 2, 3); + let 
+        let r: u16x4 = transmute(vqshrun_n_s32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshrun_n_s64() {
+        let a: i64x2 = i64x2::new(0, 4);
+        let e: u32x2 = u32x2::new(0, 1);
+        let r: u32x2 = transmute(vqshrun_n_s64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vrsqrte_f32() {
         let a: f32x2 = f32x2::new(1.0, 2.0);
diff --git a/library/stdarch/crates/stdarch-gen/neon.spec b/library/stdarch/crates/stdarch-gen/neon.spec
index 33418b012209..bc87effb7f7d 100644
--- a/library/stdarch/crates/stdarch-gen/neon.spec
+++ b/library/stdarch/crates/stdarch-gen/neon.spec
@@ -2198,6 +2198,246 @@ validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15
 aarch64 = sqrshrun2
 generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t
 
+/// Signed saturating shift left
+name = vqshl
+a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
+
+aarch64 = sqshl
+link-aarch64 = sqshl._EXT_
+
+arm = vqshl
+link-arm = vqshifts._EXT_
+generate int*_t, int64x*_t
+
+/// Signed saturating shift left
+name = vqshl
+multi_fn = vqshl-in_ntt-noext, c:in_ntt, {vdup_n-in_ntt-noext, a}, {vdup_n-in_ntt-noext, b}
+multi_fn = simd_extract, c, 0
+a = 1
+b = 2
+validate 4
+
+aarch64 = sqshl
+generate i8, i16, i32, i64
+
+/// Unsigned saturating shift left
+name = vqshl
+out-suffix
+a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
+
+aarch64 = uqshl
+link-aarch64 = uqshl._EXT_
+
+arm = vqshl
+link-arm = vqshiftu._EXT_
+generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
+generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t
+
+/// Unsigned saturating shift left
+name = vqshl
+out-suffix
+multi_fn = vqshl-out_ntt-noext, c:out_ntt, {vdup_n-out_ntt-noext, a}, {vdup_n-in_ntt-noext, b}
+multi_fn = simd_extract, c, 0
+a = 1
+b = 2
+validate 4
+
+aarch64 = uqshl
+generate u8:i8:u8, u16:i16:u16, u32:i32:u32, u64:i64:u64
+
+/// Signed saturating shift left
+name = vqshl
+n-suffix
+constn = N
+multi_fn = static_assert_imm-out_bits_exp_len-N
+multi_fn = vqshl-self-noext, a, {vdup-nself-noext, N.try_into().unwrap()}
+a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+n = 2
+validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
+
+aarch64 = sqshl
+arm = vqshl
+generate int*_t, int64x*_t
+
+/// Signed saturating shift left
+name = vqshl
+n-suffix
+constn = N
+multi_fn = static_assert_imm-out_bits_exp_len-N
+multi_fn = simd_extract, {vqshl_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}, 0
+a = 1
+n = 2
+validate 4
+
+aarch64 = sqshl
+generate i8, i16, i32, i64
+
+/// Unsigned saturating shift left
+name = vqshl
+n-suffix
+constn = N
+multi_fn = static_assert_imm-out_bits_exp_len-N
+multi_fn = vqshl-self-noext, a, {vdup-nsigned-noext, N.try_into().unwrap()}
+a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+n = 2
+validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
+
+aarch64 = uqshl
+arm = vqshl
+generate uint*_t, uint64x*_t
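Note on the `_n` entries above: `static_assert_imm-out_bits_exp_len-N` expands, via `type_bits_exp_len` (extended for scalar types later in this patch), to one of `static_assert_imm3!` through `static_assert_imm6!`, i.e. it checks that the immediate fits in log2(lane bits) bits, which is exactly the legal range for a left shift of that lane width. A minimal sketch of the rule being enforced (hypothetical run-time version, not the real compile-time macros):

    // For lanes of 2^exp bits, a left-shift immediate must be 0..=(2^exp - 1).
    fn imm_fits(bits_exp_len: u32, n: i32) -> bool {
        n >= 0 && (n as u32) < (1u32 << bits_exp_len)
    }

    fn main() {
        assert!(imm_fits(3, 7)); // i8 lanes: shifts 0..=7 are legal
        assert!(!imm_fits(3, 8)); // 8 would shift out every bit
    }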
+
+/// Unsigned saturating shift left
+name = vqshl
+n-suffix
+constn = N
+multi_fn = static_assert_imm-out_bits_exp_len-N
+multi_fn = simd_extract, {vqshl_n-in_ntt-::<N>, {vdup_n-in_ntt-noext, a}}, 0
+a = 1
+n = 2
+validate 4
+
+aarch64 = uqshl
+generate u8, u16, u32, u64
+
+/// Signed saturating shift right narrow
+name = vqshrn
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+a = 0, 4, 8, 12, 16, 20, 24, 28
+n = 2
+validate 0, 1, 2, 3, 4, 5, 6, 7
+
+aarch64 = sqshrn
+link-aarch64 = sqshrn._EXT2_
+const-aarch64 = N
+
+arm = vqshrn
+link-arm = vqshiftns._EXT2_
+const-arm = -N as ttn
+generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
+
+/// Signed saturating shift right narrow
+name = vqshrn
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+multi_fn = simd_extract, {vqshrn_n-in_ntt-::<N>, {vdupq_n-in_ntt-noext, a}}, 0
+a = 4
+n = 2
+validate 1
+
+aarch64 = sqshrn
+generate i16:i8, i32:i16, i64:i32
+
+/// Signed saturating shift right narrow
+name = vqshrn_high
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+multi_fn = simd_shuffle-out_len-noext, a, {vqshrn_n-noqself-::<N>, b}, {asc-0-out_len}
+a = 0, 1, 8, 9, 8, 9, 10, 11
+b = 32, 36, 40, 44, 48, 52, 56, 60
+n = 2
+validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15
+
+aarch64 = sqshrn2
+generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
+
+/// Unsigned saturating shift right narrow
+name = vqshrn
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+a = 0, 4, 8, 12, 16, 20, 24, 28
+n = 2
+validate 0, 1, 2, 3, 4, 5, 6, 7
+
+aarch64 = uqshrn
+link-aarch64 = uqshrn._EXT2_
+const-aarch64 = N
+
+arm = vqshrn
+link-arm = vqshiftnu._EXT2_
+const-arm = -N as ttn
+generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
+
+/// Unsigned saturating shift right narrow
+name = vqshrn
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+multi_fn = simd_extract, {vqshrn_n-in_ntt-::<N>, {vdupq_n-in_ntt-noext, a}}, 0
+a = 4
+n = 2
+validate 1
+
+aarch64 = uqshrn
+generate u16:u8, u32:u16, u64:u32
+
+/// Unsigned saturating shift right narrow
+name = vqshrn_high
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+multi_fn = simd_shuffle-out_len-noext, a, {vqshrn_n-noqself-::<N>, b}, {asc-0-out_len}
+a = 0, 1, 8, 9, 8, 9, 10, 11
+b = 32, 36, 40, 44, 48, 52, 56, 60
+n = 2
+validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15
+
+aarch64 = uqshrn2
+generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
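Note on `const-arm = -N as ttn` above: the ARMv7 `vqshiftns`/`vqshiftnu` LLVM intrinsics take a per-lane shift whose sign selects the direction, so a right shift by N is encoded as a splat of -N in the truncated element type (`ttn`). Conceptually (a sketch of the convention, not the generated code):

    // Sign-directed shift as used by the vqshift* family: positive
    // amounts shift left, negative amounts shift right.
    fn shift_signed(a: i32, s: i32) -> i32 {
        if s >= 0 { a << s } else { a >> -s }
    }

    fn main() {
        // A right shift by 2 is expressed as a shift by -2:
        assert_eq!(shift_signed(8, -2), 8 >> 2);
    }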
+
+/// Signed saturating shift right unsigned narrow
+name = vqshrun
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+a = 0, 4, 8, 12, 16, 20, 24, 28
+n = 2
+validate 0, 1, 2, 3, 4, 5, 6, 7
+
+aarch64 = sqshrun
+link-aarch64 = sqshrun._EXT2_
+const-aarch64 = N
+
+arm = vqshrun
+link-arm = vqshiftnsu._EXT2_
+const-arm = -N as ttn
+generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t
+
+/// Signed saturating shift right unsigned narrow
+name = vqshrun
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+multi_fn = simd_extract, {vqshrun_n-in_ntt-::<N>, {vdupq_n-in_ntt-noext, a}}, 0
+a = 4
+n = 2
+validate 1
+
+aarch64 = sqshrun
+generate i16:u8, i32:u16, i64:u32
+
+/// Signed saturating shift right unsigned narrow
+name = vqshrun_high
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+multi_fn = simd_shuffle-out_len-noext, a, {vqshrun_n-noqself-::<N>, b}, {asc-0-out_len}
+a = 0, 1, 8, 9, 8, 9, 10, 11
+b = 32, 36, 40, 44, 48, 52, 56, 60
+n = 2
+validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15
+
+aarch64 = sqshrun2
+generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t
+
 /// Calculates the square root of each lane.
 name = vsqrt
 fn = simd_fsqrt
diff --git a/library/stdarch/crates/stdarch-gen/src/main.rs b/library/stdarch/crates/stdarch-gen/src/main.rs
index 5ba466bd735f..155c898a41ea 100644
--- a/library/stdarch/crates/stdarch-gen/src/main.rs
+++ b/library/stdarch/crates/stdarch-gen/src/main.rs
@@ -135,10 +135,13 @@ fn type_exp_len(t: &str) -> usize {
 
 fn type_bits_exp_len(t: &str) -> usize {
     match t {
-        "int8x8_t" | "int8x16_t" | "uint8x8_t" | "uint8x16_t" | "poly8x8_t" | "poly8x16_t" => 3,
-        "int16x4_t" | "int16x8_t" | "uint16x4_t" | "uint16x8_t" | "poly16x4_t" | "poly16x8_t" => 4,
-        "int32x2_t" | "int32x4_t" | "uint32x2_t" | "uint32x4_t" => 5,
-        "int64x1_t" | "int64x2_t" | "uint64x1_t" | "uint64x2_t" | "poly64x1_t" | "poly64x2_t" => 6,
+        "int8x8_t" | "int8x16_t" | "uint8x8_t" | "uint8x16_t" | "poly8x8_t" | "poly8x16_t"
+        | "i8" | "u8" => 3,
+        "int16x4_t" | "int16x8_t" | "uint16x4_t" | "uint16x8_t" | "poly16x4_t" | "poly16x8_t"
+        | "i16" | "u16" => 4,
+        "int32x2_t" | "int32x4_t" | "uint32x2_t" | "uint32x4_t" | "i32" | "u32" => 5,
+        "int64x1_t" | "int64x2_t" | "uint64x1_t" | "uint64x2_t" | "poly64x1_t" | "poly64x2_t"
+        | "i64" | "u64" => 6,
         _ => panic!("unknown type: {}", t),
     }
 }
@@ -219,6 +222,14 @@ fn type_to_n_suffix(t: &str) -> &str {
         "poly16x8_t" => "q_n_p16",
         "poly64x1_t" => "_n_p64",
         "poly64x2_t" => "q_n_p64",
+        "i8" => "b_n_s8",
+        "i16" => "h_n_s16",
+        "i32" => "s_n_s32",
+        "i64" => "d_n_s64",
+        "u8" => "b_n_u8",
+        "u16" => "h_n_u16",
+        "u32" => "s_n_u32",
+        "u64" => "d_n_u64",
         _ => panic!("unknown type: {}", t),
     }
 }
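Note on the new scalar arms: the leading b/h/s/d letter in these suffixes is the AArch64 scalar register width (8/16/32/64 bits), so the generator can now assemble scalar intrinsic names the same way it assembles vector ones. A toy illustration of the composition (not generator code):

    fn main() {
        // type_to_n_suffix("i16") returns "h_n_s16" per the table above,
        // and the generator concatenates it onto the base name:
        let name = format!("{}{}", "vqshl", "h_n_s16");
        assert_eq!(name, "vqshlh_n_s16"); // one of the intrinsics added by this patch
    }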
=> "q_f32", - "float64x1_t" => "_f64", - "float64x2_t" => "q_f64", - "poly64x1_t" => "_p64", - "poly64x2_t" => "q_p64", - */ + "int8x8_t" | "uint8x8_t" | "poly8x8_t" => "uint8x8_t", + "int8x16_t" | "uint8x16_t" | "poly8x16_t" => "uint8x16_t", + "int16x4_t" | "uint16x4_t" | "poly16x4_t" => "uint16x4_t", + "int16x8_t" | "uint16x8_t" | "poly16x8_t" => "uint16x8_t", + "int32x2_t" | "uint32x2_t" => "uint32x2_t", + "int32x4_t" | "uint32x4_t" => "uint32x4_t", + "int64x1_t" | "uint64x1_t" | "poly64x1_t" => "uint64x1_t", + "int64x2_t" | "uint64x2_t" | "poly64x2_t" => "uint64x2_t", _ => panic!("unknown type: {}", t), } } @@ -1834,15 +1825,24 @@ fn get_call( sub_fn.push_str(", "); } sub_fn.push_str(¶ms[i]); - if params[i].starts_with('{') { - paranthes += 1; - } - if params[i].ends_with('}') { - paranthes -= 1; - if paranthes == 0 { + let l = params[i].len(); + for j in 0..l { + if ¶ms[i][j..j + 1] == "{" { + paranthes += 1; + } else { break; } } + for j in 0..l { + if ¶ms[i][l - j - 1..l - j] == "}" { + paranthes -= 1; + } else { + break; + } + } + if paranthes == 0 { + break; + } i += 1; } let sub_call = get_call( @@ -1923,9 +1923,9 @@ fn get_call( } else if fn_format[1] == "nself" { fn_name.push_str(type_to_n_suffix(in_t[1])); } else if fn_format[1] == "signed" { - fn_name.push_str(type_to_signed_suffix(in_t[1])); + fn_name.push_str(type_to_suffix(type_to_signed(in_t[1]))); } else if fn_format[1] == "unsigned" { - fn_name.push_str(type_to_unsigned_suffix(in_t[1])); + fn_name.push_str(type_to_suffix(type_to_unsigned(in_t[1]))); } else if fn_format[1] == "doubleself" { fn_name.push_str(&type_to_double_suffixes(out_t, in_t[1])); } else if fn_format[1] == "noq_doubleself" { @@ -1941,6 +1941,8 @@ fn get_call( fn_name.push_str(&(type_len(in_t[1]) / 2).to_string()); } else if fn_format[1] == "nout" { fn_name.push_str(type_to_n_suffix(out_t)); + } else if fn_format[1] == "nsigned" { + fn_name.push_str(type_to_n_suffix(type_to_signed(in_t[1]))); } else if fn_format[1] == "in_ntt" { fn_name.push_str(type_to_suffix(native_type_to_type(in_t[1]))); } else if fn_format[1] == "out_ntt" {