//! Fused Multiply-Add instruction set (FMA)
//!
//! The FMA instruction set is an extension to the 128- and 256-bit SSE
//! instructions in the x86 microprocessor instruction set to perform fused
//! multiply-add (FMA) operations.
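//!
//! Each fused operation computes `a * b + c` (or a sign variant of it) with
//! a single rounding step, which is generally both faster and more accurate
//! than a separate multiply followed by an add.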
//!
//! The references are:
//!
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
//! Instruction Set Reference, A-Z][intel64_ref].
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
//! System Instructions][amd64_ref].
//!
//! Wikipedia's [FMA][wiki_fma] page provides a quick overview of the
//! instructions available.
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
use coresimd::x86::*;
#[cfg(test)]
use stdsimd_test::assert_instr;
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and add the intermediate result to packed elements in `c`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_pd)
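///
/// # Examples
///
/// A minimal usage sketch (illustrative values, via `std::arch`); each lane
/// computes `a * b + c`:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("fma") {
///         unsafe {
///             let a = _mm_setr_pd(1., 2.);
///             let b = _mm_setr_pd(5., 3.);
///             let c = _mm_setr_pd(4., 9.);
///             // lanes: 1. * 5. + 4. = 9. and 2. * 3. + 9. = 15.
///             let r = _mm_fmadd_pd(a, b, c);
///             assert_eq!(_mm_cvtsd_f64(r), 9.);
///         }
///     }
/// }
/// ```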
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmadd213pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
vfmaddpd(a, b, c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and add the intermediate result to packed elements in `c`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_pd)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmadd213pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
vfmaddpd256(a, b, c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and add the intermediate result to packed elements in `c`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ps)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmadd213ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
vfmaddps(a, b, c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and add the intermediate result to packed elements in `c`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_ps)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmadd213ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
vfmaddps256(a, b, c)
}
/// Multiply the lower double-precision (64-bit) floating-point elements in
/// `a` and `b`, and add the intermediate result to the lower element in `c`.
/// Store the result in the lower element of the returned value, and copy the
/// upper element from `a` to the upper element of the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sd)
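///
/// # Examples
///
/// A minimal sketch of the scalar behavior (illustrative values, via
/// `std::arch`); only the lower lane is computed, the upper lane is taken
/// from `a`:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("fma") {
///         unsafe {
///             let a = _mm_setr_pd(1., 2.);
///             let b = _mm_setr_pd(5., 3.);
///             let c = _mm_setr_pd(4., 9.);
///             // lower lane: 1. * 5. + 4. = 9.; upper lane copied from `a`: 2.
///             let r = _mm_fmadd_sd(a, b, c);
///             assert_eq!(_mm_cvtsd_f64(r), 9.);
///         }
///     }
/// }
/// ```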
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmadd213sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
vfmaddsd(a, b, c)
}
/// Multiply the lower single-precision (32-bit) floating-point elements in
/// `a` and `b`, and add the intermediate result to the lower element in `c`.
/// Store the result in the lower element of the returned value, and copy the
/// 3 upper elements from `a` to the upper elements of the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ss)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmadd213ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
vfmaddss(a, b, c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and alternately add and subtract packed elements in `c` to/from
/// the intermediate result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmaddsub_pd)
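///
/// # Examples
///
/// A minimal sketch (illustrative values, via `std::arch`); even-indexed
/// lanes compute `a * b - c`, odd-indexed lanes compute `a * b + c`:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("fma") {
///         unsafe {
///             let a = _mm_setr_pd(1., 2.);
///             let b = _mm_setr_pd(5., 3.);
///             let c = _mm_setr_pd(4., 9.);
///             // lanes: 1. * 5. - 4. = 1. and 2. * 3. + 9. = 15.
///             let r = _mm_fmaddsub_pd(a, b, c);
///             assert_eq!(_mm_cvtsd_f64(r), 1.);
///         }
///     }
/// }
/// ```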
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmaddsub213pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
vfmaddsubpd(a, b, c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and alternately add and subtract packed elements in `c` to/from
/// the intermediate result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmaddsub_pd)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmaddsub213pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmaddsub_pd(
a: __m256d, b: __m256d, c: __m256d,
) -> __m256d {
vfmaddsubpd256(a, b, c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and alternately add and subtract packed elements in `c` to/from
/// the intermediate result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmaddsub_ps)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmaddsub213ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
vfmaddsubps(a, b, c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and alternately add and subtract packed elements in `c` to/from
/// the intermediate result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmaddsub_ps)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmaddsub213ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
vfmaddsubps256(a, b, c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and subtract packed elements in `c` from the intermediate result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_pd)
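///
/// # Examples
///
/// A minimal sketch (illustrative values, via `std::arch`); each lane
/// computes `a * b - c`:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("fma") {
///         unsafe {
///             let a = _mm_setr_pd(1., 2.);
///             let b = _mm_setr_pd(5., 3.);
///             let c = _mm_setr_pd(4., 9.);
///             // lanes: 1. * 5. - 4. = 1. and 2. * 3. - 9. = -3.
///             let r = _mm_fmsub_pd(a, b, c);
///             assert_eq!(_mm_cvtsd_f64(r), 1.);
///         }
///     }
/// }
/// ```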
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsub213pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
vfmsubpd(a, b, c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and subtract packed elements in `c` from the intermediate result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_pd)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsub213pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
vfmsubpd256(a, b, c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and subtract packed elements in `c` from the intermediate result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ps)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsub213ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
vfmsubps(a, b, c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and subtract packed elements in `c` from the intermediate result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_ps)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsub213ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
vfmsubps256(a, b, c)
}
/// Multiply the lower double-precision (64-bit) floating-point elements in
/// `a` and `b`, and subtract the lower element in `c` from the intermediate
/// result. Store the result in the lower element of the returned value, and
/// copy the upper element from `a` to the upper element of the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_sd)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsub213sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
vfmsubsd(a, b, c)
}
/// Multiply the lower single-precision (32-bit) floating-point elements in
/// `a` and `b`, and subtract the lower element in `c` from the intermediate
/// result. Store the result in the lower element of the returned value, and
/// copy the 3 upper elements from `a` to the upper elements of the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ss)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsub213ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
vfmsubss(a, b, c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and alternately subtract and add packed elements in `c` from/to
/// the intermediate result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsubadd_pd)
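///
/// # Examples
///
/// A minimal sketch (illustrative values, via `std::arch`); even-indexed
/// lanes compute `a * b + c`, odd-indexed lanes compute `a * b - c`:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("fma") {
///         unsafe {
///             let a = _mm_setr_pd(1., 2.);
///             let b = _mm_setr_pd(5., 3.);
///             let c = _mm_setr_pd(4., 9.);
///             // lanes: 1. * 5. + 4. = 9. and 2. * 3. - 9. = -3.
///             let r = _mm_fmsubadd_pd(a, b, c);
///             assert_eq!(_mm_cvtsd_f64(r), 9.);
///         }
///     }
/// }
/// ```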
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsubadd213pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
vfmsubaddpd(a, b, c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and alternately subtract and add packed elements in `c` from/to
/// the intermediate result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsubadd_pd)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsubadd213pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmsubadd_pd(
a: __m256d, b: __m256d, c: __m256d,
) -> __m256d {
vfmsubaddpd256(a, b, c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and alternately subtract and add packed elements in `c` from/to
/// the intermediate result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsubadd_ps)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsubadd213ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
vfmsubaddps(a, b, c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and alternately subtract and add packed elements in `c` from/to
/// the intermediate result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsubadd_ps)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfmsubadd213ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
vfmsubaddps256(a, b, c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and add the negated intermediate result to packed elements in `c`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_pd)
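///
/// # Examples
///
/// A minimal sketch (illustrative values, via `std::arch`); each lane
/// computes `-(a * b) + c`:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("fma") {
///         unsafe {
///             let a = _mm_setr_pd(1., 2.);
///             let b = _mm_setr_pd(5., 3.);
///             let c = _mm_setr_pd(4., 9.);
///             // lanes: -(1. * 5.) + 4. = -1. and -(2. * 3.) + 9. = 3.
///             let r = _mm_fnmadd_pd(a, b, c);
///             assert_eq!(_mm_cvtsd_f64(r), -1.);
///         }
///     }
/// }
/// ```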
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmadd213pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
vfnmaddpd(a, b, c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and add the negated intermediate result to packed elements in `c`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_pd)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmadd213pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
vfnmaddpd256(a, b, c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and add the negated intermediate result to packed elements in `c`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ps)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmadd213ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
vfnmaddps(a, b, c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and add the negated intermediate result to packed elements in `c`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_ps)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmadd213ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
vfnmaddps256(a, b, c)
}
/// Multiply the lower double-precision (64-bit) floating-point elements in
/// `a` and `b`, and add the negated intermediate result to the lower element
/// in `c`. Store the result in the lower element of the returned value, and
/// copy the upper element from `a` to the upper element of the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_sd)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmadd213sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
vfnmaddsd(a, b, c)
}
/// Multiply the lower single-precision (32-bit) floating-point elements in
/// `a` and `b`, and add the negated intermediate result to the lower element
/// in `c`. Store the result in the lower element of the returned value, and
/// copy the 3 upper elements from `a` to the upper elements of the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ss)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmadd213ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
vfnmaddss(a, b, c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and subtract packed elements in `c` from the negated intermediate
/// result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_pd)
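///
/// # Examples
///
/// A minimal sketch (illustrative values, via `std::arch`); each lane
/// computes `-(a * b) - c`:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("fma") {
///         unsafe {
///             let a = _mm_setr_pd(1., 2.);
///             let b = _mm_setr_pd(5., 3.);
///             let c = _mm_setr_pd(4., 9.);
///             // lanes: -(1. * 5.) - 4. = -9. and -(2. * 3.) - 9. = -15.
///             let r = _mm_fnmsub_pd(a, b, c);
///             assert_eq!(_mm_cvtsd_f64(r), -9.);
///         }
///     }
/// }
/// ```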
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmsub213pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
vfnmsubpd(a, b, c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`, and subtract packed elements in `c` from the negated intermediate
/// result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmsub_pd)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmsub213pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
vfnmsubpd256(a, b, c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and subtract packed elements in `c` from the negated intermediate
/// result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_ps)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmsub213ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
vfnmsubps(a, b, c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and subtract packed elements in `c` from the negated intermediate
/// result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmsub_ps)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmsub213ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
vfnmsubps256(a, b, c)
}
/// Multiply the lower double-precision (64-bit) floating-point elements in
/// `a` and `b`, and subtract the lower element in `c` from the negated
/// intermediate result. Store the result in the lower element of the returned
/// value, and copy the upper element from `a` to the upper element of the
/// result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_sd)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmsub213sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
vfnmsubsd(a, b, c)
}
/// Multiply the lower single-precision (32-bit) floating-point elements in
/// `a` and `b`, and subtract the lower element in `c` from the negated
/// intermediate result. Store the result in the lower element of the
/// returned value, and copy the 3 upper elements from `a` to the upper
/// elements of the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_ss)
#[inline]
#[target_feature(enable = "fma")]
#[cfg_attr(test, assert_instr(vfnmsub213ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
vfnmsubss(a, b, c)
}
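// LLVM intrinsics backing the public functions above; each `link_name`
// binds directly to the corresponding LLVM x86 FMA intrinsic.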
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.fma.vfmadd.pd"]
fn vfmaddpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
#[link_name = "llvm.x86.fma.vfmadd.pd.256"]
fn vfmaddpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
#[link_name = "llvm.x86.fma.vfmadd.ps"]
fn vfmaddps(a: __m128, b: __m128, c: __m128) -> __m128;
#[link_name = "llvm.x86.fma.vfmadd.ps.256"]
fn vfmaddps256(a: __m256, b: __m256, c: __m256) -> __m256;
#[link_name = "llvm.x86.fma.vfmadd.sd"]
fn vfmaddsd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
#[link_name = "llvm.x86.fma.vfmadd.ss"]
fn vfmaddss(a: __m128, b: __m128, c: __m128) -> __m128;
#[link_name = "llvm.x86.fma.vfmaddsub.pd"]
fn vfmaddsubpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
#[link_name = "llvm.x86.fma.vfmaddsub.pd.256"]
fn vfmaddsubpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
#[link_name = "llvm.x86.fma.vfmaddsub.ps"]
fn vfmaddsubps(a: __m128, b: __m128, c: __m128) -> __m128;
#[link_name = "llvm.x86.fma.vfmaddsub.ps.256"]
fn vfmaddsubps256(a: __m256, b: __m256, c: __m256) -> __m256;
#[link_name = "llvm.x86.fma.vfmsub.pd"]
fn vfmsubpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
#[link_name = "llvm.x86.fma.vfmsub.pd.256"]
fn vfmsubpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
#[link_name = "llvm.x86.fma.vfmsub.ps"]
fn vfmsubps(a: __m128, b: __m128, c: __m128) -> __m128;
#[link_name = "llvm.x86.fma.vfmsub.ps.256"]
fn vfmsubps256(a: __m256, b: __m256, c: __m256) -> __m256;
#[link_name = "llvm.x86.fma.vfmsub.sd"]
fn vfmsubsd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
#[link_name = "llvm.x86.fma.vfmsub.ss"]
fn vfmsubss(a: __m128, b: __m128, c: __m128) -> __m128;
#[link_name = "llvm.x86.fma.vfmsubadd.pd"]
fn vfmsubaddpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
#[link_name = "llvm.x86.fma.vfmsubadd.pd.256"]
fn vfmsubaddpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
#[link_name = "llvm.x86.fma.vfmsubadd.ps"]
fn vfmsubaddps(a: __m128, b: __m128, c: __m128) -> __m128;
#[link_name = "llvm.x86.fma.vfmsubadd.ps.256"]
fn vfmsubaddps256(a: __m256, b: __m256, c: __m256) -> __m256;
#[link_name = "llvm.x86.fma.vfnmadd.pd"]
fn vfnmaddpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
#[link_name = "llvm.x86.fma.vfnmadd.pd.256"]
fn vfnmaddpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
#[link_name = "llvm.x86.fma.vfnmadd.ps"]
fn vfnmaddps(a: __m128, b: __m128, c: __m128) -> __m128;
#[link_name = "llvm.x86.fma.vfnmadd.ps.256"]
fn vfnmaddps256(a: __m256, b: __m256, c: __m256) -> __m256;
#[link_name = "llvm.x86.fma.vfnmadd.sd"]
fn vfnmaddsd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
#[link_name = "llvm.x86.fma.vfnmadd.ss"]
fn vfnmaddss(a: __m128, b: __m128, c: __m128) -> __m128;
#[link_name = "llvm.x86.fma.vfnmsub.pd"]
fn vfnmsubpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
#[link_name = "llvm.x86.fma.vfnmsub.pd.256"]
fn vfnmsubpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
#[link_name = "llvm.x86.fma.vfnmsub.ps"]
fn vfnmsubps(a: __m128, b: __m128, c: __m128) -> __m128;
#[link_name = "llvm.x86.fma.vfnmsub.ps.256"]
fn vfnmsubps256(a: __m256, b: __m256, c: __m256) -> __m256;
#[link_name = "llvm.x86.fma.vfnmsub.sd"]
fn vfnmsubsd(a: __m128d, b: __m128d, c: __m128d) -> __m128d;
#[link_name = "llvm.x86.fma.vfnmsub.ss"]
fn vfnmsubss(a: __m128, b: __m128, c: __m128) -> __m128;
}
#[cfg(test)]
mod tests {
use std;
use stdsimd_test::simd_test;
use coresimd::x86::*;
#[simd_test(enable = "fma")]
unsafe fn test_mm_fmadd_pd() {
let a = _mm_setr_pd(1., 2.);
let b = _mm_setr_pd(5., 3.);
let c = _mm_setr_pd(4., 9.);
let r = _mm_setr_pd(9., 15.);
assert_eq_m128d(_mm_fmadd_pd(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm256_fmadd_pd() {
let a = _mm256_setr_pd(1., 2., 3., 4.);
let b = _mm256_setr_pd(5., 3., 7., 2.);
let c = _mm256_setr_pd(4., 9., 1., 7.);
let r = _mm256_setr_pd(9., 15., 22., 15.);
assert_eq_m256d(_mm256_fmadd_pd(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm_fmadd_ps() {
let a = _mm_setr_ps(1., 2., 3., 4.);
let b = _mm_setr_ps(5., 3., 7., 2.);
let c = _mm_setr_ps(4., 9., 1., 7.);
let r = _mm_setr_ps(9., 15., 22., 15.);
assert_eq_m128(_mm_fmadd_ps(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm256_fmadd_ps() {
let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
let r = _mm256_setr_ps(9., 15., 22., 15., -5., -49., -2., -31.);
assert_eq_m256(_mm256_fmadd_ps(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm_fmadd_sd() {
let a = _mm_setr_pd(1., 2.);
let b = _mm_setr_pd(5., 3.);
let c = _mm_setr_pd(4., 9.);
let r = _mm_setr_pd(9., 2.);
assert_eq_m128d(_mm_fmadd_sd(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm_fmadd_ss() {
let a = _mm_setr_ps(1., 2., 3., 4.);
let b = _mm_setr_ps(5., 3., 7., 2.);
let c = _mm_setr_ps(4., 9., 1., 7.);
let r = _mm_setr_ps(9., 2., 3., 4.);
assert_eq_m128(_mm_fmadd_ss(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm_fmaddsub_pd() {
let a = _mm_setr_pd(1., 2.);
let b = _mm_setr_pd(5., 3.);
let c = _mm_setr_pd(4., 9.);
let r = _mm_setr_pd(1., 15.);
assert_eq_m128d(_mm_fmaddsub_pd(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm256_fmaddsub_pd() {
let a = _mm256_setr_pd(1., 2., 3., 4.);
let b = _mm256_setr_pd(5., 3., 7., 2.);
let c = _mm256_setr_pd(4., 9., 1., 7.);
let r = _mm256_setr_pd(1., 15., 20., 15.);
assert_eq_m256d(_mm256_fmaddsub_pd(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm_fmaddsub_ps() {
let a = _mm_setr_ps(1., 2., 3., 4.);
let b = _mm_setr_ps(5., 3., 7., 2.);
let c = _mm_setr_ps(4., 9., 1., 7.);
let r = _mm_setr_ps(1., 15., 20., 15.);
assert_eq_m128(_mm_fmaddsub_ps(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm256_fmaddsub_ps() {
let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
let r = _mm256_setr_ps(1., 15., 20., 15., 5., -49., 2., -31.);
assert_eq_m256(_mm256_fmaddsub_ps(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm_fmsub_pd() {
let a = _mm_setr_pd(1., 2.);
let b = _mm_setr_pd(5., 3.);
let c = _mm_setr_pd(4., 9.);
let r = _mm_setr_pd(1., -3.);
assert_eq_m128d(_mm_fmsub_pd(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm256_fmsub_pd() {
let a = _mm256_setr_pd(1., 2., 3., 4.);
let b = _mm256_setr_pd(5., 3., 7., 2.);
let c = _mm256_setr_pd(4., 9., 1., 7.);
let r = _mm256_setr_pd(1., -3., 20., 1.);
assert_eq_m256d(_mm256_fmsub_pd(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm_fmsub_ps() {
let a = _mm_setr_ps(1., 2., 3., 4.);
let b = _mm_setr_ps(5., 3., 7., 2.);
let c = _mm_setr_ps(4., 9., 1., 7.);
let r = _mm_setr_ps(1., -3., 20., 1.);
assert_eq_m128(_mm_fmsub_ps(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm256_fmsub_ps() {
let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
let r = _mm256_setr_ps(1., -3., 20., 1., 5., -71., 2., -25.);
assert_eq_m256(_mm256_fmsub_ps(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm_fmsub_sd() {
let a = _mm_setr_pd(1., 2.);
let b = _mm_setr_pd(5., 3.);
let c = _mm_setr_pd(4., 9.);
let r = _mm_setr_pd(1., 2.);
assert_eq_m128d(_mm_fmsub_sd(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm_fmsub_ss() {
let a = _mm_setr_ps(1., 2., 3., 4.);
let b = _mm_setr_ps(5., 3., 7., 2.);
let c = _mm_setr_ps(4., 9., 1., 7.);
let r = _mm_setr_ps(1., 2., 3., 4.);
assert_eq_m128(_mm_fmsub_ss(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm_fmsubadd_pd() {
let a = _mm_setr_pd(1., 2.);
let b = _mm_setr_pd(5., 3.);
let c = _mm_setr_pd(4., 9.);
let r = _mm_setr_pd(9., -3.);
assert_eq_m128d(_mm_fmsubadd_pd(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm256_fmsubadd_pd() {
let a = _mm256_setr_pd(1., 2., 3., 4.);
let b = _mm256_setr_pd(5., 3., 7., 2.);
let c = _mm256_setr_pd(4., 9., 1., 7.);
let r = _mm256_setr_pd(9., -3., 22., 1.);
assert_eq_m256d(_mm256_fmsubadd_pd(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm_fmsubadd_ps() {
let a = _mm_setr_ps(1., 2., 3., 4.);
let b = _mm_setr_ps(5., 3., 7., 2.);
let c = _mm_setr_ps(4., 9., 1., 7.);
let r = _mm_setr_ps(9., -3., 22., 1.);
assert_eq_m128(_mm_fmsubadd_ps(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm256_fmsubadd_ps() {
let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
let r = _mm256_setr_ps(9., -3., 22., 1., -5., -71., -2., -25.);
assert_eq_m256(_mm256_fmsubadd_ps(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm_fnmadd_pd() {
let a = _mm_setr_pd(1., 2.);
let b = _mm_setr_pd(5., 3.);
let c = _mm_setr_pd(4., 9.);
let r = _mm_setr_pd(-1., 3.);
assert_eq_m128d(_mm_fnmadd_pd(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm256_fnmadd_pd() {
let a = _mm256_setr_pd(1., 2., 3., 4.);
let b = _mm256_setr_pd(5., 3., 7., 2.);
let c = _mm256_setr_pd(4., 9., 1., 7.);
let r = _mm256_setr_pd(-1., 3., -20., -1.);
assert_eq_m256d(_mm256_fnmadd_pd(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm_fnmadd_ps() {
let a = _mm_setr_ps(1., 2., 3., 4.);
let b = _mm_setr_ps(5., 3., 7., 2.);
let c = _mm_setr_ps(4., 9., 1., 7.);
let r = _mm_setr_ps(-1., 3., -20., -1.);
assert_eq_m128(_mm_fnmadd_ps(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm256_fnmadd_ps() {
let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
let r = _mm256_setr_ps(-1., 3., -20., -1., -5., 71., -2., 25.);
assert_eq_m256(_mm256_fnmadd_ps(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm_fnmadd_sd() {
let a = _mm_setr_pd(1., 2.);
let b = _mm_setr_pd(5., 3.);
let c = _mm_setr_pd(4., 9.);
let r = _mm_setr_pd(-1., 2.);
assert_eq_m128d(_mm_fnmadd_sd(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm_fnmadd_ss() {
let a = _mm_setr_ps(1., 2., 3., 4.);
let b = _mm_setr_ps(5., 3., 7., 2.);
let c = _mm_setr_ps(4., 9., 1., 7.);
let r = _mm_setr_ps(-1., 2., 3., 4.);
assert_eq_m128(_mm_fnmadd_ss(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm_fnmsub_pd() {
let a = _mm_setr_pd(1., 2.);
let b = _mm_setr_pd(5., 3.);
let c = _mm_setr_pd(4., 9.);
let r = _mm_setr_pd(-9., -15.);
assert_eq_m128d(_mm_fnmsub_pd(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm256_fnmsub_pd() {
let a = _mm256_setr_pd(1., 2., 3., 4.);
let b = _mm256_setr_pd(5., 3., 7., 2.);
let c = _mm256_setr_pd(4., 9., 1., 7.);
let r = _mm256_setr_pd(-9., -15., -22., -15.);
assert_eq_m256d(_mm256_fnmsub_pd(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm_fnmsub_ps() {
let a = _mm_setr_ps(1., 2., 3., 4.);
let b = _mm_setr_ps(5., 3., 7., 2.);
let c = _mm_setr_ps(4., 9., 1., 7.);
let r = _mm_setr_ps(-9., -15., -22., -15.);
assert_eq_m128(_mm_fnmsub_ps(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm256_fnmsub_ps() {
let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
let r = _mm256_setr_ps(-9., -15., -22., -15., 5., 49., 2., 31.);
assert_eq_m256(_mm256_fnmsub_ps(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm_fnmsub_sd() {
let a = _mm_setr_pd(1., 2.);
let b = _mm_setr_pd(5., 3.);
let c = _mm_setr_pd(4., 9.);
let r = _mm_setr_pd(-9., 2.);
assert_eq_m128d(_mm_fnmsub_sd(a, b, c), r);
}
#[simd_test(enable = "fma")]
unsafe fn test_mm_fnmsub_ss() {
let a = _mm_setr_ps(1., 2., 3., 4.);
let b = _mm_setr_ps(5., 3., 7., 2.);
let c = _mm_setr_ps(4., 9., 1., 7.);
let r = _mm_setr_ps(-9., 2., 3., 4.);
assert_eq_m128(_mm_fnmsub_ss(a, b, c), r);
}
}