#![feature(cfg_target_feature, stdsimd, target_feature)]

#[macro_use]
extern crate stdsimd;

use stdsimd::simd::*;

#[cfg(target_arch = "powerpc")]
macro_rules! is_powerpc_feature_detected {
    ($t:tt) => {
        false
    };
}

macro_rules! invoke_arch {
    ($macro:ident, $feature_macro:ident, $id:ident, $elem_ty:ident,
     [$($feature:tt),*]) => {
        $($macro!($feature, $feature_macro, $id, $elem_ty);)*
    }
}

macro_rules! invoke_vectors {
    ($macro:ident, [$(($id:ident, $elem_ty:ident)),*]) => {
        $(
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            invoke_arch!($macro, is_x86_feature_detected, $id, $elem_ty,
                        ["sse", "sse2", "sse3", "ssse3", "sse4.1",
                         "sse4.2", "sse4a", "avx2", "avx2", "avx512f"]);
            #[cfg(target_arch = "aarch64")]
            invoke_arch!($macro, is_aarch64_feature_detected, $id, $elem_ty,
                        ["neon"]);
            #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))]
            invoke_arch!($macro, is_arm_feature_detected, $id, $elem_ty,
                         ["neon"]);
            #[cfg(target_arch = "powerpc")]
            invoke_arch!($macro, is_powerpc_feature_detected, $id, $elem_ty, ["altivec"]);
            #[cfg(target_arch = "powerpc64")]
            invoke_arch!($macro, is_powerpc64_feature_detected, $id, $elem_ty, ["altivec"]);
        )*
    }
}

macro_rules! finvoke {
    ($macro:ident) => {
        invoke_vectors!(
            $macro,
            [
                (f32x2, f32),
                (f32x4, f32),
                (f32x8, f32),
                (f32x16, f32),
                (f64x2, f64),
                (f64x4, f64),
                (f64x8, f64)
            ]
        );
    };
}

macro_rules! iinvoke {
    ($macro:ident) => {
        invoke_vectors!(
            $macro,
            [
                (i8x2, i8),
                (i8x4, i8),
                (i8x8, i8),
                (i8x16, i8),
                (i8x32, i8),
                (i8x64, i8),
                (i16x2, i16),
                (i16x4, i16),
                (i16x8, i16),
                (i16x16, i16),
                (i16x32, i16),
                (i32x2, i32),
                (i32x4, i32),
                (i32x8, i32),
                (i32x16, i32),
                (i64x2, i64),
                (i64x4, i64),
                (i64x8, i64),
                (u8x2, u8),
                (u8x4, u8),
                (u8x8, u8),
                (u8x16, u8),
                (u8x32, u8),
                (u8x64, u8),
                (u16x2, u16),
                (u16x4, u16),
                (u16x8, u16),
                (u16x16, u16),
                (u16x32, u16),
                (u32x2, u32),
                (u32x4, u32),
                (u32x8, u32),
                (u32x16, u32),
                (u64x2, u64),
                (u64x4, u64),
                (u64x8, u64)
            ]
        );
    };
}

macro_rules! min_nan_test {
    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
        if $feature_macro!($feature) {
            #[target_feature(enable = $feature)]
            unsafe fn test_fn() {
                let n0 = ::std::$elem_ty::NAN;

                assert_eq!(n0.min(-3.0), -3.0);
                assert_eq!((-3.0 as $elem_ty).min(n0), -3.0);

                let v0 = $id::splat(-3.0);

                // FIXME (https://github.com/rust-lang-nursery/stdsimd/issues/408):
                // When the last element is NaN the current implementation produces incorrect results.
                let bugbug = 1;
                for i in 0..$id::lanes() - bugbug {
                    let mut v = v0.replace(i, n0);
                    // If there is a NaN, the result is always the smallest element:
                    assert_eq!(v.min_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i, v.min_element(), v, v.as_int());
                    for j in 0..i {
                        v = v.replace(j, n0);
                        assert_eq!(v.min_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i, v.min_element(), v, v.as_int());
                    }
                }
                // If the vector contains all NaNs the result is NaN:
                let vn = $id::splat(n0);
                assert!(vn.min_element().is_nan(), "all nans | v={:?} | min={} | is_nan: {}",
                        vn, vn.min_element(), vn.min_element().is_nan());
            }
            unsafe { test_fn() };
        }
    }
}

#[test]
fn min_nan() {
    finvoke!(min_nan_test);
}

macro_rules! max_nan_test {
    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
        if $feature_macro!($feature) {
            #[target_feature(enable = $feature)]
            unsafe fn test_fn() {
                let n0 = ::std::$elem_ty::NAN;

                assert_eq!(n0.max(-3.0), -3.0);
                assert_eq!((-3.0 as $elem_ty).max(n0), -3.0);

                let v0 = $id::splat(-3.0);

                // FIXME (https://github.com/rust-lang-nursery/stdsimd/issues/408):
                // When the last element is NaN the current implementation produces incorrect results.
                let bugbug = 1;
                for i in 0..$id::lanes() - bugbug {
                    let mut v = v0.replace(i, n0);
                    // If there is a NaN the result is always the largest element:
                    assert_eq!(v.max_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i, v.max_element(), v, v.as_int());
                    for j in 0..i {
                        v = v.replace(j, n0);
                        assert_eq!(v.max_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i, v.max_element(), v, v.as_int());
                    }
                }

                // If the vector contains all NaNs the result is NaN:
                let vn = $id::splat(n0);
                assert!(vn.max_element().is_nan(), "all nans | v={:?} | max={} | is_nan: {}",
                        vn, vn.max_element(), vn.max_element().is_nan());
            }
            unsafe { test_fn() };
        }
    }
}

#[test]
fn max_nan() {
    finvoke!(max_nan_test);
}

macro_rules! wrapping_sum_nan_test {
    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
        if $feature_macro!($feature) {
            #[target_feature(enable = $feature)]
            #[allow(unreachable_code)]
            unsafe fn test_fn() {
                // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732
                // https://github.com/rust-lang-nursery/stdsimd/issues/409
                return;

                let n0 = ::std::$elem_ty::NAN;
                let v0 = $id::splat(-3.0);
                for i in 0..$id::lanes() {
                    let mut v = v0.replace(i, n0);
                    // If the vector contains a NaN the result is NaN:
                    assert!(
                        v.wrapping_sum().is_nan(),
                        "nan at {} => {} | {:?}",
                        i,
                        v.wrapping_sum(),
                        v
                    );
                    for j in 0..i {
                        v = v.replace(j, n0);
                        assert!(v.wrapping_sum().is_nan());
                    }
                }
                let v = $id::splat(n0);
                assert!(v.wrapping_sum().is_nan(), "all nans | {:?}", v);
            }
            unsafe { test_fn() };
        }
    };
}

#[test]
fn wrapping_sum_nan() {
    finvoke!(wrapping_sum_nan_test);
}

macro_rules! wrapping_product_nan_test {
    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
        if $feature_macro!($feature) {
            #[target_feature(enable = $feature)]
            #[allow(unreachable_code)]
            unsafe fn test_fn() {
                // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732
                // https://github.com/rust-lang-nursery/stdsimd/issues/409
                return;

                let n0 = ::std::$elem_ty::NAN;
                let v0 = $id::splat(-3.0);
                for i in 0..$id::lanes() {
                    let mut v = v0.replace(i, n0);
                    // If the vector contains a NaN the result is NaN:
                    assert!(
                        v.wrapping_product().is_nan(),
                        "nan at {} | {:?}",
                        i,
                        v
                    );
                    for j in 0..i {
                        v = v.replace(j, n0);
                        assert!(v.wrapping_sum().is_nan());
                    }
                }
                let v = $id::splat(n0);
                assert!(
                    v.wrapping_product().is_nan(),
                    "all nans | {:?}",
                    v
                );
            }
            unsafe { test_fn() };
        }
    };
}

#[test]
fn wrapping_product_nan() {
    finvoke!(wrapping_product_nan_test);
}

trait AsInt {
    type Int;
    fn as_int(self) -> Self::Int;
    fn from_int(Self::Int) -> Self;
}

macro_rules! as_int {
    ($float:ident, $int:ident) => {
        impl AsInt for $float {
            type Int = $int;
            fn as_int(self) -> $int {
                unsafe { ::std::mem::transmute(self) }
            }
            fn from_int(x: $int) -> $float {
                unsafe { ::std::mem::transmute(x) }
            }
        }
    };
}

as_int!(f32, u32);
as_int!(f64, u64);
as_int!(f32x2, i32x2);
as_int!(f32x4, i32x4);
as_int!(f32x8, i32x8);
as_int!(f32x16, i32x16);
as_int!(f64x2, i64x2);
as_int!(f64x4, i64x4);
as_int!(f64x8, i64x8);

// FIXME: these fail on i586 for some reason
#[cfg(not(all(target_arch = "x86", not(target_feature = "sse2"))))]
mod offset {
    use super::*;

    trait TreeReduceAdd {
        type R;
        fn tree_reduce_add(self) -> Self::R;
    }

    macro_rules! tree_reduce_add_f {
    ($elem_ty:ident) => {
        impl<'a> TreeReduceAdd for &'a [$elem_ty] {
            type R = $elem_ty;
            fn tree_reduce_add(self) -> $elem_ty {
                if self.len() == 2 {
                    println!("  lv: {}, rv: {} => {}", self[0], self[1], self[0] + self[1]);
                    self[0] + self[1]
                } else {
                    let mid = self.len() / 2;
                    let (left, right) = self.split_at(mid);
                    println!("  splitting self: {:?} at mid {} into left: {:?}, right: {:?}", self, mid, self[0], self[1]);
                    Self::tree_reduce_add(left) + Self::tree_reduce_add(right)
                }
            }
        }
    };
}
    tree_reduce_add_f!(f32);
    tree_reduce_add_f!(f64);

    macro_rules! wrapping_sum_roundoff_test {
    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
        if $feature_macro!($feature) {
            #[target_feature(enable = $feature)]
            unsafe fn test_fn() {
                let mut start = std::$elem_ty::EPSILON;
                let mut wrapping_sum = 0. as $elem_ty;

                let mut v = $id::splat(0. as $elem_ty);
                for i in 0..$id::lanes() {
                    let c = if i % 2 == 0 { 1e3 } else { -1. };
                    start *= 3.14 * c;
                    wrapping_sum += start;
                    // println!("{} | start: {}", stringify!($id), start);
                    v = v.replace(i, start);
                }
                let vwrapping_sum = v.wrapping_sum();
                println!(
                    "{} | lwrapping_sum: {}",
                    stringify!($id),
                    wrapping_sum
                );
                println!(
                    "{} | vwrapping_sum: {}",
                    stringify!($id),
                    vwrapping_sum
                );
                let r = vwrapping_sum.as_int() == wrapping_sum.as_int();
                // This is false in general; the intrinsic performs a
                // tree-reduce:
                println!("{} | equal: {}", stringify!($id), r);

                let mut a = [0. as $elem_ty; $id::lanes()];
                v.store_unaligned(&mut a);

                let twrapping_sum = a.tree_reduce_add();
                println!(
                    "{} | twrapping_sum: {}",
                    stringify!($id),
                    twrapping_sum
                );

                // tolerate 1 ULP difference:
                if vwrapping_sum.as_int() > twrapping_sum.as_int() {
                    assert!(
                        vwrapping_sum.as_int() - twrapping_sum.as_int()
                            < 2,
                        "v: {:?} | vwrapping_sum: {} | twrapping_sum: {}",
                        v,
                        vwrapping_sum,
                        twrapping_sum
                    );
                } else {
                    assert!(
                        twrapping_sum.as_int() - vwrapping_sum.as_int()
                            < 2,
                        "v: {:?} | vwrapping_sum: {} | twrapping_sum: {}",
                        v,
                        vwrapping_sum,
                        twrapping_sum
                    );
                }
            }
            unsafe { test_fn() };
        }
    };
}

    #[test]
    fn wrapping_sum_roundoff_test() {
        finvoke!(wrapping_sum_roundoff_test);
    }

    trait TreeReduceMul {
        type R;
        fn tree_reduce_mul(self) -> Self::R;
    }

    macro_rules! tree_reduce_mul_f {
    ($elem_ty:ident) => {
        impl<'a> TreeReduceMul for &'a [$elem_ty] {
            type R = $elem_ty;
            fn tree_reduce_mul(self) -> $elem_ty {
                if self.len() == 2 {
                    println!("  lv: {}, rv: {} => {}", self[0], self[1], self[0] * self[1]);
                    self[0] * self[1]
                } else {
                    let mid = self.len() / 2;
                    let (left, right) = self.split_at(mid);
                    println!("  splitting self: {:?} at mid {} into left: {:?}, right: {:?}", self, mid, self[0], self[1]);
                    Self::tree_reduce_mul(left) * Self::tree_reduce_mul(right)
                }
            }
        }
    };
}

    tree_reduce_mul_f!(f32);
    tree_reduce_mul_f!(f64);

    macro_rules! wrapping_product_roundoff_test {
        ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
            if $feature_macro!($feature) {
                #[target_feature(enable = $feature)]
                unsafe fn test_fn() {
                    let mut start = std::$elem_ty::EPSILON;
                    let mut mul = 1. as $elem_ty;

                    let mut v = $id::splat(1. as $elem_ty);
                    for i in 0..$id::lanes() {
                        let c = if i % 2 == 0 { 1e3 } else { -1. };
                        start *= 3.14 * c;
                        mul *= start;
                        println!("{} | start: {}", stringify!($id), start);
                        v = v.replace(i, start);
                    }
                    let vmul = v.wrapping_product();
                    println!("{} | lmul: {}", stringify!($id), mul);
                    println!("{} | vmul: {}", stringify!($id), vmul);
                    let r = vmul.as_int() == mul.as_int();
                    // This is false in general; the intrinsic performs a
                    // tree-reduce:
                    println!("{} | equal: {}", stringify!($id), r);

                    let mut a = [0. as $elem_ty; $id::lanes()];
                    v.store_unaligned(&mut a);

                    let tmul = a.tree_reduce_mul();
                    println!("{} | tmul: {}", stringify!($id), tmul);

                    // tolerate 1 ULP difference:
                    if vmul.as_int() > tmul.as_int() {
                        assert!(
                            vmul.as_int() - tmul.as_int() < 2,
                            "v: {:?} | vmul: {} | tmul: {}",
                            v,
                            vmul,
                            tmul
                        );
                    } else {
                        assert!(
                            tmul.as_int() - vmul.as_int() < 2,
                            "v: {:?} | vmul: {} | tmul: {}",
                            v,
                            vmul,
                            tmul
                        );
                    }
                }
                unsafe { test_fn() };
            }
        };
    }

    #[test]
    fn wrapping_product_roundoff_test() {
        finvoke!(wrapping_product_roundoff_test);
    }

    macro_rules! wrapping_sum_overflow_test {
        ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
            if $feature_macro!($feature) {
                #[target_feature(enable = $feature)]
                unsafe fn test_fn() {
                    let start = $elem_ty::max_value()
                        - ($id::lanes() as $elem_ty / 2);

                    let v = $id::splat(start as $elem_ty);
                    let vwrapping_sum = v.wrapping_sum();

                    let mut wrapping_sum = start;
                    for _ in 1..$id::lanes() {
                        wrapping_sum = wrapping_sum.wrapping_add(start);
                    }
                    assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v);
                }
                unsafe { test_fn() };
            }
        };
    }

    #[test]
    fn wrapping_sum_overflow_test() {
        iinvoke!(wrapping_sum_overflow_test);
    }

    macro_rules! mul_overflow_test {
        ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
            if $feature_macro!($feature) {
                #[target_feature(enable = $feature)]
                unsafe fn test_fn() {
                    let start = $elem_ty::max_value()
                        - ($id::lanes() as $elem_ty / 2);

                    let v = $id::splat(start as $elem_ty);
                    let vmul = v.wrapping_product();

                    let mut mul = start;
                    for _ in 1..$id::lanes() {
                        mul = mul.wrapping_mul(start);
                    }
                    assert_eq!(mul, vmul, "v = {:?}", v);
                }
                unsafe { test_fn() };
            }
        };
    }

    #[test]
    fn mul_overflow_test() {
        iinvoke!(mul_overflow_test);
    }

}