mirror of
https://github.com/rust-lang/rust.git
synced 2025-10-27 19:16:36 +00:00
The PR had some unforseen perf regressions that are not as easy to find. Revert the PR for now. This reverts commit 6ae8912a3e7d2c4c775024f58a7ba4b1aedc4073, reversing changes made to 86d6d2b7389fe1b339402c1798edae8b695fc9ef.
2759 lines
98 KiB
Rust
2759 lines
98 KiB
Rust
use crate::{Category, ExpInt, IEK_INF, IEK_NAN, IEK_ZERO};
|
|
use crate::{Float, FloatConvert, ParseError, Round, Status, StatusAnd};
|
|
|
|
use core::cmp::{self, Ordering};
|
|
use core::convert::TryFrom;
|
|
use core::fmt::{self, Write};
|
|
use core::marker::PhantomData;
|
|
use core::mem;
|
|
use core::ops::Neg;
|
|
use smallvec::{smallvec, SmallVec};
|
|
|
|
#[must_use]
|
|
pub struct IeeeFloat<S> {
|
|
/// Absolute significand value (including the integer bit).
|
|
sig: [Limb; 1],
|
|
|
|
/// The signed unbiased exponent of the value.
|
|
exp: ExpInt,
|
|
|
|
/// What kind of floating point number this is.
|
|
category: Category,
|
|
|
|
/// Sign bit of the number.
|
|
sign: bool,
|
|
|
|
marker: PhantomData<S>,
|
|
}
|
|
|
|
/// Fundamental unit of big integer arithmetic, but also
|
|
/// large to store the largest significands by itself.
|
|
type Limb = u128;
|
|
const LIMB_BITS: usize = 128;
|
|
fn limbs_for_bits(bits: usize) -> usize {
|
|
(bits + LIMB_BITS - 1) / LIMB_BITS
|
|
}
|
|
|
|
/// Enum that represents what fraction of the LSB truncated bits of an fp number
|
|
/// represent.
|
|
///
|
|
/// This essentially combines the roles of guard and sticky bits.
|
|
#[must_use]
|
|
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
|
|
enum Loss {
|
|
// Example of truncated bits:
|
|
ExactlyZero, // 000000
|
|
LessThanHalf, // 0xxxxx x's not all zero
|
|
ExactlyHalf, // 100000
|
|
MoreThanHalf, // 1xxxxx x's not all zero
|
|
}
|
|
|
|
/// Represents floating point arithmetic semantics.
|
|
pub trait Semantics: Sized {
|
|
/// Total number of bits in the in-memory format.
|
|
const BITS: usize;
|
|
|
|
/// Number of bits in the significand. This includes the integer bit.
|
|
const PRECISION: usize;
|
|
|
|
/// The largest E such that 2<sup>E</sup> is representable; this matches the
|
|
/// definition of IEEE 754.
|
|
const MAX_EXP: ExpInt;
|
|
|
|
/// The smallest E such that 2<sup>E</sup> is a normalized number; this
|
|
/// matches the definition of IEEE 754.
|
|
const MIN_EXP: ExpInt = -Self::MAX_EXP + 1;
|
|
|
|
/// The significand bit that marks NaN as quiet.
|
|
const QNAN_BIT: usize = Self::PRECISION - 2;
|
|
|
|
/// The significand bitpattern to mark a NaN as quiet.
|
|
/// NOTE: for X87DoubleExtended we need to set two bits instead of 2.
|
|
const QNAN_SIGNIFICAND: Limb = 1 << Self::QNAN_BIT;
|
|
|
|
fn from_bits(bits: u128) -> IeeeFloat<Self> {
|
|
assert!(Self::BITS > Self::PRECISION);
|
|
|
|
let sign = bits & (1 << (Self::BITS - 1));
|
|
let exponent = (bits & !sign) >> (Self::PRECISION - 1);
|
|
let mut r = IeeeFloat {
|
|
sig: [bits & ((1 << (Self::PRECISION - 1)) - 1)],
|
|
// Convert the exponent from its bias representation to a signed integer.
|
|
exp: (exponent as ExpInt) - Self::MAX_EXP,
|
|
category: Category::Zero,
|
|
sign: sign != 0,
|
|
marker: PhantomData,
|
|
};
|
|
|
|
if r.exp == Self::MIN_EXP - 1 && r.sig == [0] {
|
|
// Exponent, significand meaningless.
|
|
r.category = Category::Zero;
|
|
} else if r.exp == Self::MAX_EXP + 1 && r.sig == [0] {
|
|
// Exponent, significand meaningless.
|
|
r.category = Category::Infinity;
|
|
} else if r.exp == Self::MAX_EXP + 1 && r.sig != [0] {
|
|
// Sign, exponent, significand meaningless.
|
|
r.category = Category::NaN;
|
|
} else {
|
|
r.category = Category::Normal;
|
|
if r.exp == Self::MIN_EXP - 1 {
|
|
// Denormal.
|
|
r.exp = Self::MIN_EXP;
|
|
} else {
|
|
// Set integer bit.
|
|
sig::set_bit(&mut r.sig, Self::PRECISION - 1);
|
|
}
|
|
}
|
|
|
|
r
|
|
}
|
|
|
|
fn to_bits(x: IeeeFloat<Self>) -> u128 {
|
|
assert!(Self::BITS > Self::PRECISION);
|
|
|
|
// Split integer bit from significand.
|
|
let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1);
|
|
let mut significand = x.sig[0] & ((1 << (Self::PRECISION - 1)) - 1);
|
|
let exponent = match x.category {
|
|
Category::Normal => {
|
|
if x.exp == Self::MIN_EXP && !integer_bit {
|
|
// Denormal.
|
|
Self::MIN_EXP - 1
|
|
} else {
|
|
x.exp
|
|
}
|
|
}
|
|
Category::Zero => {
|
|
// FIXME(eddyb) Maybe we should guarantee an invariant instead?
|
|
significand = 0;
|
|
Self::MIN_EXP - 1
|
|
}
|
|
Category::Infinity => {
|
|
// FIXME(eddyb) Maybe we should guarantee an invariant instead?
|
|
significand = 0;
|
|
Self::MAX_EXP + 1
|
|
}
|
|
Category::NaN => Self::MAX_EXP + 1,
|
|
};
|
|
|
|
// Convert the exponent from a signed integer to its bias representation.
|
|
let exponent = (exponent + Self::MAX_EXP) as u128;
|
|
|
|
((x.sign as u128) << (Self::BITS - 1)) | (exponent << (Self::PRECISION - 1)) | significand
|
|
}
|
|
}
|
|
|
|
impl<S> Copy for IeeeFloat<S> {}
|
|
impl<S> Clone for IeeeFloat<S> {
|
|
fn clone(&self) -> Self {
|
|
*self
|
|
}
|
|
}
|
|
|
|
macro_rules! ieee_semantics {
|
|
($($name:ident = $sem:ident($bits:tt : $exp_bits:tt)),*) => {
|
|
$(pub struct $sem;)*
|
|
$(pub type $name = IeeeFloat<$sem>;)*
|
|
$(impl Semantics for $sem {
|
|
const BITS: usize = $bits;
|
|
const PRECISION: usize = ($bits - 1 - $exp_bits) + 1;
|
|
const MAX_EXP: ExpInt = (1 << ($exp_bits - 1)) - 1;
|
|
})*
|
|
}
|
|
}
|
|
|
|
ieee_semantics! {
|
|
Half = HalfS(16:5),
|
|
Single = SingleS(32:8),
|
|
Double = DoubleS(64:11),
|
|
Quad = QuadS(128:15)
|
|
}
|
|
|
|
pub struct X87DoubleExtendedS;
|
|
pub type X87DoubleExtended = IeeeFloat<X87DoubleExtendedS>;
|
|
impl Semantics for X87DoubleExtendedS {
|
|
const BITS: usize = 80;
|
|
const PRECISION: usize = 64;
|
|
const MAX_EXP: ExpInt = (1 << (15 - 1)) - 1;
|
|
|
|
/// For x87 extended precision, we want to make a NaN, not a
|
|
/// pseudo-NaN. Maybe we should expose the ability to make
|
|
/// pseudo-NaNs?
|
|
const QNAN_SIGNIFICAND: Limb = 0b11 << Self::QNAN_BIT;
|
|
|
|
/// Integer bit is explicit in this format. Intel hardware (387 and later)
|
|
/// does not support these bit patterns:
|
|
/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
|
|
/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
|
|
/// exponent = 0, integer bit 1 ("pseudodenormal")
|
|
/// exponent != 0 nor all 1's, integer bit 0 ("unnormal")
|
|
/// At the moment, the first two are treated as NaNs, the second two as Normal.
|
|
fn from_bits(bits: u128) -> IeeeFloat<Self> {
|
|
let sign = bits & (1 << (Self::BITS - 1));
|
|
let exponent = (bits & !sign) >> Self::PRECISION;
|
|
let mut r = IeeeFloat {
|
|
sig: [bits & ((1 << (Self::PRECISION - 1)) - 1)],
|
|
// Convert the exponent from its bias representation to a signed integer.
|
|
exp: (exponent as ExpInt) - Self::MAX_EXP,
|
|
category: Category::Zero,
|
|
sign: sign != 0,
|
|
marker: PhantomData,
|
|
};
|
|
|
|
if r.exp == Self::MIN_EXP - 1 && r.sig == [0] {
|
|
// Exponent, significand meaningless.
|
|
r.category = Category::Zero;
|
|
} else if r.exp == Self::MAX_EXP + 1 && r.sig == [1 << (Self::PRECISION - 1)] {
|
|
// Exponent, significand meaningless.
|
|
r.category = Category::Infinity;
|
|
} else if r.exp == Self::MAX_EXP + 1 && r.sig != [1 << (Self::PRECISION - 1)] {
|
|
// Sign, exponent, significand meaningless.
|
|
r.category = Category::NaN;
|
|
} else {
|
|
r.category = Category::Normal;
|
|
if r.exp == Self::MIN_EXP - 1 {
|
|
// Denormal.
|
|
r.exp = Self::MIN_EXP;
|
|
}
|
|
}
|
|
|
|
r
|
|
}
|
|
|
|
fn to_bits(x: IeeeFloat<Self>) -> u128 {
|
|
// Get integer bit from significand.
|
|
let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1);
|
|
let mut significand = x.sig[0] & ((1 << Self::PRECISION) - 1);
|
|
let exponent = match x.category {
|
|
Category::Normal => {
|
|
if x.exp == Self::MIN_EXP && !integer_bit {
|
|
// Denormal.
|
|
Self::MIN_EXP - 1
|
|
} else {
|
|
x.exp
|
|
}
|
|
}
|
|
Category::Zero => {
|
|
// FIXME(eddyb) Maybe we should guarantee an invariant instead?
|
|
significand = 0;
|
|
Self::MIN_EXP - 1
|
|
}
|
|
Category::Infinity => {
|
|
// FIXME(eddyb) Maybe we should guarantee an invariant instead?
|
|
significand = 1 << (Self::PRECISION - 1);
|
|
Self::MAX_EXP + 1
|
|
}
|
|
Category::NaN => Self::MAX_EXP + 1,
|
|
};
|
|
|
|
// Convert the exponent from a signed integer to its bias representation.
|
|
let exponent = (exponent + Self::MAX_EXP) as u128;
|
|
|
|
((x.sign as u128) << (Self::BITS - 1)) | (exponent << Self::PRECISION) | significand
|
|
}
|
|
}
|
|
|
|
float_common_impls!(IeeeFloat<S>);
|
|
|
|
impl<S: Semantics> PartialEq for IeeeFloat<S> {
|
|
fn eq(&self, rhs: &Self) -> bool {
|
|
self.partial_cmp(rhs) == Some(Ordering::Equal)
|
|
}
|
|
}
|
|
|
|
impl<S: Semantics> PartialOrd for IeeeFloat<S> {
|
|
fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
|
|
match (self.category, rhs.category) {
|
|
(Category::NaN, _) | (_, Category::NaN) => None,
|
|
|
|
(Category::Infinity, Category::Infinity) => Some((!self.sign).cmp(&(!rhs.sign))),
|
|
|
|
(Category::Zero, Category::Zero) => Some(Ordering::Equal),
|
|
|
|
(Category::Infinity, _) | (Category::Normal, Category::Zero) => {
|
|
Some((!self.sign).cmp(&self.sign))
|
|
}
|
|
|
|
(_, Category::Infinity) | (Category::Zero, Category::Normal) => {
|
|
Some(rhs.sign.cmp(&(!rhs.sign)))
|
|
}
|
|
|
|
(Category::Normal, Category::Normal) => {
|
|
// Two normal numbers. Do they have the same sign?
|
|
Some((!self.sign).cmp(&(!rhs.sign)).then_with(|| {
|
|
// Compare absolute values; invert result if negative.
|
|
let result = self.cmp_abs_normal(*rhs);
|
|
|
|
if self.sign { result.reverse() } else { result }
|
|
}))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<S> Neg for IeeeFloat<S> {
|
|
type Output = Self;
|
|
fn neg(mut self) -> Self {
|
|
self.sign = !self.sign;
|
|
self
|
|
}
|
|
}
|
|
|
|
/// Prints this value as a decimal string.
|
|
///
|
|
/// \param precision The maximum number of digits of
|
|
/// precision to output. If there are fewer digits available,
|
|
/// zero padding will not be used unless the value is
|
|
/// integral and small enough to be expressed in
|
|
/// precision digits. 0 means to use the natural
|
|
/// precision of the number.
|
|
/// \param width The maximum number of zeros to
|
|
/// consider inserting before falling back to scientific
|
|
/// notation. 0 means to always use scientific notation.
|
|
///
|
|
/// \param alternate Indicate whether to remove the trailing zero in
|
|
/// fraction part or not. Also setting this parameter to true forces
|
|
/// producing of output more similar to default printf behavior.
|
|
/// Specifically the lower e is used as exponent delimiter and exponent
|
|
/// always contains no less than two digits.
|
|
///
|
|
/// Number precision width Result
|
|
/// ------ --------- ----- ------
|
|
/// 1.01E+4 5 2 10100
|
|
/// 1.01E+4 4 2 1.01E+4
|
|
/// 1.01E+4 5 1 1.01E+4
|
|
/// 1.01E-2 5 2 0.0101
|
|
/// 1.01E-2 4 2 0.0101
|
|
/// 1.01E-2 4 1 1.01E-2
|
|
impl<S: Semantics> fmt::Display for IeeeFloat<S> {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
let width = f.width().unwrap_or(3);
|
|
let alternate = f.alternate();
|
|
|
|
match self.category {
|
|
Category::Infinity => {
|
|
if self.sign {
|
|
return f.write_str("-Inf");
|
|
} else {
|
|
return f.write_str("+Inf");
|
|
}
|
|
}
|
|
|
|
Category::NaN => return f.write_str("NaN"),
|
|
|
|
Category::Zero => {
|
|
if self.sign {
|
|
f.write_char('-')?;
|
|
}
|
|
|
|
if width == 0 {
|
|
if alternate {
|
|
f.write_str("0.0")?;
|
|
if let Some(n) = f.precision() {
|
|
for _ in 1..n {
|
|
f.write_char('0')?;
|
|
}
|
|
}
|
|
f.write_str("e+00")?;
|
|
} else {
|
|
f.write_str("0.0E+0")?;
|
|
}
|
|
} else {
|
|
f.write_char('0')?;
|
|
}
|
|
return Ok(());
|
|
}
|
|
|
|
Category::Normal => {}
|
|
}
|
|
|
|
if self.sign {
|
|
f.write_char('-')?;
|
|
}
|
|
|
|
// We use enough digits so the number can be round-tripped back to an
|
|
// APFloat. The formula comes from "How to Print Floating-Point Numbers
|
|
// Accurately" by Steele and White.
|
|
// FIXME: Using a formula based purely on the precision is conservative;
|
|
// we can print fewer digits depending on the actual value being printed.
|
|
|
|
// precision = 2 + floor(S::PRECISION / lg_2(10))
|
|
let precision = f.precision().unwrap_or(2 + S::PRECISION * 59 / 196);
|
|
|
|
// Decompose the number into an APInt and an exponent.
|
|
let mut exp = self.exp - (S::PRECISION as ExpInt - 1);
|
|
let mut sig = vec![self.sig[0]];
|
|
|
|
// Ignore trailing binary zeros.
|
|
let trailing_zeros = sig[0].trailing_zeros();
|
|
let _: Loss = sig::shift_right(&mut sig, &mut exp, trailing_zeros as usize);
|
|
|
|
// Change the exponent from 2^e to 10^e.
|
|
if exp == 0 {
|
|
// Nothing to do.
|
|
} else if exp > 0 {
|
|
// Just shift left.
|
|
let shift = exp as usize;
|
|
sig.resize(limbs_for_bits(S::PRECISION + shift), 0);
|
|
sig::shift_left(&mut sig, &mut exp, shift);
|
|
} else {
|
|
// exp < 0
|
|
let mut texp = -exp as usize;
|
|
|
|
// We transform this using the identity:
|
|
// (N)(2^-e) == (N)(5^e)(10^-e)
|
|
|
|
// Multiply significand by 5^e.
|
|
// N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
|
|
let mut sig_scratch = vec![];
|
|
let mut p5 = vec![];
|
|
let mut p5_scratch = vec![];
|
|
while texp != 0 {
|
|
if p5.is_empty() {
|
|
p5.push(5);
|
|
} else {
|
|
p5_scratch.resize(p5.len() * 2, 0);
|
|
let _: Loss =
|
|
sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS);
|
|
while p5_scratch.last() == Some(&0) {
|
|
p5_scratch.pop();
|
|
}
|
|
mem::swap(&mut p5, &mut p5_scratch);
|
|
}
|
|
if texp & 1 != 0 {
|
|
sig_scratch.resize(sig.len() + p5.len(), 0);
|
|
let _: Loss = sig::mul(
|
|
&mut sig_scratch,
|
|
&mut 0,
|
|
&sig,
|
|
&p5,
|
|
(sig.len() + p5.len()) * LIMB_BITS,
|
|
);
|
|
while sig_scratch.last() == Some(&0) {
|
|
sig_scratch.pop();
|
|
}
|
|
mem::swap(&mut sig, &mut sig_scratch);
|
|
}
|
|
texp >>= 1;
|
|
}
|
|
}
|
|
|
|
// Fill the buffer.
|
|
let mut buffer = vec![];
|
|
|
|
// Ignore digits from the significand until it is no more
|
|
// precise than is required for the desired precision.
|
|
// 196/59 is a very slight overestimate of lg_2(10).
|
|
let required = (precision * 196 + 58) / 59;
|
|
let mut discard_digits = sig::omsb(&sig).saturating_sub(required) * 59 / 196;
|
|
let mut in_trail = true;
|
|
while !sig.is_empty() {
|
|
// Perform short division by 10 to extract the rightmost digit.
|
|
// rem <- sig % 10
|
|
// sig <- sig / 10
|
|
let mut rem = 0;
|
|
|
|
// Use 64-bit division and remainder, with 32-bit chunks from sig.
|
|
sig::each_chunk(&mut sig, 32, |chunk| {
|
|
let chunk = chunk as u32;
|
|
let combined = ((rem as u64) << 32) | (chunk as u64);
|
|
rem = (combined % 10) as u8;
|
|
(combined / 10) as u32 as Limb
|
|
});
|
|
|
|
// Reduce the sigificand to avoid wasting time dividing 0's.
|
|
while sig.last() == Some(&0) {
|
|
sig.pop();
|
|
}
|
|
|
|
let digit = rem;
|
|
|
|
// Ignore digits we don't need.
|
|
if discard_digits > 0 {
|
|
discard_digits -= 1;
|
|
exp += 1;
|
|
continue;
|
|
}
|
|
|
|
// Drop trailing zeros.
|
|
if in_trail && digit == 0 {
|
|
exp += 1;
|
|
} else {
|
|
in_trail = false;
|
|
buffer.push(b'0' + digit);
|
|
}
|
|
}
|
|
|
|
assert!(!buffer.is_empty(), "no characters in buffer!");
|
|
|
|
// Drop down to precision.
|
|
// FIXME: don't do more precise calculations above than are required.
|
|
if buffer.len() > precision {
|
|
// The most significant figures are the last ones in the buffer.
|
|
let mut first_sig = buffer.len() - precision;
|
|
|
|
// Round.
|
|
// FIXME: this probably shouldn't use 'round half up'.
|
|
|
|
// Rounding down is just a truncation, except we also want to drop
|
|
// trailing zeros from the new result.
|
|
if buffer[first_sig - 1] < b'5' {
|
|
while first_sig < buffer.len() && buffer[first_sig] == b'0' {
|
|
first_sig += 1;
|
|
}
|
|
} else {
|
|
// Rounding up requires a decimal add-with-carry. If we continue
|
|
// the carry, the newly-introduced zeros will just be truncated.
|
|
for x in &mut buffer[first_sig..] {
|
|
if *x == b'9' {
|
|
first_sig += 1;
|
|
} else {
|
|
*x += 1;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
exp += first_sig as ExpInt;
|
|
buffer.drain(..first_sig);
|
|
|
|
// If we carried through, we have exactly one digit of precision.
|
|
if buffer.is_empty() {
|
|
buffer.push(b'1');
|
|
}
|
|
}
|
|
|
|
let digits = buffer.len();
|
|
|
|
// Check whether we should use scientific notation.
|
|
let scientific = if width == 0 {
|
|
true
|
|
} else if exp >= 0 {
|
|
// 765e3 --> 765000
|
|
// ^^^
|
|
// But we shouldn't make the number look more precise than it is.
|
|
exp as usize > width || digits + exp as usize > precision
|
|
} else {
|
|
// Power of the most significant digit.
|
|
let msd = exp + (digits - 1) as ExpInt;
|
|
if msd >= 0 {
|
|
// 765e-2 == 7.65
|
|
false
|
|
} else {
|
|
// 765e-5 == 0.00765
|
|
// ^ ^^
|
|
-msd as usize > width
|
|
}
|
|
};
|
|
|
|
// Scientific formatting is pretty straightforward.
|
|
if scientific {
|
|
exp += digits as ExpInt - 1;
|
|
|
|
f.write_char(buffer[digits - 1] as char)?;
|
|
f.write_char('.')?;
|
|
let truncate_zero = !alternate;
|
|
if digits == 1 && truncate_zero {
|
|
f.write_char('0')?;
|
|
} else {
|
|
for &d in buffer[..digits - 1].iter().rev() {
|
|
f.write_char(d as char)?;
|
|
}
|
|
}
|
|
// Fill with zeros up to precision.
|
|
if !truncate_zero && precision > digits - 1 {
|
|
for _ in 0..=precision - digits {
|
|
f.write_char('0')?;
|
|
}
|
|
}
|
|
// For alternate we use lower 'e'.
|
|
f.write_char(if alternate { 'e' } else { 'E' })?;
|
|
|
|
// Exponent always at least two digits if we do not truncate zeros.
|
|
if truncate_zero {
|
|
write!(f, "{:+}", exp)?;
|
|
} else {
|
|
write!(f, "{:+03}", exp)?;
|
|
}
|
|
|
|
return Ok(());
|
|
}
|
|
|
|
// Non-scientific, positive exponents.
|
|
if exp >= 0 {
|
|
for &d in buffer.iter().rev() {
|
|
f.write_char(d as char)?;
|
|
}
|
|
for _ in 0..exp {
|
|
f.write_char('0')?;
|
|
}
|
|
return Ok(());
|
|
}
|
|
|
|
// Non-scientific, negative exponents.
|
|
let unit_place = -exp as usize;
|
|
if unit_place < digits {
|
|
for &d in buffer[unit_place..].iter().rev() {
|
|
f.write_char(d as char)?;
|
|
}
|
|
f.write_char('.')?;
|
|
for &d in buffer[..unit_place].iter().rev() {
|
|
f.write_char(d as char)?;
|
|
}
|
|
} else {
|
|
f.write_str("0.")?;
|
|
for _ in digits..unit_place {
|
|
f.write_char('0')?;
|
|
}
|
|
for &d in buffer.iter().rev() {
|
|
f.write_char(d as char)?;
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
impl<S: Semantics> fmt::Debug for IeeeFloat<S> {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
write!(
|
|
f,
|
|
"{}({:?} | {}{:?} * 2^{})",
|
|
self,
|
|
self.category,
|
|
if self.sign { "-" } else { "+" },
|
|
self.sig,
|
|
self.exp
|
|
)
|
|
}
|
|
}
|
|
|
|
impl<S: Semantics> Float for IeeeFloat<S> {
|
|
const BITS: usize = S::BITS;
|
|
const PRECISION: usize = S::PRECISION;
|
|
const MAX_EXP: ExpInt = S::MAX_EXP;
|
|
const MIN_EXP: ExpInt = S::MIN_EXP;
|
|
|
|
const ZERO: Self = IeeeFloat {
|
|
sig: [0],
|
|
exp: S::MIN_EXP - 1,
|
|
category: Category::Zero,
|
|
sign: false,
|
|
marker: PhantomData,
|
|
};
|
|
|
|
const INFINITY: Self = IeeeFloat {
|
|
sig: [0],
|
|
exp: S::MAX_EXP + 1,
|
|
category: Category::Infinity,
|
|
sign: false,
|
|
marker: PhantomData,
|
|
};
|
|
|
|
// FIXME(eddyb) remove when qnan becomes const fn.
|
|
const NAN: Self = IeeeFloat {
|
|
sig: [S::QNAN_SIGNIFICAND],
|
|
exp: S::MAX_EXP + 1,
|
|
category: Category::NaN,
|
|
sign: false,
|
|
marker: PhantomData,
|
|
};
|
|
|
|
fn qnan(payload: Option<u128>) -> Self {
|
|
IeeeFloat {
|
|
sig: [S::QNAN_SIGNIFICAND
|
|
| payload.map_or(0, |payload| {
|
|
// Zero out the excess bits of the significand.
|
|
payload & ((1 << S::QNAN_BIT) - 1)
|
|
})],
|
|
exp: S::MAX_EXP + 1,
|
|
category: Category::NaN,
|
|
sign: false,
|
|
marker: PhantomData,
|
|
}
|
|
}
|
|
|
|
fn snan(payload: Option<u128>) -> Self {
|
|
let mut snan = Self::qnan(payload);
|
|
|
|
// We always have to clear the QNaN bit to make it an SNaN.
|
|
sig::clear_bit(&mut snan.sig, S::QNAN_BIT);
|
|
|
|
// If there are no bits set in the payload, we have to set
|
|
// *something* to make it a NaN instead of an infinity;
|
|
// conventionally, this is the next bit down from the QNaN bit.
|
|
if snan.sig[0] & !S::QNAN_SIGNIFICAND == 0 {
|
|
sig::set_bit(&mut snan.sig, S::QNAN_BIT - 1);
|
|
}
|
|
|
|
snan
|
|
}
|
|
|
|
fn largest() -> Self {
|
|
// We want (in interchange format):
|
|
// exponent = 1..10
|
|
// significand = 1..1
|
|
IeeeFloat {
|
|
sig: [(1 << S::PRECISION) - 1],
|
|
exp: S::MAX_EXP,
|
|
category: Category::Normal,
|
|
sign: false,
|
|
marker: PhantomData,
|
|
}
|
|
}
|
|
|
|
// We want (in interchange format):
|
|
// exponent = 0..0
|
|
// significand = 0..01
|
|
const SMALLEST: Self = IeeeFloat {
|
|
sig: [1],
|
|
exp: S::MIN_EXP,
|
|
category: Category::Normal,
|
|
sign: false,
|
|
marker: PhantomData,
|
|
};
|
|
|
|
fn smallest_normalized() -> Self {
|
|
// We want (in interchange format):
|
|
// exponent = 0..0
|
|
// significand = 10..0
|
|
IeeeFloat {
|
|
sig: [1 << (S::PRECISION - 1)],
|
|
exp: S::MIN_EXP,
|
|
category: Category::Normal,
|
|
sign: false,
|
|
marker: PhantomData,
|
|
}
|
|
}
|
|
|
|
fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
|
|
let status = match (self.category, rhs.category) {
|
|
(Category::Infinity, Category::Infinity) => {
|
|
// Differently signed infinities can only be validly
|
|
// subtracted.
|
|
if self.sign != rhs.sign {
|
|
self = Self::NAN;
|
|
Status::INVALID_OP
|
|
} else {
|
|
Status::OK
|
|
}
|
|
}
|
|
|
|
// Sign may depend on rounding mode; handled below.
|
|
(_, Category::Zero) | (Category::NaN, _) | (Category::Infinity, Category::Normal) => {
|
|
Status::OK
|
|
}
|
|
|
|
(Category::Zero, _) | (_, Category::NaN | Category::Infinity) => {
|
|
self = rhs;
|
|
Status::OK
|
|
}
|
|
|
|
// This return code means it was not a simple case.
|
|
(Category::Normal, Category::Normal) => {
|
|
let loss = sig::add_or_sub(
|
|
&mut self.sig,
|
|
&mut self.exp,
|
|
&mut self.sign,
|
|
&mut [rhs.sig[0]],
|
|
rhs.exp,
|
|
rhs.sign,
|
|
);
|
|
let status;
|
|
self = unpack!(status=, self.normalize(round, loss));
|
|
|
|
// Can only be zero if we lost no fraction.
|
|
assert!(self.category != Category::Zero || loss == Loss::ExactlyZero);
|
|
|
|
status
|
|
}
|
|
};
|
|
|
|
// If two numbers add (exactly) to zero, IEEE 754 decrees it is a
|
|
// positive zero unless rounding to minus infinity, except that
|
|
// adding two like-signed zeroes gives that zero.
|
|
if self.category == Category::Zero
|
|
&& (rhs.category != Category::Zero || self.sign != rhs.sign)
|
|
{
|
|
self.sign = round == Round::TowardNegative;
|
|
}
|
|
|
|
status.and(self)
|
|
}
|
|
|
|
fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
|
|
self.sign ^= rhs.sign;
|
|
|
|
match (self.category, rhs.category) {
|
|
(Category::NaN, _) => {
|
|
self.sign = false;
|
|
Status::OK.and(self)
|
|
}
|
|
|
|
(_, Category::NaN) => {
|
|
self.sign = false;
|
|
self.category = Category::NaN;
|
|
self.sig = rhs.sig;
|
|
Status::OK.and(self)
|
|
}
|
|
|
|
(Category::Zero, Category::Infinity) | (Category::Infinity, Category::Zero) => {
|
|
Status::INVALID_OP.and(Self::NAN)
|
|
}
|
|
|
|
(_, Category::Infinity) | (Category::Infinity, _) => {
|
|
self.category = Category::Infinity;
|
|
Status::OK.and(self)
|
|
}
|
|
|
|
(Category::Zero, _) | (_, Category::Zero) => {
|
|
self.category = Category::Zero;
|
|
Status::OK.and(self)
|
|
}
|
|
|
|
(Category::Normal, Category::Normal) => {
|
|
self.exp += rhs.exp;
|
|
let mut wide_sig = [0; 2];
|
|
let loss =
|
|
sig::mul(&mut wide_sig, &mut self.exp, &self.sig, &rhs.sig, S::PRECISION);
|
|
self.sig = [wide_sig[0]];
|
|
let mut status;
|
|
self = unpack!(status=, self.normalize(round, loss));
|
|
if loss != Loss::ExactlyZero {
|
|
status |= Status::INEXACT;
|
|
}
|
|
status.and(self)
|
|
}
|
|
}
|
|
}
|
|
|
|
fn mul_add_r(mut self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self> {
|
|
// If and only if all arguments are normal do we need to do an
|
|
// extended-precision calculation.
|
|
if !self.is_finite_non_zero() || !multiplicand.is_finite_non_zero() || !addend.is_finite() {
|
|
let mut status;
|
|
self = unpack!(status=, self.mul_r(multiplicand, round));
|
|
|
|
// FS can only be Status::OK or Status::INVALID_OP. There is no more work
|
|
// to do in the latter case. The IEEE-754R standard says it is
|
|
// implementation-defined in this case whether, if ADDEND is a
|
|
// quiet NaN, we raise invalid op; this implementation does so.
|
|
//
|
|
// If we need to do the addition we can do so with normal
|
|
// precision.
|
|
if status == Status::OK {
|
|
self = unpack!(status=, self.add_r(addend, round));
|
|
}
|
|
return status.and(self);
|
|
}
|
|
|
|
// Post-multiplication sign, before addition.
|
|
self.sign ^= multiplicand.sign;
|
|
|
|
// Allocate space for twice as many bits as the original significand, plus one
|
|
// extra bit for the addition to overflow into.
|
|
assert!(limbs_for_bits(S::PRECISION * 2 + 1) <= 2);
|
|
let mut wide_sig = sig::widening_mul(self.sig[0], multiplicand.sig[0]);
|
|
|
|
let mut loss = Loss::ExactlyZero;
|
|
let mut omsb = sig::omsb(&wide_sig);
|
|
self.exp += multiplicand.exp;
|
|
|
|
// Assume the operands involved in the multiplication are single-precision
|
|
// FP, and the two multiplicants are:
|
|
// lhs = a23 . a22 ... a0 * 2^e1
|
|
// rhs = b23 . b22 ... b0 * 2^e2
|
|
// the result of multiplication is:
|
|
// lhs = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
|
|
// Note that there are three significant bits at the left-hand side of the
|
|
// radix point: two for the multiplication, and an overflow bit for the
|
|
// addition (that will always be zero at this point). Move the radix point
|
|
// toward left by two bits, and adjust exponent accordingly.
|
|
self.exp += 2;
|
|
|
|
if addend.is_non_zero() {
|
|
// Normalize our MSB to one below the top bit to allow for overflow.
|
|
let ext_precision = 2 * S::PRECISION + 1;
|
|
if omsb != ext_precision - 1 {
|
|
assert!(ext_precision > omsb);
|
|
sig::shift_left(&mut wide_sig, &mut self.exp, (ext_precision - 1) - omsb);
|
|
}
|
|
|
|
// The intermediate result of the multiplication has "2 * S::PRECISION"
|
|
// significant bit; adjust the addend to be consistent with mul result.
|
|
let mut ext_addend_sig = [addend.sig[0], 0];
|
|
|
|
// Extend the addend significand to ext_precision - 1. This guarantees
|
|
// that the high bit of the significand is zero (same as wide_sig),
|
|
// so the addition will overflow (if it does overflow at all) into the top bit.
|
|
sig::shift_left(&mut ext_addend_sig, &mut 0, ext_precision - 1 - S::PRECISION);
|
|
loss = sig::add_or_sub(
|
|
&mut wide_sig,
|
|
&mut self.exp,
|
|
&mut self.sign,
|
|
&mut ext_addend_sig,
|
|
addend.exp + 1,
|
|
addend.sign,
|
|
);
|
|
|
|
omsb = sig::omsb(&wide_sig);
|
|
}
|
|
|
|
// Convert the result having "2 * S::PRECISION" significant-bits back to the one
|
|
// having "S::PRECISION" significant-bits. First, move the radix point from
|
|
// position "2*S::PRECISION - 1" to "S::PRECISION - 1". The exponent need to be
|
|
// adjusted by "2*S::PRECISION - 1" - "S::PRECISION - 1" = "S::PRECISION".
|
|
self.exp -= S::PRECISION as ExpInt + 1;
|
|
|
|
// In case MSB resides at the left-hand side of radix point, shift the
|
|
// mantissa right by some amount to make sure the MSB reside right before
|
|
// the radix point (i.e., "MSB . rest-significant-bits").
|
|
if omsb > S::PRECISION {
|
|
let bits = omsb - S::PRECISION;
|
|
loss = sig::shift_right(&mut wide_sig, &mut self.exp, bits).combine(loss);
|
|
}
|
|
|
|
self.sig[0] = wide_sig[0];
|
|
|
|
let mut status;
|
|
self = unpack!(status=, self.normalize(round, loss));
|
|
if loss != Loss::ExactlyZero {
|
|
status |= Status::INEXACT;
|
|
}
|
|
|
|
// If two numbers add (exactly) to zero, IEEE 754 decrees it is a
|
|
// positive zero unless rounding to minus infinity, except that
|
|
// adding two like-signed zeroes gives that zero.
|
|
if self.category == Category::Zero
|
|
&& !status.intersects(Status::UNDERFLOW)
|
|
&& self.sign != addend.sign
|
|
{
|
|
self.sign = round == Round::TowardNegative;
|
|
}
|
|
|
|
status.and(self)
|
|
}
|
|
|
|
fn div_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
|
|
self.sign ^= rhs.sign;
|
|
|
|
match (self.category, rhs.category) {
|
|
(Category::NaN, _) => {
|
|
self.sign = false;
|
|
Status::OK.and(self)
|
|
}
|
|
|
|
(_, Category::NaN) => {
|
|
self.category = Category::NaN;
|
|
self.sig = rhs.sig;
|
|
self.sign = false;
|
|
Status::OK.and(self)
|
|
}
|
|
|
|
(Category::Infinity, Category::Infinity) | (Category::Zero, Category::Zero) => {
|
|
Status::INVALID_OP.and(Self::NAN)
|
|
}
|
|
|
|
(Category::Infinity | Category::Zero, _) => Status::OK.and(self),
|
|
|
|
(Category::Normal, Category::Infinity) => {
|
|
self.category = Category::Zero;
|
|
Status::OK.and(self)
|
|
}
|
|
|
|
(Category::Normal, Category::Zero) => {
|
|
self.category = Category::Infinity;
|
|
Status::DIV_BY_ZERO.and(self)
|
|
}
|
|
|
|
(Category::Normal, Category::Normal) => {
|
|
self.exp -= rhs.exp;
|
|
let dividend = self.sig[0];
|
|
let loss = sig::div(
|
|
&mut self.sig,
|
|
&mut self.exp,
|
|
&mut [dividend],
|
|
&mut [rhs.sig[0]],
|
|
S::PRECISION,
|
|
);
|
|
let mut status;
|
|
self = unpack!(status=, self.normalize(round, loss));
|
|
if loss != Loss::ExactlyZero {
|
|
status |= Status::INEXACT;
|
|
}
|
|
status.and(self)
|
|
}
|
|
}
|
|
}
|
|
|
|
fn c_fmod(mut self, rhs: Self) -> StatusAnd<Self> {
|
|
match (self.category, rhs.category) {
|
|
(Category::NaN, _)
|
|
| (Category::Zero, Category::Infinity | Category::Normal)
|
|
| (Category::Normal, Category::Infinity) => Status::OK.and(self),
|
|
|
|
(_, Category::NaN) => {
|
|
self.sign = false;
|
|
self.category = Category::NaN;
|
|
self.sig = rhs.sig;
|
|
Status::OK.and(self)
|
|
}
|
|
|
|
(Category::Infinity, _) | (_, Category::Zero) => Status::INVALID_OP.and(Self::NAN),
|
|
|
|
(Category::Normal, Category::Normal) => {
|
|
while self.is_finite_non_zero()
|
|
&& rhs.is_finite_non_zero()
|
|
&& self.cmp_abs_normal(rhs) != Ordering::Less
|
|
{
|
|
let mut v = rhs.scalbn(self.ilogb() - rhs.ilogb());
|
|
if self.cmp_abs_normal(v) == Ordering::Less {
|
|
v = v.scalbn(-1);
|
|
}
|
|
v.sign = self.sign;
|
|
|
|
let status;
|
|
self = unpack!(status=, self - v);
|
|
assert_eq!(status, Status::OK);
|
|
}
|
|
Status::OK.and(self)
|
|
}
|
|
}
|
|
}
|
|
|
|
fn round_to_integral(self, round: Round) -> StatusAnd<Self> {
|
|
// If the exponent is large enough, we know that this value is already
|
|
// integral, and the arithmetic below would potentially cause it to saturate
|
|
// to +/-Inf. Bail out early instead.
|
|
if self.is_finite_non_zero() && self.exp + 1 >= S::PRECISION as ExpInt {
|
|
return Status::OK.and(self);
|
|
}
|
|
|
|
// The algorithm here is quite simple: we add 2^(p-1), where p is the
|
|
// precision of our format, and then subtract it back off again. The choice
|
|
// of rounding modes for the addition/subtraction determines the rounding mode
|
|
// for our integral rounding as well.
|
|
// NOTE: When the input value is negative, we do subtraction followed by
|
|
// addition instead.
|
|
assert!(S::PRECISION <= 128);
|
|
let mut status;
|
|
let magic_const = unpack!(status=, Self::from_u128(1 << (S::PRECISION - 1)));
|
|
let magic_const = magic_const.copy_sign(self);
|
|
|
|
if status != Status::OK {
|
|
return status.and(self);
|
|
}
|
|
|
|
let mut r = self;
|
|
r = unpack!(status=, r.add_r(magic_const, round));
|
|
if status != Status::OK && status != Status::INEXACT {
|
|
return status.and(self);
|
|
}
|
|
|
|
// Restore the input sign to handle 0.0/-0.0 cases correctly.
|
|
r.sub_r(magic_const, round).map(|r| r.copy_sign(self))
|
|
}
|
|
|
|
fn next_up(mut self) -> StatusAnd<Self> {
|
|
// Compute nextUp(x), handling each float category separately.
|
|
match self.category {
|
|
Category::Infinity => {
|
|
if self.sign {
|
|
// nextUp(-inf) = -largest
|
|
Status::OK.and(-Self::largest())
|
|
} else {
|
|
// nextUp(+inf) = +inf
|
|
Status::OK.and(self)
|
|
}
|
|
}
|
|
Category::NaN => {
|
|
// IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
|
|
// IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
|
|
// change the payload.
|
|
if self.is_signaling() {
|
|
// For consistency, propagate the sign of the sNaN to the qNaN.
|
|
Status::INVALID_OP.and(Self::NAN.copy_sign(self))
|
|
} else {
|
|
Status::OK.and(self)
|
|
}
|
|
}
|
|
Category::Zero => {
|
|
// nextUp(pm 0) = +smallest
|
|
Status::OK.and(Self::SMALLEST)
|
|
}
|
|
Category::Normal => {
|
|
// nextUp(-smallest) = -0
|
|
if self.is_smallest() && self.sign {
|
|
return Status::OK.and(-Self::ZERO);
|
|
}
|
|
|
|
// nextUp(largest) == INFINITY
|
|
if self.is_largest() && !self.sign {
|
|
return Status::OK.and(Self::INFINITY);
|
|
}
|
|
|
|
// Excluding the integral bit. This allows us to test for binade boundaries.
|
|
let sig_mask = (1 << (S::PRECISION - 1)) - 1;
|
|
|
|
// nextUp(normal) == normal + inc.
|
|
if self.sign {
|
|
// If we are negative, we need to decrement the significand.
|
|
|
|
// We only cross a binade boundary that requires adjusting the exponent
|
|
// if:
|
|
// 1. exponent != S::MIN_EXP. This implies we are not in the
|
|
// smallest binade or are dealing with denormals.
|
|
// 2. Our significand excluding the integral bit is all zeros.
|
|
let crossing_binade_boundary =
|
|
self.exp != S::MIN_EXP && self.sig[0] & sig_mask == 0;
|
|
|
|
// Decrement the significand.
|
|
//
|
|
// We always do this since:
|
|
// 1. If we are dealing with a non-binade decrement, by definition we
|
|
// just decrement the significand.
|
|
// 2. If we are dealing with a normal -> normal binade decrement, since
|
|
// we have an explicit integral bit the fact that all bits but the
|
|
// integral bit are zero implies that subtracting one will yield a
|
|
// significand with 0 integral bit and 1 in all other spots. Thus we
|
|
// must just adjust the exponent and set the integral bit to 1.
|
|
// 3. If we are dealing with a normal -> denormal binade decrement,
|
|
// since we set the integral bit to 0 when we represent denormals, we
|
|
// just decrement the significand.
|
|
sig::decrement(&mut self.sig);
|
|
|
|
if crossing_binade_boundary {
|
|
// Our result is a normal number. Do the following:
|
|
// 1. Set the integral bit to 1.
|
|
// 2. Decrement the exponent.
|
|
sig::set_bit(&mut self.sig, S::PRECISION - 1);
|
|
self.exp -= 1;
|
|
}
|
|
} else {
|
|
// If we are positive, we need to increment the significand.
|
|
|
|
// We only cross a binade boundary that requires adjusting the exponent if
|
|
// the input is not a denormal and all of said input's significand bits
|
|
// are set. If all of said conditions are true: clear the significand, set
|
|
// the integral bit to 1, and increment the exponent. If we have a
|
|
// denormal always increment since moving denormals and the numbers in the
|
|
// smallest normal binade have the same exponent in our representation.
|
|
let crossing_binade_boundary =
|
|
!self.is_denormal() && self.sig[0] & sig_mask == sig_mask;
|
|
|
|
if crossing_binade_boundary {
|
|
self.sig = [0];
|
|
sig::set_bit(&mut self.sig, S::PRECISION - 1);
|
|
assert_ne!(
|
|
self.exp,
|
|
S::MAX_EXP,
|
|
"We can not increment an exponent beyond the MAX_EXP \
|
|
allowed by the given floating point semantics."
|
|
);
|
|
self.exp += 1;
|
|
} else {
|
|
sig::increment(&mut self.sig);
|
|
}
|
|
}
|
|
Status::OK.and(self)
|
|
}
|
|
}
|
|
}
|
|
|
|
fn from_bits(input: u128) -> Self {
|
|
// Dispatch to semantics.
|
|
S::from_bits(input)
|
|
}
|
|
|
|
fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self> {
|
|
IeeeFloat {
|
|
sig: [input],
|
|
exp: S::PRECISION as ExpInt - 1,
|
|
category: Category::Normal,
|
|
sign: false,
|
|
marker: PhantomData,
|
|
}
|
|
.normalize(round, Loss::ExactlyZero)
|
|
}
|
|
|
|
fn from_str_r(mut s: &str, mut round: Round) -> Result<StatusAnd<Self>, ParseError> {
|
|
if s.is_empty() {
|
|
return Err(ParseError("Invalid string length"));
|
|
}
|
|
|
|
// Handle special cases.
|
|
match s {
|
|
"inf" | "INFINITY" => return Ok(Status::OK.and(Self::INFINITY)),
|
|
"-inf" | "-INFINITY" => return Ok(Status::OK.and(-Self::INFINITY)),
|
|
"nan" | "NaN" => return Ok(Status::OK.and(Self::NAN)),
|
|
"-nan" | "-NaN" => return Ok(Status::OK.and(-Self::NAN)),
|
|
_ => {}
|
|
}
|
|
|
|
// Handle a leading minus sign.
|
|
let minus = s.starts_with('-');
|
|
if minus || s.starts_with('+') {
|
|
s = &s[1..];
|
|
if s.is_empty() {
|
|
return Err(ParseError("String has no digits"));
|
|
}
|
|
}
|
|
|
|
// Adjust the rounding mode for the absolute value below.
|
|
if minus {
|
|
round = -round;
|
|
}
|
|
|
|
let r = if s.starts_with("0x") || s.starts_with("0X") {
|
|
s = &s[2..];
|
|
if s.is_empty() {
|
|
return Err(ParseError("Invalid string"));
|
|
}
|
|
Self::from_hexadecimal_string(s, round)?
|
|
} else {
|
|
Self::from_decimal_string(s, round)?
|
|
};
|
|
|
|
Ok(r.map(|r| if minus { -r } else { r }))
|
|
}
|
|
|
|
fn to_bits(self) -> u128 {
|
|
// Dispatch to semantics.
|
|
S::to_bits(self)
|
|
}
|
|
|
|
fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128> {
|
|
// The result of trying to convert a number too large.
|
|
let overflow = if self.sign {
|
|
// Negative numbers cannot be represented as unsigned.
|
|
0
|
|
} else {
|
|
// Largest unsigned integer of the given width.
|
|
!0 >> (128 - width)
|
|
};
|
|
|
|
*is_exact = false;
|
|
|
|
match self.category {
|
|
Category::NaN => Status::INVALID_OP.and(0),
|
|
|
|
Category::Infinity => Status::INVALID_OP.and(overflow),
|
|
|
|
Category::Zero => {
|
|
// Negative zero can't be represented as an int.
|
|
*is_exact = !self.sign;
|
|
Status::OK.and(0)
|
|
}
|
|
|
|
Category::Normal => {
|
|
let mut r = 0;
|
|
|
|
// Step 1: place our absolute value, with any fraction truncated, in
|
|
// the destination.
|
|
let truncated_bits = if self.exp < 0 {
|
|
// Our absolute value is less than one; truncate everything.
|
|
// For exponent -1 the integer bit represents .5, look at that.
|
|
// For smaller exponents leftmost truncated bit is 0.
|
|
S::PRECISION - 1 + (-self.exp) as usize
|
|
} else {
|
|
// We want the most significant (exponent + 1) bits; the rest are
|
|
// truncated.
|
|
let bits = self.exp as usize + 1;
|
|
|
|
// Hopelessly large in magnitude?
|
|
if bits > width {
|
|
return Status::INVALID_OP.and(overflow);
|
|
}
|
|
|
|
if bits < S::PRECISION {
|
|
// We truncate (S::PRECISION - bits) bits.
|
|
r = self.sig[0] >> (S::PRECISION - bits);
|
|
S::PRECISION - bits
|
|
} else {
|
|
// We want at least as many bits as are available.
|
|
r = self.sig[0] << (bits - S::PRECISION);
|
|
0
|
|
}
|
|
};
|
|
|
|
// Step 2: work out any lost fraction, and increment the absolute
|
|
// value if we would round away from zero.
|
|
let mut loss = Loss::ExactlyZero;
|
|
if truncated_bits > 0 {
|
|
loss = Loss::through_truncation(&self.sig, truncated_bits);
|
|
if loss != Loss::ExactlyZero
|
|
&& self.round_away_from_zero(round, loss, truncated_bits)
|
|
{
|
|
r = r.wrapping_add(1);
|
|
if r == 0 {
|
|
return Status::INVALID_OP.and(overflow); // Overflow.
|
|
}
|
|
}
|
|
}
|
|
|
|
// Step 3: check if we fit in the destination.
|
|
if r > overflow {
|
|
return Status::INVALID_OP.and(overflow);
|
|
}
|
|
|
|
if loss == Loss::ExactlyZero {
|
|
*is_exact = true;
|
|
Status::OK.and(r)
|
|
} else {
|
|
Status::INEXACT.and(r)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn cmp_abs_normal(self, rhs: Self) -> Ordering {
|
|
assert!(self.is_finite_non_zero());
|
|
assert!(rhs.is_finite_non_zero());
|
|
|
|
// If exponents are equal, do an unsigned comparison of the significands.
|
|
self.exp.cmp(&rhs.exp).then_with(|| sig::cmp(&self.sig, &rhs.sig))
|
|
}
|
|
|
|
fn bitwise_eq(self, rhs: Self) -> bool {
|
|
if self.category != rhs.category || self.sign != rhs.sign {
|
|
return false;
|
|
}
|
|
|
|
if self.category == Category::Zero || self.category == Category::Infinity {
|
|
return true;
|
|
}
|
|
|
|
if self.is_finite_non_zero() && self.exp != rhs.exp {
|
|
return false;
|
|
}
|
|
|
|
self.sig == rhs.sig
|
|
}
|
|
|
|
fn is_negative(self) -> bool {
|
|
self.sign
|
|
}
|
|
|
|
fn is_denormal(self) -> bool {
|
|
self.is_finite_non_zero()
|
|
&& self.exp == S::MIN_EXP
|
|
&& !sig::get_bit(&self.sig, S::PRECISION - 1)
|
|
}
|
|
|
|
fn is_signaling(self) -> bool {
|
|
// IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
|
|
// first bit of the trailing significand being 0.
|
|
self.is_nan() && !sig::get_bit(&self.sig, S::QNAN_BIT)
|
|
}
|
|
|
|
fn category(self) -> Category {
|
|
self.category
|
|
}
|
|
|
|
fn get_exact_inverse(self) -> Option<Self> {
|
|
// Special floats and denormals have no exact inverse.
|
|
if !self.is_finite_non_zero() {
|
|
return None;
|
|
}
|
|
|
|
// Check that the number is a power of two by making sure that only the
|
|
// integer bit is set in the significand.
|
|
if self.sig != [1 << (S::PRECISION - 1)] {
|
|
return None;
|
|
}
|
|
|
|
// Get the inverse.
|
|
let mut reciprocal = Self::from_u128(1).value;
|
|
let status;
|
|
reciprocal = unpack!(status=, reciprocal / self);
|
|
if status != Status::OK {
|
|
return None;
|
|
}
|
|
|
|
// Avoid multiplication with a denormal, it is not safe on all platforms and
|
|
// may be slower than a normal division.
|
|
if reciprocal.is_denormal() {
|
|
return None;
|
|
}
|
|
|
|
assert!(reciprocal.is_finite_non_zero());
|
|
assert_eq!(reciprocal.sig, [1 << (S::PRECISION - 1)]);
|
|
|
|
Some(reciprocal)
|
|
}
|
|
|
|
fn ilogb(mut self) -> ExpInt {
|
|
if self.is_nan() {
|
|
return IEK_NAN;
|
|
}
|
|
if self.is_zero() {
|
|
return IEK_ZERO;
|
|
}
|
|
if self.is_infinite() {
|
|
return IEK_INF;
|
|
}
|
|
if !self.is_denormal() {
|
|
return self.exp;
|
|
}
|
|
|
|
let sig_bits = (S::PRECISION - 1) as ExpInt;
|
|
self.exp += sig_bits;
|
|
self = self.normalize(Round::NearestTiesToEven, Loss::ExactlyZero).value;
|
|
self.exp - sig_bits
|
|
}
|
|
|
|
fn scalbn_r(mut self, exp: ExpInt, round: Round) -> Self {
|
|
// If exp is wildly out-of-scale, simply adding it to self.exp will
|
|
// overflow; clamp it to a safe range before adding, but ensure that the range
|
|
// is large enough that the clamp does not change the result. The range we
|
|
// need to support is the difference between the largest possible exponent and
|
|
// the normalized exponent of half the smallest denormal.
|
|
|
|
let sig_bits = (S::PRECISION - 1) as i32;
|
|
let max_change = S::MAX_EXP as i32 - (S::MIN_EXP as i32 - sig_bits) + 1;
|
|
|
|
// Clamp to one past the range ends to let normalize handle overflow.
|
|
let exp_change = cmp::min(cmp::max(exp as i32, -max_change - 1), max_change);
|
|
self.exp = self.exp.saturating_add(exp_change as ExpInt);
|
|
self = self.normalize(round, Loss::ExactlyZero).value;
|
|
if self.is_nan() {
|
|
sig::set_bit(&mut self.sig, S::QNAN_BIT);
|
|
}
|
|
self
|
|
}
|
|
|
|
fn frexp_r(mut self, exp: &mut ExpInt, round: Round) -> Self {
|
|
*exp = self.ilogb();
|
|
|
|
// Quiet signalling nans.
|
|
if *exp == IEK_NAN {
|
|
sig::set_bit(&mut self.sig, S::QNAN_BIT);
|
|
return self;
|
|
}
|
|
|
|
if *exp == IEK_INF {
|
|
return self;
|
|
}
|
|
|
|
// 1 is added because frexp is defined to return a normalized fraction in
|
|
// +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
|
|
if *exp == IEK_ZERO {
|
|
*exp = 0;
|
|
} else {
|
|
*exp += 1;
|
|
}
|
|
self.scalbn_r(-*exp, round)
|
|
}
|
|
}
|
|
|
|
impl<S: Semantics, T: Semantics> FloatConvert<IeeeFloat<T>> for IeeeFloat<S> {
|
|
fn convert_r(self, round: Round, loses_info: &mut bool) -> StatusAnd<IeeeFloat<T>> {
|
|
let mut r = IeeeFloat {
|
|
sig: self.sig,
|
|
exp: self.exp,
|
|
category: self.category,
|
|
sign: self.sign,
|
|
marker: PhantomData,
|
|
};
|
|
|
|
// x86 has some unusual NaNs which cannot be represented in any other
|
|
// format; note them here.
|
|
fn is_x87_double_extended<S: Semantics>() -> bool {
|
|
S::QNAN_SIGNIFICAND == X87DoubleExtendedS::QNAN_SIGNIFICAND
|
|
}
|
|
let x87_special_nan = is_x87_double_extended::<S>()
|
|
&& !is_x87_double_extended::<T>()
|
|
&& r.category == Category::NaN
|
|
&& (r.sig[0] & S::QNAN_SIGNIFICAND) != S::QNAN_SIGNIFICAND;
|
|
|
|
// If this is a truncation of a denormal number, and the target semantics
|
|
// has larger exponent range than the source semantics (this can happen
|
|
// when truncating from PowerPC double-double to double format), the
|
|
// right shift could lose result mantissa bits. Adjust exponent instead
|
|
// of performing excessive shift.
|
|
let mut shift = T::PRECISION as ExpInt - S::PRECISION as ExpInt;
|
|
if shift < 0 && r.is_finite_non_zero() {
|
|
let mut exp_change = sig::omsb(&r.sig) as ExpInt - S::PRECISION as ExpInt;
|
|
if r.exp + exp_change < T::MIN_EXP {
|
|
exp_change = T::MIN_EXP - r.exp;
|
|
}
|
|
if exp_change < shift {
|
|
exp_change = shift;
|
|
}
|
|
if exp_change < 0 {
|
|
shift -= exp_change;
|
|
r.exp += exp_change;
|
|
}
|
|
}
|
|
|
|
// If this is a truncation, perform the shift.
|
|
let loss = if shift < 0 && (r.is_finite_non_zero() || r.category == Category::NaN) {
|
|
sig::shift_right(&mut r.sig, &mut 0, -shift as usize)
|
|
} else {
|
|
Loss::ExactlyZero
|
|
};
|
|
|
|
// If this is an extension, perform the shift.
|
|
if shift > 0 && (r.is_finite_non_zero() || r.category == Category::NaN) {
|
|
sig::shift_left(&mut r.sig, &mut 0, shift as usize);
|
|
}
|
|
|
|
let status;
|
|
if r.is_finite_non_zero() {
|
|
r = unpack!(status=, r.normalize(round, loss));
|
|
*loses_info = status != Status::OK;
|
|
} else if r.category == Category::NaN {
|
|
*loses_info = loss != Loss::ExactlyZero || x87_special_nan;
|
|
|
|
// For x87 extended precision, we want to make a NaN, not a special NaN if
|
|
// the input wasn't special either.
|
|
if !x87_special_nan && is_x87_double_extended::<T>() {
|
|
sig::set_bit(&mut r.sig, T::PRECISION - 1);
|
|
}
|
|
|
|
// Convert of sNaN creates qNaN and raises an exception (invalid op).
|
|
// This also guarantees that a sNaN does not become Inf on a truncation
|
|
// that loses all payload bits.
|
|
if self.is_signaling() {
|
|
// Quiet signaling NaN.
|
|
sig::set_bit(&mut r.sig, T::QNAN_BIT);
|
|
status = Status::INVALID_OP;
|
|
} else {
|
|
status = Status::OK;
|
|
}
|
|
} else {
|
|
*loses_info = false;
|
|
status = Status::OK;
|
|
}
|
|
|
|
status.and(r)
|
|
}
|
|
}
|
|
|
|
impl<S: Semantics> IeeeFloat<S> {
|
|
/// Handle positive overflow. We either return infinity or
|
|
/// the largest finite number. For negative overflow,
|
|
/// negate the `round` argument before calling.
|
|
fn overflow_result(round: Round) -> StatusAnd<Self> {
|
|
match round {
|
|
// Infinity?
|
|
Round::NearestTiesToEven | Round::NearestTiesToAway | Round::TowardPositive => {
|
|
(Status::OVERFLOW | Status::INEXACT).and(Self::INFINITY)
|
|
}
|
|
// Otherwise we become the largest finite number.
|
|
Round::TowardNegative | Round::TowardZero => Status::INEXACT.and(Self::largest()),
|
|
}
|
|
}
|
|
|
|
/// Returns `true` if, when truncating the current number, with `bit` the
|
|
/// new LSB, with the given lost fraction and rounding mode, the result
|
|
/// would need to be rounded away from zero (i.e., by increasing the
|
|
/// signficand). This routine must work for `Category::Zero` of both signs, and
|
|
/// `Category::Normal` numbers.
|
|
fn round_away_from_zero(&self, round: Round, loss: Loss, bit: usize) -> bool {
|
|
// NaNs and infinities should not have lost fractions.
|
|
assert!(self.is_finite_non_zero() || self.is_zero());
|
|
|
|
// Current callers never pass this so we don't handle it.
|
|
assert_ne!(loss, Loss::ExactlyZero);
|
|
|
|
match round {
|
|
Round::NearestTiesToAway => loss == Loss::ExactlyHalf || loss == Loss::MoreThanHalf,
|
|
Round::NearestTiesToEven => {
|
|
if loss == Loss::MoreThanHalf {
|
|
return true;
|
|
}
|
|
|
|
// Our zeros don't have a significand to test.
|
|
if loss == Loss::ExactlyHalf && self.category != Category::Zero {
|
|
return sig::get_bit(&self.sig, bit);
|
|
}
|
|
|
|
false
|
|
}
|
|
Round::TowardZero => false,
|
|
Round::TowardPositive => !self.sign,
|
|
Round::TowardNegative => self.sign,
|
|
}
|
|
}
|
|
|
|
fn normalize(mut self, round: Round, mut loss: Loss) -> StatusAnd<Self> {
|
|
if !self.is_finite_non_zero() {
|
|
return Status::OK.and(self);
|
|
}
|
|
|
|
// Before rounding normalize the exponent of Category::Normal numbers.
|
|
let mut omsb = sig::omsb(&self.sig);
|
|
|
|
if omsb > 0 {
|
|
// OMSB is numbered from 1. We want to place it in the integer
|
|
// bit numbered PRECISION if possible, with a compensating change in
|
|
// the exponent.
|
|
let mut final_exp = self.exp.saturating_add(omsb as ExpInt - S::PRECISION as ExpInt);
|
|
|
|
// If the resulting exponent is too high, overflow according to
|
|
// the rounding mode.
|
|
if final_exp > S::MAX_EXP {
|
|
let round = if self.sign { -round } else { round };
|
|
return Self::overflow_result(round).map(|r| r.copy_sign(self));
|
|
}
|
|
|
|
// Subnormal numbers have exponent MIN_EXP, and their MSB
|
|
// is forced based on that.
|
|
if final_exp < S::MIN_EXP {
|
|
final_exp = S::MIN_EXP;
|
|
}
|
|
|
|
// Shifting left is easy as we don't lose precision.
|
|
if final_exp < self.exp {
|
|
assert_eq!(loss, Loss::ExactlyZero);
|
|
|
|
let exp_change = (self.exp - final_exp) as usize;
|
|
sig::shift_left(&mut self.sig, &mut self.exp, exp_change);
|
|
|
|
return Status::OK.and(self);
|
|
}
|
|
|
|
// Shift right and capture any new lost fraction.
|
|
if final_exp > self.exp {
|
|
let exp_change = (final_exp - self.exp) as usize;
|
|
loss = sig::shift_right(&mut self.sig, &mut self.exp, exp_change).combine(loss);
|
|
|
|
// Keep OMSB up-to-date.
|
|
omsb = omsb.saturating_sub(exp_change);
|
|
}
|
|
}
|
|
|
|
// Now round the number according to round given the lost
|
|
// fraction.
|
|
|
|
// As specified in IEEE 754, since we do not trap we do not report
|
|
// underflow for exact results.
|
|
if loss == Loss::ExactlyZero {
|
|
// Canonicalize zeros.
|
|
if omsb == 0 {
|
|
self.category = Category::Zero;
|
|
}
|
|
|
|
return Status::OK.and(self);
|
|
}
|
|
|
|
// Increment the significand if we're rounding away from zero.
|
|
if self.round_away_from_zero(round, loss, 0) {
|
|
if omsb == 0 {
|
|
self.exp = S::MIN_EXP;
|
|
}
|
|
|
|
// We should never overflow.
|
|
assert_eq!(sig::increment(&mut self.sig), 0);
|
|
omsb = sig::omsb(&self.sig);
|
|
|
|
// Did the significand increment overflow?
|
|
if omsb == S::PRECISION + 1 {
|
|
// Renormalize by incrementing the exponent and shifting our
|
|
// significand right one. However if we already have the
|
|
// maximum exponent we overflow to infinity.
|
|
if self.exp == S::MAX_EXP {
|
|
self.category = Category::Infinity;
|
|
|
|
return (Status::OVERFLOW | Status::INEXACT).and(self);
|
|
}
|
|
|
|
let _: Loss = sig::shift_right(&mut self.sig, &mut self.exp, 1);
|
|
|
|
return Status::INEXACT.and(self);
|
|
}
|
|
}
|
|
|
|
// The normal case - we were and are not denormal, and any
|
|
// significand increment above didn't overflow.
|
|
if omsb == S::PRECISION {
|
|
return Status::INEXACT.and(self);
|
|
}
|
|
|
|
// We have a non-zero denormal.
|
|
assert!(omsb < S::PRECISION);
|
|
|
|
// Canonicalize zeros.
|
|
if omsb == 0 {
|
|
self.category = Category::Zero;
|
|
}
|
|
|
|
// The Category::Zero case is a denormal that underflowed to zero.
|
|
(Status::UNDERFLOW | Status::INEXACT).and(self)
|
|
}
|
|
|
|
fn from_hexadecimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> {
|
|
let mut r = IeeeFloat {
|
|
sig: [0],
|
|
exp: 0,
|
|
category: Category::Normal,
|
|
sign: false,
|
|
marker: PhantomData,
|
|
};
|
|
|
|
let mut any_digits = false;
|
|
let mut has_exp = false;
|
|
let mut bit_pos = LIMB_BITS as isize;
|
|
let mut loss = None;
|
|
|
|
// Without leading or trailing zeros, irrespective of the dot.
|
|
let mut first_sig_digit = None;
|
|
let mut dot = s.len();
|
|
|
|
for (p, c) in s.char_indices() {
|
|
// Skip leading zeros and any (hexa)decimal point.
|
|
if c == '.' {
|
|
if dot != s.len() {
|
|
return Err(ParseError("String contains multiple dots"));
|
|
}
|
|
dot = p;
|
|
} else if let Some(hex_value) = c.to_digit(16) {
|
|
any_digits = true;
|
|
|
|
if first_sig_digit.is_none() {
|
|
if hex_value == 0 {
|
|
continue;
|
|
}
|
|
first_sig_digit = Some(p);
|
|
}
|
|
|
|
// Store the number while we have space.
|
|
bit_pos -= 4;
|
|
if bit_pos >= 0 {
|
|
r.sig[0] |= (hex_value as Limb) << bit_pos;
|
|
// If zero or one-half (the hexadecimal digit 8) are followed
|
|
// by non-zero, they're a little more than zero or one-half.
|
|
} else if let Some(ref mut loss) = loss {
|
|
if hex_value != 0 {
|
|
if *loss == Loss::ExactlyZero {
|
|
*loss = Loss::LessThanHalf;
|
|
}
|
|
if *loss == Loss::ExactlyHalf {
|
|
*loss = Loss::MoreThanHalf;
|
|
}
|
|
}
|
|
} else {
|
|
loss = Some(match hex_value {
|
|
0 => Loss::ExactlyZero,
|
|
1..=7 => Loss::LessThanHalf,
|
|
8 => Loss::ExactlyHalf,
|
|
9..=15 => Loss::MoreThanHalf,
|
|
_ => unreachable!(),
|
|
});
|
|
}
|
|
} else if c == 'p' || c == 'P' {
|
|
if !any_digits {
|
|
return Err(ParseError("Significand has no digits"));
|
|
}
|
|
|
|
if dot == s.len() {
|
|
dot = p;
|
|
}
|
|
|
|
let mut chars = s[p + 1..].chars().peekable();
|
|
|
|
// Adjust for the given exponent.
|
|
let exp_minus = chars.peek() == Some(&'-');
|
|
if exp_minus || chars.peek() == Some(&'+') {
|
|
chars.next();
|
|
}
|
|
|
|
for c in chars {
|
|
if let Some(value) = c.to_digit(10) {
|
|
has_exp = true;
|
|
r.exp = r.exp.saturating_mul(10).saturating_add(value as ExpInt);
|
|
} else {
|
|
return Err(ParseError("Invalid character in exponent"));
|
|
}
|
|
}
|
|
if !has_exp {
|
|
return Err(ParseError("Exponent has no digits"));
|
|
}
|
|
|
|
if exp_minus {
|
|
r.exp = -r.exp;
|
|
}
|
|
|
|
break;
|
|
} else {
|
|
return Err(ParseError("Invalid character in significand"));
|
|
}
|
|
}
|
|
if !any_digits {
|
|
return Err(ParseError("Significand has no digits"));
|
|
}
|
|
|
|
// Hex floats require an exponent but not a hexadecimal point.
|
|
if !has_exp {
|
|
return Err(ParseError("Hex strings require an exponent"));
|
|
}
|
|
|
|
// Ignore the exponent if we are zero.
|
|
let first_sig_digit = match first_sig_digit {
|
|
Some(p) => p,
|
|
None => return Ok(Status::OK.and(Self::ZERO)),
|
|
};
|
|
|
|
// Calculate the exponent adjustment implicit in the number of
|
|
// significant digits and adjust for writing the significand starting
|
|
// at the most significant nibble.
|
|
let exp_adjustment = if dot > first_sig_digit {
|
|
ExpInt::try_from(dot - first_sig_digit).unwrap()
|
|
} else {
|
|
-ExpInt::try_from(first_sig_digit - dot - 1).unwrap()
|
|
};
|
|
let exp_adjustment = exp_adjustment
|
|
.saturating_mul(4)
|
|
.saturating_sub(1)
|
|
.saturating_add(S::PRECISION as ExpInt)
|
|
.saturating_sub(LIMB_BITS as ExpInt);
|
|
r.exp = r.exp.saturating_add(exp_adjustment);
|
|
|
|
Ok(r.normalize(round, loss.unwrap_or(Loss::ExactlyZero)))
|
|
}
|
|
|
|
fn from_decimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> {
|
|
// Given a normal decimal floating point number of the form
|
|
//
|
|
// dddd.dddd[eE][+-]ddd
|
|
//
|
|
// where the decimal point and exponent are optional, fill out the
|
|
// variables below. Exponent is appropriate if the significand is
|
|
// treated as an integer, and normalized_exp if the significand
|
|
// is taken to have the decimal point after a single leading
|
|
// non-zero digit.
|
|
//
|
|
// If the value is zero, first_sig_digit is None.
|
|
|
|
let mut any_digits = false;
|
|
let mut dec_exp = 0i32;
|
|
|
|
// Without leading or trailing zeros, irrespective of the dot.
|
|
let mut first_sig_digit = None;
|
|
let mut last_sig_digit = 0;
|
|
let mut dot = s.len();
|
|
|
|
for (p, c) in s.char_indices() {
|
|
if c == '.' {
|
|
if dot != s.len() {
|
|
return Err(ParseError("String contains multiple dots"));
|
|
}
|
|
dot = p;
|
|
} else if let Some(dec_value) = c.to_digit(10) {
|
|
any_digits = true;
|
|
|
|
if dec_value != 0 {
|
|
if first_sig_digit.is_none() {
|
|
first_sig_digit = Some(p);
|
|
}
|
|
last_sig_digit = p;
|
|
}
|
|
} else if c == 'e' || c == 'E' {
|
|
if !any_digits {
|
|
return Err(ParseError("Significand has no digits"));
|
|
}
|
|
|
|
if dot == s.len() {
|
|
dot = p;
|
|
}
|
|
|
|
let mut chars = s[p + 1..].chars().peekable();
|
|
|
|
// Adjust for the given exponent.
|
|
let exp_minus = chars.peek() == Some(&'-');
|
|
if exp_minus || chars.peek() == Some(&'+') {
|
|
chars.next();
|
|
}
|
|
|
|
any_digits = false;
|
|
for c in chars {
|
|
if let Some(value) = c.to_digit(10) {
|
|
any_digits = true;
|
|
dec_exp = dec_exp.saturating_mul(10).saturating_add(value as i32);
|
|
} else {
|
|
return Err(ParseError("Invalid character in exponent"));
|
|
}
|
|
}
|
|
if !any_digits {
|
|
return Err(ParseError("Exponent has no digits"));
|
|
}
|
|
|
|
if exp_minus {
|
|
dec_exp = -dec_exp;
|
|
}
|
|
|
|
break;
|
|
} else {
|
|
return Err(ParseError("Invalid character in significand"));
|
|
}
|
|
}
|
|
if !any_digits {
|
|
return Err(ParseError("Significand has no digits"));
|
|
}
|
|
|
|
// Test if we have a zero number allowing for non-zero exponents.
|
|
let first_sig_digit = match first_sig_digit {
|
|
Some(p) => p,
|
|
None => return Ok(Status::OK.and(Self::ZERO)),
|
|
};
|
|
|
|
// Adjust the exponents for any decimal point.
|
|
if dot > last_sig_digit {
|
|
dec_exp = dec_exp.saturating_add((dot - last_sig_digit - 1) as i32);
|
|
} else {
|
|
dec_exp = dec_exp.saturating_sub((last_sig_digit - dot) as i32);
|
|
}
|
|
let significand_digits = last_sig_digit - first_sig_digit + 1
|
|
- (dot > first_sig_digit && dot < last_sig_digit) as usize;
|
|
let normalized_exp = dec_exp.saturating_add(significand_digits as i32 - 1);
|
|
|
|
// Handle the cases where exponents are obviously too large or too
|
|
// small. Writing L for log 10 / log 2, a number d.ddddd*10^dec_exp
|
|
// definitely overflows if
|
|
//
|
|
// (dec_exp - 1) * L >= MAX_EXP
|
|
//
|
|
// and definitely underflows to zero where
|
|
//
|
|
// (dec_exp + 1) * L <= MIN_EXP - PRECISION
|
|
//
|
|
// With integer arithmetic the tightest bounds for L are
|
|
//
|
|
// 93/28 < L < 196/59 [ numerator <= 256 ]
|
|
// 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
|
|
|
|
// Check for MAX_EXP.
|
|
if normalized_exp.saturating_sub(1).saturating_mul(42039) >= 12655 * S::MAX_EXP as i32 {
|
|
// Overflow and round.
|
|
return Ok(Self::overflow_result(round));
|
|
}
|
|
|
|
// Check for MIN_EXP.
|
|
if normalized_exp.saturating_add(1).saturating_mul(28738)
|
|
<= 8651 * (S::MIN_EXP as i32 - S::PRECISION as i32)
|
|
{
|
|
// Underflow to zero and round.
|
|
let r =
|
|
if round == Round::TowardPositive { IeeeFloat::SMALLEST } else { IeeeFloat::ZERO };
|
|
return Ok((Status::UNDERFLOW | Status::INEXACT).and(r));
|
|
}
|
|
|
|
// A tight upper bound on number of bits required to hold an
|
|
// N-digit decimal integer is N * 196 / 59. Allocate enough space
|
|
// to hold the full significand, and an extra limb required by
|
|
// tcMultiplyPart.
|
|
let max_limbs = limbs_for_bits(1 + 196 * significand_digits / 59);
|
|
let mut dec_sig: SmallVec<[Limb; 1]> = SmallVec::with_capacity(max_limbs);
|
|
|
|
// Convert to binary efficiently - we do almost all multiplication
|
|
// in a Limb. When this would overflow do we do a single
|
|
// bignum multiplication, and then revert again to multiplication
|
|
// in a Limb.
|
|
let mut chars = s[first_sig_digit..=last_sig_digit].chars();
|
|
loop {
|
|
let mut val = 0;
|
|
let mut multiplier = 1;
|
|
|
|
loop {
|
|
let dec_value = match chars.next() {
|
|
Some('.') => continue,
|
|
Some(c) => c.to_digit(10).unwrap(),
|
|
None => break,
|
|
};
|
|
|
|
multiplier *= 10;
|
|
val = val * 10 + dec_value as Limb;
|
|
|
|
// The maximum number that can be multiplied by ten with any
|
|
// digit added without overflowing a Limb.
|
|
if multiplier > (!0 - 9) / 10 {
|
|
break;
|
|
}
|
|
}
|
|
|
|
// If we've consumed no digits, we're done.
|
|
if multiplier == 1 {
|
|
break;
|
|
}
|
|
|
|
// Multiply out the current limb.
|
|
let mut carry = val;
|
|
for x in &mut dec_sig {
|
|
let [low, mut high] = sig::widening_mul(*x, multiplier);
|
|
|
|
// Now add carry.
|
|
let (low, overflow) = low.overflowing_add(carry);
|
|
high += overflow as Limb;
|
|
|
|
*x = low;
|
|
carry = high;
|
|
}
|
|
|
|
// If we had carry, we need another limb (likely but not guaranteed).
|
|
if carry > 0 {
|
|
dec_sig.push(carry);
|
|
}
|
|
}
|
|
|
|
// Calculate pow(5, abs(dec_exp)) into `pow5_full`.
|
|
// The *_calc Vec's are reused scratch space, as an optimization.
|
|
let (pow5_full, mut pow5_calc, mut sig_calc, mut sig_scratch_calc) = {
|
|
let mut power = dec_exp.abs() as usize;
|
|
|
|
const FIRST_EIGHT_POWERS: [Limb; 8] = [1, 5, 25, 125, 625, 3125, 15625, 78125];
|
|
|
|
let mut p5_scratch = smallvec![];
|
|
let mut p5: SmallVec<[Limb; 1]> = smallvec![FIRST_EIGHT_POWERS[4]];
|
|
|
|
let mut r_scratch = smallvec![];
|
|
let mut r: SmallVec<[Limb; 1]> = smallvec![FIRST_EIGHT_POWERS[power & 7]];
|
|
power >>= 3;
|
|
|
|
while power > 0 {
|
|
// Calculate pow(5,pow(2,n+3)).
|
|
p5_scratch.resize(p5.len() * 2, 0);
|
|
let _: Loss = sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS);
|
|
while p5_scratch.last() == Some(&0) {
|
|
p5_scratch.pop();
|
|
}
|
|
mem::swap(&mut p5, &mut p5_scratch);
|
|
|
|
if power & 1 != 0 {
|
|
r_scratch.resize(r.len() + p5.len(), 0);
|
|
let _: Loss =
|
|
sig::mul(&mut r_scratch, &mut 0, &r, &p5, (r.len() + p5.len()) * LIMB_BITS);
|
|
while r_scratch.last() == Some(&0) {
|
|
r_scratch.pop();
|
|
}
|
|
mem::swap(&mut r, &mut r_scratch);
|
|
}
|
|
|
|
power >>= 1;
|
|
}
|
|
|
|
(r, r_scratch, p5, p5_scratch)
|
|
};
|
|
|
|
// Attempt dec_sig * 10^dec_exp with increasing precision.
|
|
let mut attempt = 0;
|
|
loop {
|
|
let calc_precision = (LIMB_BITS << attempt) - 1;
|
|
attempt += 1;
|
|
|
|
let calc_normal_from_limbs = |sig: &mut SmallVec<[Limb; 1]>,
|
|
limbs: &[Limb]|
|
|
-> StatusAnd<ExpInt> {
|
|
sig.resize(limbs_for_bits(calc_precision), 0);
|
|
let (mut loss, mut exp) = sig::from_limbs(sig, limbs, calc_precision);
|
|
|
|
// Before rounding normalize the exponent of Category::Normal numbers.
|
|
let mut omsb = sig::omsb(sig);
|
|
|
|
assert_ne!(omsb, 0);
|
|
|
|
// OMSB is numbered from 1. We want to place it in the integer
|
|
// bit numbered PRECISION if possible, with a compensating change in
|
|
// the exponent.
|
|
let final_exp = exp.saturating_add(omsb as ExpInt - calc_precision as ExpInt);
|
|
|
|
// Shifting left is easy as we don't lose precision.
|
|
if final_exp < exp {
|
|
assert_eq!(loss, Loss::ExactlyZero);
|
|
|
|
let exp_change = (exp - final_exp) as usize;
|
|
sig::shift_left(sig, &mut exp, exp_change);
|
|
|
|
return Status::OK.and(exp);
|
|
}
|
|
|
|
// Shift right and capture any new lost fraction.
|
|
if final_exp > exp {
|
|
let exp_change = (final_exp - exp) as usize;
|
|
loss = sig::shift_right(sig, &mut exp, exp_change).combine(loss);
|
|
|
|
// Keep OMSB up-to-date.
|
|
omsb = omsb.saturating_sub(exp_change);
|
|
}
|
|
|
|
assert_eq!(omsb, calc_precision);
|
|
|
|
// Now round the number according to round given the lost
|
|
// fraction.
|
|
|
|
// As specified in IEEE 754, since we do not trap we do not report
|
|
// underflow for exact results.
|
|
if loss == Loss::ExactlyZero {
|
|
return Status::OK.and(exp);
|
|
}
|
|
|
|
// Increment the significand if we're rounding away from zero.
|
|
if loss == Loss::MoreThanHalf || loss == Loss::ExactlyHalf && sig::get_bit(sig, 0) {
|
|
// We should never overflow.
|
|
assert_eq!(sig::increment(sig), 0);
|
|
omsb = sig::omsb(sig);
|
|
|
|
// Did the significand increment overflow?
|
|
if omsb == calc_precision + 1 {
|
|
let _: Loss = sig::shift_right(sig, &mut exp, 1);
|
|
|
|
return Status::INEXACT.and(exp);
|
|
}
|
|
}
|
|
|
|
// The normal case - we were and are not denormal, and any
|
|
// significand increment above didn't overflow.
|
|
Status::INEXACT.and(exp)
|
|
};
|
|
|
|
let status;
|
|
let mut exp = unpack!(status=,
|
|
calc_normal_from_limbs(&mut sig_calc, &dec_sig));
|
|
let pow5_status;
|
|
let pow5_exp = unpack!(pow5_status=,
|
|
calc_normal_from_limbs(&mut pow5_calc, &pow5_full));
|
|
|
|
// Add dec_exp, as 10^n = 5^n * 2^n.
|
|
exp += dec_exp as ExpInt;
|
|
|
|
let mut used_bits = S::PRECISION;
|
|
let mut truncated_bits = calc_precision - used_bits;
|
|
|
|
let half_ulp_err1 = (status != Status::OK) as Limb;
|
|
let (calc_loss, half_ulp_err2);
|
|
if dec_exp >= 0 {
|
|
exp += pow5_exp;
|
|
|
|
sig_scratch_calc.resize(sig_calc.len() + pow5_calc.len(), 0);
|
|
calc_loss = sig::mul(
|
|
&mut sig_scratch_calc,
|
|
&mut exp,
|
|
&sig_calc,
|
|
&pow5_calc,
|
|
calc_precision,
|
|
);
|
|
mem::swap(&mut sig_calc, &mut sig_scratch_calc);
|
|
|
|
half_ulp_err2 = (pow5_status != Status::OK) as Limb;
|
|
} else {
|
|
exp -= pow5_exp;
|
|
|
|
sig_scratch_calc.resize(sig_calc.len(), 0);
|
|
calc_loss = sig::div(
|
|
&mut sig_scratch_calc,
|
|
&mut exp,
|
|
&mut sig_calc,
|
|
&mut pow5_calc,
|
|
calc_precision,
|
|
);
|
|
mem::swap(&mut sig_calc, &mut sig_scratch_calc);
|
|
|
|
// Denormal numbers have less precision.
|
|
if exp < S::MIN_EXP {
|
|
truncated_bits += (S::MIN_EXP - exp) as usize;
|
|
used_bits = calc_precision.saturating_sub(truncated_bits);
|
|
}
|
|
// Extra half-ulp lost in reciprocal of exponent.
|
|
half_ulp_err2 =
|
|
2 * (pow5_status != Status::OK || calc_loss != Loss::ExactlyZero) as Limb;
|
|
}
|
|
|
|
// Both sig::mul and sig::div return the
|
|
// result with the integer bit set.
|
|
assert!(sig::get_bit(&sig_calc, calc_precision - 1));
|
|
|
|
// The error from the true value, in half-ulps, on multiplying two
|
|
// floating point numbers, which differ from the value they
|
|
// approximate by at most half_ulp_err1 and half_ulp_err2 half-ulps, is strictly less
|
|
// than the returned value.
|
|
//
|
|
// See "How to Read Floating Point Numbers Accurately" by William D Clinger.
|
|
assert!(half_ulp_err1 < 2 || half_ulp_err2 < 2 || (half_ulp_err1 + half_ulp_err2 < 8));
|
|
|
|
let inexact = (calc_loss != Loss::ExactlyZero) as Limb;
|
|
let half_ulp_err = if half_ulp_err1 + half_ulp_err2 == 0 {
|
|
inexact * 2 // <= inexact half-ulps.
|
|
} else {
|
|
inexact + 2 * (half_ulp_err1 + half_ulp_err2)
|
|
};
|
|
|
|
let ulps_from_boundary = {
|
|
let bits = calc_precision - used_bits - 1;
|
|
|
|
let i = bits / LIMB_BITS;
|
|
let limb = sig_calc[i] & (!0 >> (LIMB_BITS - 1 - bits % LIMB_BITS));
|
|
let boundary = match round {
|
|
Round::NearestTiesToEven | Round::NearestTiesToAway => 1 << (bits % LIMB_BITS),
|
|
_ => 0,
|
|
};
|
|
if i == 0 {
|
|
let delta = limb.wrapping_sub(boundary);
|
|
cmp::min(delta, delta.wrapping_neg())
|
|
} else if limb == boundary {
|
|
if !sig::is_all_zeros(&sig_calc[1..i]) {
|
|
!0 // A lot.
|
|
} else {
|
|
sig_calc[0]
|
|
}
|
|
} else if limb == boundary.wrapping_sub(1) {
|
|
if sig_calc[1..i].iter().any(|&x| x.wrapping_neg() != 1) {
|
|
!0 // A lot.
|
|
} else {
|
|
sig_calc[0].wrapping_neg()
|
|
}
|
|
} else {
|
|
!0 // A lot.
|
|
}
|
|
};
|
|
|
|
// Are we guaranteed to round correctly if we truncate?
|
|
if ulps_from_boundary.saturating_mul(2) >= half_ulp_err {
|
|
let mut r = IeeeFloat {
|
|
sig: [0],
|
|
exp,
|
|
category: Category::Normal,
|
|
sign: false,
|
|
marker: PhantomData,
|
|
};
|
|
sig::extract(&mut r.sig, &sig_calc, used_bits, calc_precision - used_bits);
|
|
// If we extracted less bits above we must adjust our exponent
|
|
// to compensate for the implicit right shift.
|
|
r.exp += (S::PRECISION - used_bits) as ExpInt;
|
|
let loss = Loss::through_truncation(&sig_calc, truncated_bits);
|
|
return Ok(r.normalize(round, loss));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Loss {
|
|
/// Combine the effect of two lost fractions.
|
|
fn combine(self, less_significant: Loss) -> Loss {
|
|
let mut more_significant = self;
|
|
if less_significant != Loss::ExactlyZero {
|
|
if more_significant == Loss::ExactlyZero {
|
|
more_significant = Loss::LessThanHalf;
|
|
} else if more_significant == Loss::ExactlyHalf {
|
|
more_significant = Loss::MoreThanHalf;
|
|
}
|
|
}
|
|
|
|
more_significant
|
|
}
|
|
|
|
/// Returns the fraction lost were a bignum truncated losing the least
|
|
/// significant `bits` bits.
|
|
fn through_truncation(limbs: &[Limb], bits: usize) -> Loss {
|
|
if bits == 0 {
|
|
return Loss::ExactlyZero;
|
|
}
|
|
|
|
let half_bit = bits - 1;
|
|
let half_limb = half_bit / LIMB_BITS;
|
|
let (half_limb, rest) = if half_limb < limbs.len() {
|
|
(limbs[half_limb], &limbs[..half_limb])
|
|
} else {
|
|
(0, limbs)
|
|
};
|
|
let half = 1 << (half_bit % LIMB_BITS);
|
|
let has_half = half_limb & half != 0;
|
|
let has_rest = half_limb & (half - 1) != 0 || !sig::is_all_zeros(rest);
|
|
|
|
match (has_half, has_rest) {
|
|
(false, false) => Loss::ExactlyZero,
|
|
(false, true) => Loss::LessThanHalf,
|
|
(true, false) => Loss::ExactlyHalf,
|
|
(true, true) => Loss::MoreThanHalf,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Implementation details of IeeeFloat significands, such as big integer arithmetic.
|
|
/// As a rule of thumb, no functions in this module should dynamically allocate.
|
|
mod sig {
|
|
use super::{limbs_for_bits, ExpInt, Limb, Loss, LIMB_BITS};
|
|
use core::cmp::Ordering;
|
|
use core::iter;
|
|
use core::mem;
|
|
|
|
pub(super) fn is_all_zeros(limbs: &[Limb]) -> bool {
|
|
limbs.iter().all(|&l| l == 0)
|
|
}
|
|
|
|
/// One, not zero, based LSB. That is, returns 0 for a zeroed significand.
|
|
pub(super) fn olsb(limbs: &[Limb]) -> usize {
|
|
limbs
|
|
.iter()
|
|
.enumerate()
|
|
.find(|(_, &limb)| limb != 0)
|
|
.map_or(0, |(i, limb)| i * LIMB_BITS + limb.trailing_zeros() as usize + 1)
|
|
}
|
|
|
|
/// One, not zero, based MSB. That is, returns 0 for a zeroed significand.
|
|
pub(super) fn omsb(limbs: &[Limb]) -> usize {
|
|
limbs
|
|
.iter()
|
|
.enumerate()
|
|
.rfind(|(_, &limb)| limb != 0)
|
|
.map_or(0, |(i, limb)| (i + 1) * LIMB_BITS - limb.leading_zeros() as usize)
|
|
}
|
|
|
|
/// Comparison (unsigned) of two significands.
|
|
pub(super) fn cmp(a: &[Limb], b: &[Limb]) -> Ordering {
|
|
assert_eq!(a.len(), b.len());
|
|
for (a, b) in a.iter().zip(b).rev() {
|
|
match a.cmp(b) {
|
|
Ordering::Equal => {}
|
|
o => return o,
|
|
}
|
|
}
|
|
|
|
Ordering::Equal
|
|
}
|
|
|
|
/// Extracts the given bit.
|
|
pub(super) fn get_bit(limbs: &[Limb], bit: usize) -> bool {
|
|
limbs[bit / LIMB_BITS] & (1 << (bit % LIMB_BITS)) != 0
|
|
}
|
|
|
|
/// Sets the given bit.
|
|
pub(super) fn set_bit(limbs: &mut [Limb], bit: usize) {
|
|
limbs[bit / LIMB_BITS] |= 1 << (bit % LIMB_BITS);
|
|
}
|
|
|
|
/// Clear the given bit.
|
|
pub(super) fn clear_bit(limbs: &mut [Limb], bit: usize) {
|
|
limbs[bit / LIMB_BITS] &= !(1 << (bit % LIMB_BITS));
|
|
}
|
|
|
|
/// Shifts `dst` left `bits` bits, subtract `bits` from its exponent.
|
|
pub(super) fn shift_left(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) {
|
|
if bits > 0 {
|
|
// Our exponent should not underflow.
|
|
*exp = exp.checked_sub(bits as ExpInt).unwrap();
|
|
|
|
// Jump is the inter-limb jump; shift is the intra-limb shift.
|
|
let jump = bits / LIMB_BITS;
|
|
let shift = bits % LIMB_BITS;
|
|
|
|
for i in (0..dst.len()).rev() {
|
|
let mut limb;
|
|
|
|
if i < jump {
|
|
limb = 0;
|
|
} else {
|
|
// dst[i] comes from the two limbs src[i - jump] and, if we have
|
|
// an intra-limb shift, src[i - jump - 1].
|
|
limb = dst[i - jump];
|
|
if shift > 0 {
|
|
limb <<= shift;
|
|
if i > jump {
|
|
limb |= dst[i - jump - 1] >> (LIMB_BITS - shift);
|
|
}
|
|
}
|
|
}
|
|
|
|
dst[i] = limb;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Shifts `dst` right `bits` bits noting lost fraction.
|
|
pub(super) fn shift_right(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) -> Loss {
|
|
let loss = Loss::through_truncation(dst, bits);
|
|
|
|
if bits > 0 {
|
|
// Our exponent should not overflow.
|
|
*exp = exp.checked_add(bits as ExpInt).unwrap();
|
|
|
|
// Jump is the inter-limb jump; shift is the intra-limb shift.
|
|
let jump = bits / LIMB_BITS;
|
|
let shift = bits % LIMB_BITS;
|
|
|
|
// Perform the shift. This leaves the most significant `bits` bits
|
|
// of the result at zero.
|
|
for i in 0..dst.len() {
|
|
let mut limb;
|
|
|
|
if i + jump >= dst.len() {
|
|
limb = 0;
|
|
} else {
|
|
limb = dst[i + jump];
|
|
if shift > 0 {
|
|
limb >>= shift;
|
|
if i + jump + 1 < dst.len() {
|
|
limb |= dst[i + jump + 1] << (LIMB_BITS - shift);
|
|
}
|
|
}
|
|
}
|
|
|
|
dst[i] = limb;
|
|
}
|
|
}
|
|
|
|
loss
|
|
}
|
|
|
|
/// Copies the bit vector of width `src_bits` from `src`, starting at bit SRC_LSB,
|
|
/// to `dst`, such that the bit SRC_LSB becomes the least significant bit of `dst`.
|
|
/// All high bits above `src_bits` in `dst` are zero-filled.
|
|
pub(super) fn extract(dst: &mut [Limb], src: &[Limb], src_bits: usize, src_lsb: usize) {
|
|
if src_bits == 0 {
|
|
return;
|
|
}
|
|
|
|
let dst_limbs = limbs_for_bits(src_bits);
|
|
assert!(dst_limbs <= dst.len());
|
|
|
|
let src = &src[src_lsb / LIMB_BITS..];
|
|
dst[..dst_limbs].copy_from_slice(&src[..dst_limbs]);
|
|
|
|
let shift = src_lsb % LIMB_BITS;
|
|
let _: Loss = shift_right(&mut dst[..dst_limbs], &mut 0, shift);
|
|
|
|
// We now have (dst_limbs * LIMB_BITS - shift) bits from `src`
|
|
// in `dst`. If this is less that src_bits, append the rest, else
|
|
// clear the high bits.
|
|
let n = dst_limbs * LIMB_BITS - shift;
|
|
if n < src_bits {
|
|
let mask = (1 << (src_bits - n)) - 1;
|
|
dst[dst_limbs - 1] |= (src[dst_limbs] & mask) << (n % LIMB_BITS);
|
|
} else if n > src_bits && src_bits % LIMB_BITS > 0 {
|
|
dst[dst_limbs - 1] &= (1 << (src_bits % LIMB_BITS)) - 1;
|
|
}
|
|
|
|
// Clear high limbs.
|
|
for x in &mut dst[dst_limbs..] {
|
|
*x = 0;
|
|
}
|
|
}
|
|
|
|
/// We want the most significant PRECISION bits of `src`. There may not
|
|
/// be that many; extract what we can.
|
|
pub(super) fn from_limbs(dst: &mut [Limb], src: &[Limb], precision: usize) -> (Loss, ExpInt) {
|
|
let omsb = omsb(src);
|
|
|
|
if precision <= omsb {
|
|
extract(dst, src, precision, omsb - precision);
|
|
(Loss::through_truncation(src, omsb - precision), omsb as ExpInt - 1)
|
|
} else {
|
|
extract(dst, src, omsb, 0);
|
|
(Loss::ExactlyZero, precision as ExpInt - 1)
|
|
}
|
|
}
|
|
|
|
/// For every consecutive chunk of `bits` bits from `limbs`,
|
|
/// going from most significant to the least significant bits,
|
|
/// call `f` to transform those bits and store the result back.
|
|
pub(super) fn each_chunk<F: FnMut(Limb) -> Limb>(limbs: &mut [Limb], bits: usize, mut f: F) {
|
|
assert_eq!(LIMB_BITS % bits, 0);
|
|
for limb in limbs.iter_mut().rev() {
|
|
let mut r = 0;
|
|
for i in (0..LIMB_BITS / bits).rev() {
|
|
r |= f((*limb >> (i * bits)) & ((1 << bits) - 1)) << (i * bits);
|
|
}
|
|
*limb = r;
|
|
}
|
|
}
|
|
|
|
/// Increment in-place, return the carry flag.
|
|
pub(super) fn increment(dst: &mut [Limb]) -> Limb {
|
|
for x in dst {
|
|
*x = x.wrapping_add(1);
|
|
if *x != 0 {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
1
|
|
}
|
|
|
|
/// Decrement in-place, return the borrow flag.
|
|
pub(super) fn decrement(dst: &mut [Limb]) -> Limb {
|
|
for x in dst {
|
|
*x = x.wrapping_sub(1);
|
|
if *x != !0 {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
1
|
|
}
|
|
|
|
/// `a += b + c` where `c` is zero or one. Returns the carry flag.
|
|
pub(super) fn add(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb {
|
|
assert!(c <= 1);
|
|
|
|
for (a, &b) in iter::zip(a, b) {
|
|
let (r, overflow) = a.overflowing_add(b);
|
|
let (r, overflow2) = r.overflowing_add(c);
|
|
*a = r;
|
|
c = (overflow | overflow2) as Limb;
|
|
}
|
|
|
|
c
|
|
}
|
|
|
|
/// `a -= b + c` where `c` is zero or one. Returns the borrow flag.
|
|
pub(super) fn sub(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb {
|
|
assert!(c <= 1);
|
|
|
|
for (a, &b) in iter::zip(a, b) {
|
|
let (r, overflow) = a.overflowing_sub(b);
|
|
let (r, overflow2) = r.overflowing_sub(c);
|
|
*a = r;
|
|
c = (overflow | overflow2) as Limb;
|
|
}
|
|
|
|
c
|
|
}
|
|
|
|
/// `a += b` or `a -= b`. Does not preserve `b`.
|
|
pub(super) fn add_or_sub(
|
|
a_sig: &mut [Limb],
|
|
a_exp: &mut ExpInt,
|
|
a_sign: &mut bool,
|
|
b_sig: &mut [Limb],
|
|
b_exp: ExpInt,
|
|
b_sign: bool,
|
|
) -> Loss {
|
|
// Are we bigger exponent-wise than the RHS?
|
|
let bits = *a_exp - b_exp;
|
|
|
|
// Determine if the operation on the absolute values is effectively
|
|
// an addition or subtraction.
|
|
// Subtraction is more subtle than one might naively expect.
|
|
if *a_sign ^ b_sign {
|
|
let (reverse, loss);
|
|
|
|
if bits == 0 {
|
|
reverse = cmp(a_sig, b_sig) == Ordering::Less;
|
|
loss = Loss::ExactlyZero;
|
|
} else if bits > 0 {
|
|
loss = shift_right(b_sig, &mut 0, (bits - 1) as usize);
|
|
shift_left(a_sig, a_exp, 1);
|
|
reverse = false;
|
|
} else {
|
|
loss = shift_right(a_sig, a_exp, (-bits - 1) as usize);
|
|
shift_left(b_sig, &mut 0, 1);
|
|
reverse = true;
|
|
}
|
|
|
|
let borrow = (loss != Loss::ExactlyZero) as Limb;
|
|
if reverse {
|
|
// The code above is intended to ensure that no borrow is necessary.
|
|
assert_eq!(sub(b_sig, a_sig, borrow), 0);
|
|
a_sig.copy_from_slice(b_sig);
|
|
*a_sign = !*a_sign;
|
|
} else {
|
|
// The code above is intended to ensure that no borrow is necessary.
|
|
assert_eq!(sub(a_sig, b_sig, borrow), 0);
|
|
}
|
|
|
|
// Invert the lost fraction - it was on the RHS and subtracted.
|
|
match loss {
|
|
Loss::LessThanHalf => Loss::MoreThanHalf,
|
|
Loss::MoreThanHalf => Loss::LessThanHalf,
|
|
_ => loss,
|
|
}
|
|
} else {
|
|
let loss = if bits > 0 {
|
|
shift_right(b_sig, &mut 0, bits as usize)
|
|
} else {
|
|
shift_right(a_sig, a_exp, -bits as usize)
|
|
};
|
|
// We have a guard bit; generating a carry cannot happen.
|
|
assert_eq!(add(a_sig, b_sig, 0), 0);
|
|
loss
|
|
}
|
|
}
|
|
|
|
/// `[low, high] = a * b`.
|
|
///
|
|
/// This cannot overflow, because
|
|
///
|
|
/// `(n - 1) * (n - 1) + 2 * (n - 1) == (n - 1) * (n + 1)`
|
|
///
|
|
/// which is less than n<sup>2</sup>.
|
|
pub(super) fn widening_mul(a: Limb, b: Limb) -> [Limb; 2] {
|
|
let mut wide = [0, 0];
|
|
|
|
if a == 0 || b == 0 {
|
|
return wide;
|
|
}
|
|
|
|
const HALF_BITS: usize = LIMB_BITS / 2;
|
|
|
|
let select = |limb, i| (limb >> (i * HALF_BITS)) & ((1 << HALF_BITS) - 1);
|
|
for i in 0..2 {
|
|
for j in 0..2 {
|
|
let mut x = [select(a, i) * select(b, j), 0];
|
|
shift_left(&mut x, &mut 0, (i + j) * HALF_BITS);
|
|
assert_eq!(add(&mut wide, &x, 0), 0);
|
|
}
|
|
}
|
|
|
|
wide
|
|
}
|
|
|
|
/// `dst = a * b` (for normal `a` and `b`). Returns the lost fraction.
|
|
pub(super) fn mul<'a>(
|
|
dst: &mut [Limb],
|
|
exp: &mut ExpInt,
|
|
mut a: &'a [Limb],
|
|
mut b: &'a [Limb],
|
|
precision: usize,
|
|
) -> Loss {
|
|
// Put the narrower number on the `a` for less loops below.
|
|
if a.len() > b.len() {
|
|
mem::swap(&mut a, &mut b);
|
|
}
|
|
|
|
for x in &mut dst[..b.len()] {
|
|
*x = 0;
|
|
}
|
|
|
|
for i in 0..a.len() {
|
|
let mut carry = 0;
|
|
for j in 0..b.len() {
|
|
let [low, mut high] = widening_mul(a[i], b[j]);
|
|
|
|
// Now add carry.
|
|
let (low, overflow) = low.overflowing_add(carry);
|
|
high += overflow as Limb;
|
|
|
|
// And now `dst[i + j]`, and store the new low part there.
|
|
let (low, overflow) = low.overflowing_add(dst[i + j]);
|
|
high += overflow as Limb;
|
|
|
|
dst[i + j] = low;
|
|
carry = high;
|
|
}
|
|
dst[i + b.len()] = carry;
|
|
}
|
|
|
|
// Assume the operands involved in the multiplication are single-precision
|
|
// FP, and the two multiplicants are:
|
|
// a = a23 . a22 ... a0 * 2^e1
|
|
// b = b23 . b22 ... b0 * 2^e2
|
|
// the result of multiplication is:
|
|
// dst = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
|
|
// Note that there are three significant bits at the left-hand side of the
|
|
// radix point: two for the multiplication, and an overflow bit for the
|
|
// addition (that will always be zero at this point). Move the radix point
|
|
// toward left by two bits, and adjust exponent accordingly.
|
|
*exp += 2;
|
|
|
|
// Convert the result having "2 * precision" significant-bits back to the one
|
|
// having "precision" significant-bits. First, move the radix point from
|
|
// poision "2*precision - 1" to "precision - 1". The exponent need to be
|
|
// adjusted by "2*precision - 1" - "precision - 1" = "precision".
|
|
*exp -= precision as ExpInt + 1;
|
|
|
|
// In case MSB resides at the left-hand side of radix point, shift the
|
|
// mantissa right by some amount to make sure the MSB reside right before
|
|
// the radix point (i.e., "MSB . rest-significant-bits").
|
|
//
|
|
// Note that the result is not normalized when "omsb < precision". So, the
|
|
// caller needs to call IeeeFloat::normalize() if normalized value is
|
|
// expected.
|
|
let omsb = omsb(dst);
|
|
if omsb <= precision { Loss::ExactlyZero } else { shift_right(dst, exp, omsb - precision) }
|
|
}
|
|
|
|
/// `quotient = dividend / divisor`. Returns the lost fraction.
|
|
/// Does not preserve `dividend` or `divisor`.
|
|
pub(super) fn div(
|
|
quotient: &mut [Limb],
|
|
exp: &mut ExpInt,
|
|
dividend: &mut [Limb],
|
|
divisor: &mut [Limb],
|
|
precision: usize,
|
|
) -> Loss {
|
|
// Normalize the divisor.
|
|
let bits = precision - omsb(divisor);
|
|
shift_left(divisor, &mut 0, bits);
|
|
*exp += bits as ExpInt;
|
|
|
|
// Normalize the dividend.
|
|
let bits = precision - omsb(dividend);
|
|
shift_left(dividend, exp, bits);
|
|
|
|
// Division by 1.
|
|
let olsb_divisor = olsb(divisor);
|
|
if olsb_divisor == precision {
|
|
quotient.copy_from_slice(dividend);
|
|
return Loss::ExactlyZero;
|
|
}
|
|
|
|
// Ensure the dividend >= divisor initially for the loop below.
|
|
// Incidentally, this means that the division loop below is
|
|
// guaranteed to set the integer bit to one.
|
|
if cmp(dividend, divisor) == Ordering::Less {
|
|
shift_left(dividend, exp, 1);
|
|
assert_ne!(cmp(dividend, divisor), Ordering::Less)
|
|
}
|
|
|
|
// Helper for figuring out the lost fraction.
|
|
let lost_fraction = |dividend: &[Limb], divisor: &[Limb]| match cmp(dividend, divisor) {
|
|
Ordering::Greater => Loss::MoreThanHalf,
|
|
Ordering::Equal => Loss::ExactlyHalf,
|
|
Ordering::Less => {
|
|
if is_all_zeros(dividend) {
|
|
Loss::ExactlyZero
|
|
} else {
|
|
Loss::LessThanHalf
|
|
}
|
|
}
|
|
};
|
|
|
|
// Try to perform a (much faster) short division for small divisors.
|
|
let divisor_bits = precision - (olsb_divisor - 1);
|
|
macro_rules! try_short_div {
|
|
($W:ty, $H:ty, $half:expr) => {
|
|
if divisor_bits * 2 <= $half {
|
|
// Extract the small divisor.
|
|
let _: Loss = shift_right(divisor, &mut 0, olsb_divisor - 1);
|
|
let divisor = divisor[0] as $H as $W;
|
|
|
|
// Shift the dividend to produce a quotient with the unit bit set.
|
|
let top_limb = *dividend.last().unwrap();
|
|
let mut rem = (top_limb >> (LIMB_BITS - (divisor_bits - 1))) as $H;
|
|
shift_left(dividend, &mut 0, divisor_bits - 1);
|
|
|
|
// Apply short division in place on $H (of $half bits) chunks.
|
|
each_chunk(dividend, $half, |chunk| {
|
|
let chunk = chunk as $H;
|
|
let combined = ((rem as $W) << $half) | (chunk as $W);
|
|
rem = (combined % divisor) as $H;
|
|
(combined / divisor) as $H as Limb
|
|
});
|
|
quotient.copy_from_slice(dividend);
|
|
|
|
return lost_fraction(&[(rem as Limb) << 1], &[divisor as Limb]);
|
|
}
|
|
};
|
|
}
|
|
|
|
try_short_div!(u32, u16, 16);
|
|
try_short_div!(u64, u32, 32);
|
|
try_short_div!(u128, u64, 64);
|
|
|
|
// Zero the quotient before setting bits in it.
|
|
for x in &mut quotient[..limbs_for_bits(precision)] {
|
|
*x = 0;
|
|
}
|
|
|
|
// Long division.
|
|
for bit in (0..precision).rev() {
|
|
if cmp(dividend, divisor) != Ordering::Less {
|
|
sub(dividend, divisor, 0);
|
|
set_bit(quotient, bit);
|
|
}
|
|
shift_left(dividend, &mut 0, 1);
|
|
}
|
|
|
|
lost_fraction(dividend, divisor)
|
|
}
|
|
}
|