fixed/
f128.rs

1// Copyright © 2018–2025 Trevor Spiteri
2
3// This library is free software: you can redistribute it and/or
4// modify it under the terms of either
5//
6//   * the Apache License, Version 2.0 or
7//   * the MIT License
8//
9// at your option.
10//
// You should have received copies of the Apache License and the MIT
12// License along with the library. If not, see
13// <https://www.apache.org/licenses/LICENSE-2.0> and
14// <https://opensource.org/licenses/MIT>.
15
16//! Constants specific to the [`F128`] quadruple-precision floating-point type.
17//!
18//! Mathematically significant numbers are provided in the [`consts`] sub-module.
19//!
20//! For constants related to the floating-point representation itself, see the
21//! associated constants defined directly on the [`F128`] type.
22//!
23//! # Planned deprecation
24//!
25//! This module will be deprecated when the [`f128` feature] is ready and
26//! stabilized.
27//!
28//! [`f128` feature]: https://github.com/rust-lang/rust/issues/116909
29
30use crate::F128;
31use core::cmp::Ordering;
32use core::hash::{Hash, Hasher};
33use core::num::FpCategory;
34use core::ops::Neg;
35use half::{bf16 as half_bf16, f16 as half_f16};
36
/// Precision of *binary128* in bits, counting the implicit leading bit.
const PREC: u32 = 113;
/// Width of the exponent field: what is left of the 128 bits after the
/// `PREC - 1` stored mantissa bits and the sign bit.
const EXP_BITS: u32 = u128::BITS - PREC;
/// Bias added to the true exponent to obtain the stored exponent field.
const EXP_BIAS: u32 = (1u32 << (EXP_BITS - 1)) - 1;
/// Mask selecting the sign bit, which is the most significant bit.
const SIGN_MASK: u128 = 1u128 << (u128::BITS - 1);
/// Mask selecting the stored mantissa bits, which are the least significant bits.
const MANT_MASK: u128 = (1u128 << (PREC - 1)) - 1;
/// Mask selecting the exponent field, which sits between the mantissa and the sign bit.
const EXP_MASK: u128 = ((1u128 << EXP_BITS) - 1) << (PREC - 1);
43
pub(crate) mod private {
    /// A *binary128* floating-point number (`f128`), stored as its raw bits.
    ///
    /// The purpose of this type is to
    ///
    ///   * convert between fixed-point numbers and the bit representation of
    ///     128-bit floating-point numbers, and
    ///   * compare fixed-point numbers against the bit representation of
    ///     128-bit floating-point numbers.
    ///
    /// Arithmetic and general analytic functions are *not* provided.
    ///
    /// More information on *binary128* can be found in
    /// [<i>Quadruple-precision floating-point format</i> on Wikipedia][quad].
    ///
    /// *See also the <code>[fixed]::[f128]::[consts]</code> module.*
    ///
    /// # Planned deprecation
    ///
    /// This struct will be deprecated when the [`f128` feature] is ready and
    /// stabilized.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use fixed::types::I16F16;
    /// use fixed::F128;
    /// assert_eq!(I16F16::ONE.to_num::<F128>(), F128::ONE);
    /// assert_eq!(I16F16::from_num(F128::ONE), I16F16::ONE);
    ///
    /// // fixed-point numbers can be compared directly to F128 values
    /// assert!(I16F16::from_num(1.5) > F128::ONE);
    /// assert!(I16F16::from_num(0.5) < F128::ONE);
    /// ```
    ///
    /// [`f128` feature]: https://github.com/rust-lang/rust/issues/116909
    /// [consts]: crate::f128::consts
    /// [f128]: crate::f128
    /// [fixed]: crate
    /// [quad]: https://en.wikipedia.org/wiki/Quadruple-precision_floating-point_format
    #[derive(Clone, Copy, Debug, Default)]
    pub struct F128 {
        // Raw binary128 bit pattern; the derived default of 0 is positive zero.
        pub(crate) bits: u128,
    }
}
89
90impl F128 {
91    /// Zero.
92    pub const ZERO: F128 = F128::from_bits(0);
93    /// Negative zero (&minus;0).
94    pub const NEG_ZERO: F128 = F128::from_bits(SIGN_MASK);
95    /// One.
96    pub const ONE: F128 = F128::from_bits((EXP_BIAS as u128) << (PREC - 1));
97    /// Negative one (&minus;1).
98    pub const NEG_ONE: F128 = F128::from_bits(SIGN_MASK | F128::ONE.to_bits());
99
100    /// Smallest positive subnormal number.
101    ///
102    /// Equal to 2<sup>[`MIN_EXP`]&nbsp;&minus;&nbsp;[`MANTISSA_DIGITS`]</sup>.
103    ///
104    /// [`MANTISSA_DIGITS`]: Self::MANTISSA_DIGITS
105    /// [`MIN_EXP`]: Self::MIN_EXP
106    pub const MIN_POSITIVE_SUB: F128 = F128::from_bits(1);
107
108    /// Smallest positive normal number.
109    ///
110    /// Equal to 2<sup>[`MIN_EXP`]&nbsp;&minus;&nbsp;1</sup>.
111    ///
112    /// [`MIN_EXP`]: Self::MIN_EXP
113    pub const MIN_POSITIVE: F128 = F128::from_bits(MANT_MASK + 1);
114
115    /// Largest finite number.
116    ///
117    /// Equal to
118    /// (1&nbsp;&minus;&nbsp;2<sup>&minus;[`MANTISSA_DIGITS`]</sup>)&nbsp;2<sup>[`MAX_EXP`]</sup>.
119    ///
120    /// [`MANTISSA_DIGITS`]: Self::MANTISSA_DIGITS
121    /// [`MAX_EXP`]: Self::MAX_EXP
122    pub const MAX: F128 = F128::from_bits(EXP_MASK - 1);
123
124    /// Smallest finite number (&minus;[`MAX`]).
125    ///
126    /// [`MAX`]: Self::MAX
127    pub const MIN: F128 = F128::from_bits(SIGN_MASK | F128::MAX.to_bits());
128
129    /// Infinity (∞).
130    pub const INFINITY: F128 = F128::from_bits(EXP_MASK);
131
132    /// Negative infinity (&minus;∞).
133    pub const NEG_INFINITY: F128 = F128::from_bits(SIGN_MASK | EXP_MASK);
134
135    /// NaN.
136    pub const NAN: F128 = F128::from_bits(EXP_MASK | (1u128 << (PREC - 2)));
137
138    /// The radix or base of the internal representation (2).
139    pub const RADIX: u32 = 2;
140
141    /// Number of significant digits in base 2.
142    pub const MANTISSA_DIGITS: u32 = PREC;
143
144    /// Maximum <i>x</i> such that any decimal number with <i>x</i> significant
145    /// digits can be converted to [`F128`] and back without loss.
146    ///
147    /// Equal to
148    /// floor(log<sub>10</sub>&nbsp;2<sup>[`MANTISSA_DIGITS`]&nbsp;&minus;&nbsp;1</sup>).
149    ///
150    /// [`MANTISSA_DIGITS`]: Self::MANTISSA_DIGITS
151    pub const DIGITS: u32 = 33;
152
153    /// The difference between 1 and the next larger representable number.
154    ///
155    /// Equal to 2<sup>1&nbsp;&minus;&nbsp;[`MANTISSA_DIGITS`]</sup>.
156    ///
157    /// [`MANTISSA_DIGITS`]: Self::MANTISSA_DIGITS
158    pub const EPSILON: F128 = F128::from_bits(((EXP_BIAS - (PREC - 1)) as u128) << (PREC - 1));
159
160    /// If <i>x</i>&nbsp;=&nbsp;`MIN_EXP`, then normal numbers
161    /// ≥&nbsp;0.5&nbsp;×&nbsp;2<sup><i>x</i></sup>.
162    pub const MIN_EXP: i32 = 3 - F128::MAX_EXP;
163
164    /// If <i>x</i>&nbsp;=&nbsp;`MAX_EXP`, then normal numbers
165    /// <&nbsp;1&nbsp;×&nbsp;2<sup><i>x</i></sup>.
166    pub const MAX_EXP: i32 = EXP_BIAS as i32 + 1;
167
168    /// Minimum <i>x</i> for which 10<sup><i>x</i></sup> is in the normal range
169    /// of [`F128`].
170    ///
171    /// Equal to ceil(log<sub>10</sub>&nbsp;[`MIN_POSITIVE`]).
172    ///
173    /// [`MIN_POSITIVE`]: Self::MIN_POSITIVE
174    pub const MIN_10_EXP: i32 = -4931;
175
176    /// Maximum <i>x</i> for which 10<sup><i>x</i></sup> is in the normal range
177    /// of [`F128`].
178    ///
179    /// Equal to floor(log<sub>10</sub>&nbsp;[`MAX`]).
180    ///
181    /// [`MAX`]: Self::MAX
182    pub const MAX_10_EXP: i32 = 4932;
183
184    /// Raw transmutation from [`u128`].
185    ///
186    /// # Examples
187    ///
188    /// ```rust
189    /// use fixed::F128;
190    /// let infinity_bits = 0x7FFF_u128 << 112;
191    /// assert!(F128::from_bits(infinity_bits - 1).is_finite());
192    /// assert!(!F128::from_bits(infinity_bits).is_finite());
193    /// ```
194    #[inline]
195    #[must_use]
196    pub const fn from_bits(bits: u128) -> F128 {
197        F128 { bits }
198    }
199
200    /// Raw transmutation to [`u128`].
201    ///
202    /// # Examples
203    ///
204    /// ```rust
205    /// use fixed::F128;
206    /// assert_eq!(F128::ONE.to_bits(), 0x3FFF_u128 << 112);
207    /// assert_ne!(F128::ONE.to_bits(), 1u128);
208    /// ```
209    #[inline]
210    #[must_use]
211    pub const fn to_bits(self) -> u128 {
212        self.bits
213    }
214
215    /// Creates a number from a byte array in big-endian byte order.
216    #[inline]
217    #[must_use]
218    pub const fn from_be_bytes(bytes: [u8; 16]) -> F128 {
219        F128::from_bits(u128::from_be_bytes(bytes))
220    }
221
222    /// Creates a number from a byte array in little-endian byte order.
223    #[inline]
224    #[must_use]
225    pub const fn from_le_bytes(bytes: [u8; 16]) -> F128 {
226        F128::from_bits(u128::from_le_bytes(bytes))
227    }
228
229    /// Creates a number from a byte array in native-endian byte order.
230    #[inline]
231    #[must_use]
232    pub const fn from_ne_bytes(bytes: [u8; 16]) -> F128 {
233        F128::from_bits(u128::from_ne_bytes(bytes))
234    }
235
236    /// Returns the memory representation of the number as a byte array in
237    /// big-endian byte order.
238    #[inline]
239    #[must_use]
240    pub const fn to_be_bytes(self) -> [u8; 16] {
241        self.to_bits().to_be_bytes()
242    }
243
244    /// Returns the memory representation of the number as a byte array in
245    /// little-endian byte order.
246    #[inline]
247    #[must_use]
248    pub const fn to_le_bytes(self) -> [u8; 16] {
249        self.to_bits().to_le_bytes()
250    }
251
252    /// Returns the memory representation of the number as a byte array in
253    /// native-endian byte order.
254    #[inline]
255    #[must_use]
256    pub const fn to_ne_bytes(self) -> [u8; 16] {
257        self.to_bits().to_ne_bytes()
258    }
259
260    /// Returns [`true`] if the number is NaN.
261    ///
262    /// # Example
263    ///
264    /// ```rust
265    /// use fixed::F128;
266    ///
267    /// assert!(F128::NAN.is_nan());
268    ///
269    /// assert!(!F128::ONE.is_nan());
270    /// assert!(!F128::INFINITY.is_nan());
271    /// assert!(!F128::NEG_INFINITY.is_nan());
272    /// ```
273    #[inline]
274    #[must_use]
275    pub const fn is_nan(self) -> bool {
276        (self.to_bits() & !SIGN_MASK) > EXP_MASK
277    }
278
279    /// Returns [`true`] if the number is infinite.
280    ///
281    /// # Example
282    ///
283    /// ```rust
284    /// use fixed::F128;
285    ///
286    /// assert!(F128::INFINITY.is_infinite());
287    /// assert!(F128::NEG_INFINITY.is_infinite());
288    ///
289    /// assert!(!F128::ONE.is_infinite());
290    /// assert!(!F128::NAN.is_infinite());
291    /// ```
292    #[inline]
293    #[must_use]
294    pub const fn is_infinite(self) -> bool {
295        (self.to_bits() & !SIGN_MASK) == EXP_MASK
296    }
297
298    /// Returns [`true`] if the number is neither infinite nor NaN.
299    ///
300    /// # Example
301    ///
302    /// ```rust
303    /// use fixed::F128;
304    ///
305    /// assert!(F128::ONE.is_finite());
306    /// assert!(F128::MAX.is_finite());
307    ///
308    /// assert!(!F128::INFINITY.is_finite());
309    /// assert!(!F128::NEG_INFINITY.is_finite());
310    /// assert!(!F128::NAN.is_finite());
311    /// ```
312    #[inline]
313    #[must_use]
314    pub const fn is_finite(self) -> bool {
315        (self.to_bits() & EXP_MASK) != EXP_MASK
316    }
317
318    /// Returns [`true`] if the number is zero.
319    ///
320    /// # Example
321    ///
322    /// ```rust
323    /// use fixed::F128;
324    ///
325    /// assert!(F128::ZERO.is_zero());
326    /// assert!(F128::NEG_ZERO.is_zero());
327    ///
328    /// assert!(!F128::MIN_POSITIVE_SUB.is_zero());
329    /// assert!(!F128::NAN.is_zero());
330    /// ```
331    #[inline]
332    #[must_use]
333    pub const fn is_zero(self) -> bool {
334        (self.to_bits() & !SIGN_MASK) == 0
335    }
336
337    /// Returns [`true`] if the number is subnormal.
338    ///
339    /// # Example
340    ///
341    /// ```rust
342    /// use fixed::F128;
343    ///
344    /// assert!(F128::MIN_POSITIVE_SUB.is_subnormal());
345    ///
346    /// assert!(!F128::ZERO.is_subnormal());
347    /// assert!(!F128::MIN_POSITIVE.is_subnormal());
348    /// ```
349    #[inline]
350    #[must_use]
351    pub const fn is_subnormal(self) -> bool {
352        let abs = self.to_bits() & !SIGN_MASK;
353        0 < abs && abs < F128::MIN_POSITIVE.to_bits()
354    }
355
356    /// Returns [`true`] if the number is neither zero, infinite, subnormal, or NaN.
357    ///
358    /// # Example
359    ///
360    /// ```rust
361    /// use fixed::F128;
362    ///
363    /// assert!(F128::MIN.is_normal());
364    /// assert!(F128::MIN_POSITIVE.is_normal());
365    /// assert!(F128::MAX.is_normal());
366    ///
367    /// assert!(!F128::ZERO.is_normal());
368    /// assert!(!F128::MIN_POSITIVE_SUB.is_normal());
369    /// assert!(!F128::INFINITY.is_normal());
370    /// assert!(!F128::NAN.is_normal());
371    /// ```
372    #[inline]
373    #[must_use]
374    pub const fn is_normal(self) -> bool {
375        let abs = self.to_bits() & !SIGN_MASK;
376        F128::MIN_POSITIVE.to_bits() <= abs && abs <= F128::MAX.to_bits()
377    }
378
379    /// Returns the floating point category of the number.
380    ///
381    /// If only one property is going to be tested, it is generally faster to
382    /// use the specific predicate instead.
383    ///
384    /// # Example
385    ///
386    /// ```rust
387    /// use core::num::FpCategory;
388    /// use fixed::F128;
389    ///
390    /// assert_eq!(F128::ZERO.classify(), FpCategory::Zero);
391    /// assert_eq!(F128::MIN_POSITIVE_SUB.classify(), FpCategory::Subnormal);
392    /// assert_eq!(F128::MIN_POSITIVE.classify(), FpCategory::Normal);
393    /// assert_eq!(F128::INFINITY.classify(), FpCategory::Infinite);
394    /// assert_eq!(F128::NAN.classify(), FpCategory::Nan);
395    /// ```
396    #[inline]
397    #[must_use]
398    pub const fn classify(self) -> FpCategory {
399        let exp = self.to_bits() & EXP_MASK;
400        let mant = self.to_bits() & MANT_MASK;
401        if exp == 0 {
402            if mant == 0 {
403                FpCategory::Zero
404            } else {
405                FpCategory::Subnormal
406            }
407        } else if exp == EXP_MASK {
408            if mant == 0 {
409                FpCategory::Infinite
410            } else {
411                FpCategory::Nan
412            }
413        } else {
414            FpCategory::Normal
415        }
416    }
417
418    /// Returns the absolute value of the number.
419    ///
420    /// The only difference possible between the input value and the returned
421    /// value is in the sign bit, which is always cleared in the return value.
422    ///
423    /// # Example
424    ///
425    /// ```rust
426    /// use fixed::F128;
427    ///
428    /// // -0 == +0, but -0 bits != +0 bits
429    /// assert_eq!(F128::NEG_ZERO, F128::ZERO);
430    /// assert_ne!(F128::NEG_ZERO.to_bits(), F128::ZERO.to_bits());
431    /// assert_eq!(F128::NEG_ZERO.abs().to_bits(), F128::ZERO.to_bits());
432    ///
433    /// assert_eq!(F128::NEG_INFINITY.abs(), F128::INFINITY);
434    /// assert_eq!(F128::MIN.abs(), F128::MAX);
435    ///
436    /// assert!(F128::NAN.abs().is_nan());
437    /// ```
438    #[inline]
439    #[must_use]
440    pub const fn abs(self) -> F128 {
441        F128::from_bits(self.to_bits() & !SIGN_MASK)
442    }
443
444    /// Returns a number that represents the sign of the input value.
445    ///
446    ///   * 1 if the number is positive, +0, or +∞
447    ///   * &minus;1 if the number is negative, &minus;0, or &minus;∞
448    ///   * NaN if the number is NaN
449    ///
450    /// # Example
451    ///
452    /// ```rust
453    /// use fixed::F128;
454    ///
455    /// assert_eq!(F128::ONE.signum(), F128::ONE);
456    /// assert_eq!(F128::INFINITY.signum(), F128::ONE);
457    /// assert_eq!(F128::NEG_ZERO.signum(), F128::NEG_ONE);
458    /// assert_eq!(F128::MIN.signum(), F128::NEG_ONE);
459    ///
460    /// assert!(F128::NAN.signum().is_nan());
461    /// ```
462    #[inline]
463    #[must_use]
464    pub const fn signum(self) -> F128 {
465        if self.is_nan() {
466            self
467        } else if self.is_sign_positive() {
468            F128::ONE
469        } else {
470            F128::NEG_ONE
471        }
472    }
473
474    /// Returns a number composed of the magnitude of `self` and the sign of `sign`.
475    ///
476    /// # Example
477    ///
478    /// ```rust
479    /// use fixed::F128;
480    ///
481    /// assert_eq!(F128::ONE.copysign(F128::NEG_ZERO), F128::NEG_ONE);
482    /// assert_eq!(F128::ONE.copysign(F128::ZERO), F128::ONE);
483    /// assert_eq!(F128::NEG_ONE.copysign(F128::NEG_INFINITY), F128::NEG_ONE);
484    /// assert_eq!(F128::NEG_ONE.copysign(F128::INFINITY), F128::ONE);
485    ///
486    /// assert!(F128::NAN.copysign(F128::ONE).is_nan());
487    /// assert!(F128::NAN.copysign(F128::ONE).is_sign_positive());
488    /// assert!(F128::NAN.copysign(F128::NEG_ONE).is_sign_negative());
489    /// ```
490    #[inline]
491    #[must_use]
492    pub const fn copysign(self, sign: F128) -> F128 {
493        F128::from_bits((self.to_bits() & !SIGN_MASK) | (sign.to_bits() & SIGN_MASK))
494    }
495
496    /// Returns [`true`] if the number has a positive sign, including +0, +∞,
497    /// and NaN without a negative sign bit.
498    ///
499    /// # Example
500    ///
501    /// ```rust
502    /// use fixed::F128;
503    ///
504    /// assert!(F128::ZERO.is_sign_positive());
505    /// assert!(F128::MAX.is_sign_positive());
506    /// assert!(F128::INFINITY.is_sign_positive());
507    ///
508    /// assert!(!F128::NEG_ZERO.is_sign_positive());
509    /// assert!(!F128::MIN.is_sign_positive());
510    /// assert!(!F128::NEG_INFINITY.is_sign_positive());
511    /// ```
512    #[inline]
513    #[must_use]
514    pub const fn is_sign_positive(self) -> bool {
515        (self.to_bits() & SIGN_MASK) == 0
516    }
517
518    /// Returns [`true`] if the number has a negative sign, including &minus;0,
519    /// &minus;∞, and NaN with a negative sign bit.
520    ///
521    /// # Example
522    ///
523    /// ```rust
524    /// use fixed::F128;
525    ///
526    /// assert!(F128::NEG_ZERO.is_sign_negative());
527    /// assert!(F128::MIN.is_sign_negative());
528    /// assert!(F128::NEG_INFINITY.is_sign_negative());
529    ///
530    /// assert!(!F128::ZERO.is_sign_negative());
531    /// assert!(!F128::MAX.is_sign_negative());
532    /// assert!(!F128::INFINITY.is_sign_negative());
533    /// ```
534    #[inline]
535    #[must_use]
536    pub const fn is_sign_negative(self) -> bool {
537        (self.to_bits() & SIGN_MASK) != 0
538    }
539
540    /// Returns the maximum of two numbers, ignoring NaN.
541    ///
542    /// If one of the arguments is NaN, then the other argument is returned.
543    ///
544    /// # Example
545    ///
546    /// ```rust
547    /// use fixed::F128;
548    ///
549    /// assert_eq!(F128::ZERO.max(F128::ONE), F128::ONE);
550    /// ```
551    #[inline]
552    #[must_use]
553    pub const fn max(self, other: F128) -> F128 {
554        if self.is_nan() || matches!(partial_cmp(&self, &other), Some(Ordering::Less)) {
555            other
556        } else {
557            self
558        }
559    }
560
561    /// Returns the minimum of two numbers, ignoring NaN.
562    ///
563    /// If one of the arguments is NaN, then the other argument is returned.
564    ///
565    /// # Example
566    ///
567    /// ```rust
568    /// use fixed::F128;
569    ///
570    /// assert_eq!(F128::ZERO.min(F128::ONE), F128::ZERO);
571    /// ```
572    #[inline]
573    #[must_use]
574    pub const fn min(self, other: F128) -> F128 {
575        if self.is_nan() || matches!(partial_cmp(&self, &other), Some(Ordering::Greater)) {
576            other
577        } else {
578            self
579        }
580    }
581
582    /// Clamps the value within the specified bounds.
583    ///
584    /// Returns `min` if `self`&nbsp;<&nbsp;`min`, `max` if
585    /// `self`&nbsp;>&nbsp;`max`, or `self` otherwise.
586    ///
587    /// Note that this method returns NaN if the initial value is NaN.
588    ///
589    /// # Panics
590    ///
591    /// Panics if `min`&nbsp;>&nbsp;`max`, `min` is NaN, or `max` is NaN.
592    ///
593    /// # Examples
594    ///
595    /// ```
596    /// use fixed::F128;
597    /// assert_eq!(F128::MIN.clamp(F128::NEG_ONE, F128::ONE), F128::NEG_ONE);
598    /// assert_eq!(F128::ZERO.clamp(F128::NEG_ONE, F128::ONE), F128::ZERO);
599    /// assert_eq!(F128::MAX.clamp(F128::NEG_ONE, F128::ONE), F128::ONE);
600    /// assert!(F128::NAN.clamp(F128::NEG_ONE, F128::ONE).is_nan());
601    /// ```
602    #[inline]
603    #[track_caller]
604    #[must_use]
605    pub const fn clamp(mut self, min: F128, max: F128) -> F128 {
606        match partial_cmp(&min, &max) {
607            Some(Ordering::Less | Ordering::Equal) => {}
608            _ => panic!("need min <= max"),
609        }
610        if matches!(partial_cmp(&self, &min), Some(Ordering::Less)) {
611            self = min;
612        }
613        if matches!(partial_cmp(&self, &max), Some(Ordering::Greater)) {
614            self = max;
615        }
616        self
617    }
618
619    /// Returns the ordering between `self` and `other`.
620    ///
621    /// Unlike the [`PartialOrd`] implementation, this method always returns an
622    /// order in the following sequence:
623    ///
624    ///   * NaN with the sign bit set
625    ///   * &minus;∞
626    ///   * negative normal numbers
627    ///   * negative subnormal numbers
628    ///   * &minus;0
629    ///   * +0
630    ///   * positive subnormal numbers
631    ///   * positive normal numbers
632    ///   * +∞
633    ///   * NaN with the sign bit cleared
634    ///
635    /// # Example
636    ///
637    /// ```rust
638    /// use core::cmp::Ordering;
639    /// use fixed::F128;
640    ///
641    /// let neg_nan = F128::NAN.copysign(F128::NEG_ONE);
642    /// let pos_nan = F128::NAN.copysign(F128::ONE);
643    /// let neg_inf = F128::NEG_INFINITY;
644    /// let pos_inf = F128::INFINITY;
645    /// let neg_zero = F128::NEG_ZERO;
646    /// let pos_zero = F128::ZERO;
647    ///
648    /// assert_eq!(neg_nan.total_cmp(&neg_inf), Ordering::Less);
649    /// assert_eq!(pos_nan.total_cmp(&pos_inf), Ordering::Greater);
650    /// assert_eq!(neg_zero.total_cmp(&pos_zero), Ordering::Less);
651    /// ```
652    #[inline]
653    #[must_use]
654    pub const fn total_cmp(&self, other: &F128) -> Ordering {
655        let a = self.to_bits();
656        let b = other.to_bits();
657        match (self.is_sign_negative(), other.is_sign_negative()) {
658            (false, false) => cmp_bits(a, b),
659            (true, true) => cmp_bits(b, a),
660            (false, true) => Ordering::Greater,
661            (true, false) => Ordering::Less,
662        }
663    }
664}
665
/// Three-way comparison of two `u128` values that can be used in `const fn`.
const fn cmp_bits(a: u128, b: u128) -> Ordering {
    if a > b {
        Ordering::Greater
    } else if a < b {
        Ordering::Less
    } else {
        Ordering::Equal
    }
}
675
676impl PartialEq for F128 {
677    #[inline]
678    fn eq(&self, other: &F128) -> bool {
679        if self.is_nan() || other.is_nan() {
680            return false;
681        }
682        let a = self.to_bits();
683        let b = other.to_bits();
684        // handle zero
685        if ((a | b) & !SIGN_MASK) == 0 {
686            return true;
687        }
688        a == b
689    }
690}
691
impl PartialOrd for F128 {
    /// Compares two numbers as floating-point values.
    ///
    /// The work is done by the module-level `partial_cmp` function, which
    /// returns [`None`] if either operand is NaN and treats +0 and &minus;0
    /// as equal.
    #[inline]
    fn partial_cmp(&self, other: &F128) -> Option<Ordering> {
        // The unqualified path names the free function, not this method.
        partial_cmp(self, other)
    }
}
698
699#[inline]
700const fn partial_cmp(a: &F128, b: &F128) -> Option<Ordering> {
701    if a.is_nan() || b.is_nan() {
702        return None;
703    }
704    let a_bits = a.to_bits();
705    let b_bits = b.to_bits();
706    // handle zero
707    if ((a_bits | b_bits) & !SIGN_MASK) == 0 {
708        return Some(Ordering::Equal);
709    }
710    match (a.is_sign_negative(), b.is_sign_negative()) {
711        (false, false) => Some(cmp_bits(a_bits, b_bits)),
712        (true, true) => Some(cmp_bits(b_bits, a_bits)),
713        (false, true) => Some(Ordering::Greater),
714        (true, false) => Some(Ordering::Less),
715    }
716}
717
718impl Hash for F128 {
719    #[inline]
720    fn hash<H>(&self, state: &mut H)
721    where
722        H: Hasher,
723    {
724        let mut bits = self.to_bits();
725        if bits == F128::NEG_ZERO.to_bits() {
726            bits = 0;
727        }
728        bits.hash(state);
729    }
730}
731
732impl Neg for F128 {
733    type Output = F128;
734    #[inline]
735    fn neg(self) -> F128 {
736        F128::from_bits(self.to_bits() ^ SIGN_MASK)
737    }
738}
739
// Implements `From<$f> for F128`, where `$f` is a floating-point type and `$u`
// is the unsigned integer type used for the bits of `$f`.
//
// `$f` has to provide `MANTISSA_DIGITS`, `MIN_EXP` and `to_bits`, and `$u` has
// to provide `BITS` and be convertible with `u128::from`.
//
// The conversion splits the source bits into sign, exponent and mantissa
// fields, and then rebuilds the fields for the wider binary128 layout.
macro_rules! from_float {
    ($f:ident, $u:ident) => {
        impl From<$f> for F128 {
            fn from(src: $f) -> F128 {
                // Layout of the source type; the `_S` suffix mirrors the
                // module-level constants that describe binary128.
                const PREC_S: u32 = $f::MANTISSA_DIGITS;
                const EXP_BITS_S: u32 = $u::BITS - PREC_S;
                const EXP_BIAS_S: u32 = (1 << (EXP_BITS_S - 1)) - 1;
                const SIGN_MASK_S: $u = 1 << ($u::BITS - 1);
                const EXP_MASK_S: $u = ((1 << EXP_BITS_S) - 1) << (PREC_S - 1);
                const MANT_MASK_S: $u = (1 << (PREC_S - 1)) - 1;

                let b = src.to_bits();
                let sign_bit_s = b & SIGN_MASK_S;
                let exp_bits_s = b & EXP_MASK_S;
                let mant_bits_s = b & MANT_MASK_S;
                // Move the sign from the top bit of `$u` to the top bit of `u128`.
                let sign_bit = u128::from(sign_bit_s) << (u128::BITS - $u::BITS);

                if exp_bits_s == EXP_MASK_S {
                    if mant_bits_s == 0 {
                        // infinity
                        return F128::from_bits(sign_bit | EXP_MASK);
                    }
                    // NaN; set most significant mantissa bit
                    let mant_bits =
                        (u128::from(mant_bits_s) << (PREC - PREC_S)) | (1 << (PREC - 2));
                    return F128::from_bits(sign_bit | EXP_MASK | mant_bits);
                }

                if exp_bits_s == 0 {
                    // subnormal

                    // Example: if for f64 mantissa == 0b1011 == 11, then it has 60
                    // leading zeros, and 64 - 60 == 4 significant bits. The value is
                    //
                    // 0b1011 × 2^(-1021 - 53) == 0b1.011 × 2^(-1021 - 53 + 4 - 1)
                    //
                    // In F128, this is normal, with
                    //   * mantissa == (1011 << ((113 - 1) - (4 - 1))) & MANT_MASK_128
                    //              == (1011 << (113 - 4)) & MANT_MASK_128
                    //              == (1011 << (113 - 64 + 60)) & MANT_MASK_128
                    //   * unbiased exp == -1021 - 53 + 4 - 1
                    //                  == -1021 - 53 - 1 + 64 - 60

                    if mant_bits_s == 0 {
                        // zero; only the sign is kept
                        return F128::from_bits(sign_bit);
                    }
                    let lz = mant_bits_s.leading_zeros();
                    // The shift puts the leading one just above the mantissa
                    // field, and the mask then removes it as it is implicit.
                    let mant_bits = (u128::from(mant_bits_s) << (PREC - $u::BITS + lz)) & MANT_MASK;
                    let unbiased_exp =
                        $f::MIN_EXP - PREC_S as i32 - 1 + $u::BITS as i32 - lz as i32;
                    let exp_bits = ((unbiased_exp + EXP_BIAS as i32) as u128) << (PREC - 1);
                    return F128::from_bits(sign_bit | exp_bits | mant_bits);
                }

                // normal: widen the mantissa field and adjust the exponent by
                // the difference between the two biases
                let mant_bits = u128::from(mant_bits_s) << (PREC - PREC_S);
                let dbias = (EXP_BIAS - EXP_BIAS_S) as u128;
                let exp_bits = (u128::from(exp_bits_s >> (PREC_S - 1)) + dbias) << (PREC - 1);
                F128::from_bits(sign_bit | exp_bits | mant_bits)
            }
        }
    };
}
802
#[cfg(feature = "nightly-float")]
impl From<f128> for F128 {
    /// Converts a primitive `f128` into an [`F128`] with the same bits.
    fn from(src: f128) -> F128 {
        let bits = src.to_bits();
        Self::from_bits(bits)
    }
}
809
#[cfg(feature = "nightly-float")]
impl From<F128> for f128 {
    /// Converts an [`F128`] into a primitive `f128` with the same bits.
    fn from(src: F128) -> f128 {
        let bits = src.to_bits();
        f128::from_bits(bits)
    }
}
816
817from_float! { f64, u64 }
818from_float! { f32, u32 }
819from_float! { half_f16, u16 }
820from_float! { half_bf16, u16 }
821#[cfg(feature = "nightly-float")]
822from_float! { f16, u16 }
823
824/*
825```rust
826use core::cmp::Ord;
827use rug::float::{Constant, Round};
828use rug::{Assign, Float, Integer};
829
830fn decimal_string(val: &Float, prec: i32) -> String {
831    let log10 = val.clone().log10();
832    let floor_log10 = log10.to_i32_saturating_round(Round::Down).unwrap();
833    let shift = u32::try_from(prec - 1 - floor_log10).unwrap();
834    let val = val.clone() * Integer::from(Integer::u_pow_u(10, shift));
835    let int = val.to_integer_round(Round::Down).unwrap().0;
836    let padding = "0".repeat(usize::try_from(-floor_log10.min(0)).unwrap());
837    let mut s = format!("{padding}{int}");
838    s.insert(1, '.');
839    s
840}
841
842fn hex_bits(bits: u128) -> String {
843    let mut s = format!("0x{bits:016X}");
844    for i in 0..7 {
845        s.insert(6 + 5 * i, '_');
846    }
847    s
848}
849
850fn print(doc: &str, name: &str, val: Float) {
851    println!();
852    println!("    /// {} = {}…", doc, decimal_string(&val, 6));
853    println!("    // {} = {}...", name, decimal_string(&val, 40));
854    let round = Float::with_val(113, &val);
855
856    let sign_bit = if round.is_sign_negative() {
857        1u128 << 127
858    } else {
859        0
860    };
861
862    let unbiased_exp = round.get_exp().unwrap();
863    assert!(-16_381 <= unbiased_exp && unbiased_exp <= 16_384);
864    let exp_bits = u128::from((unbiased_exp + 16_382).unsigned_abs()) << 112;
865
866    let unshifted_mant = round.get_significand().unwrap();
867    let mant = unshifted_mant.clone() >> (unshifted_mant.significant_bits() - 113);
868    let mant_128 = mant.to_u128_wrapping();
869    assert_eq!(mant_128 >> 112, 1);
870    let mant_bits = mant_128 & ((1 << 112) - 1);
871
872    println!(
873        "    pub const {name}: F128 = F128::from_bits({});",
874        hex_bits(sign_bit | exp_bits | mant_bits)
875    );
876}
877
878fn float<T>(t: T) -> Float
879where
880    Float: Assign<T>,
881{
882    Float::with_val(1000, t)
883}
884
885fn main() {
886    println!("/// Basic mathematical constants.");
887    println!("pub mod consts {{");
888    println!("    use crate::F128;");
889    print("Archimedes’ constant, π", "PI", float(Constant::Pi));
890    print("A turn, τ", "TAU", float(Constant::Pi) * 2);
891    print("π/2", "FRAC_PI_2", float(Constant::Pi) / 2);
892    print("π/3", "FRAC_PI_3", float(Constant::Pi) / 3);
893    print("π/4", "FRAC_PI_4", float(Constant::Pi) / 4);
894    print("π/6", "FRAC_PI_6", float(Constant::Pi) / 6);
895    print("π/8", "FRAC_PI_8", float(Constant::Pi) / 8);
896    print("1/π", "FRAC_1_PI", 1 / float(Constant::Pi));
897    print("2/π", "FRAC_2_PI", 2 / float(Constant::Pi));
898    print("2/√π", "FRAC_2_SQRT_PI", 2 / float(Constant::Pi).sqrt());
899    print("√2", "SQRT_2", float(2).sqrt());
900    print("1/√2", "FRAC_1_SQRT_2", float(0.5).sqrt());
901    print("Euler’s number, e", "E", float(1).exp());
902    print("log<sub>2</sub> 10", "LOG2_10", float(10).log2());
903    print("log<sub>2</sub> e", "LOG2_E", float(1).exp().log2());
904    print("log<sub>10</sub> 2", "LOG10_2", float(2).log10());
905    print("log<sub>10</sub> e", "LOG10_E", float(1).exp().log10());
906    print("ln 2", "LN_2", float(2).ln());
907    print("ln 10", "LN_10", float(10).ln());
908    println!("}}");
909}
910```
911*/
912
/// Basic mathematical constants.
///
/// Each constant is an [`F128`] constructed from its raw bit pattern with
/// [`F128::from_bits`]. In these literals the leading 16-bit group holds the
/// sign bit and the 15-bit biased exponent (bias 16383 = `0x3FFF`); the
/// remaining 112 bits are the stored significand.
pub mod consts {
    use crate::F128;

    // Reading the definitions below:
    //
    //   * The `//` line above each constant gives its decimal expansion to
    //     more digits than the rendered documentation shows.
    //   * Constants that differ only by a power of two (for example `PI`,
    //     `TAU` and `FRAC_PI_2`) share the same significand bits and differ
    //     only in the exponent group.
    //   * NOTE(review): these items look like the output of the generator
    //     program quoted in the comment above this module; confirm before
    //     editing by hand, and regenerate instead if so.

    /// Archimedes’ constant, π = 3.14159…
    // PI = 3.141592653589793238462643383279502884197...
    pub const PI: F128 = F128::from_bits(0x4000_921F_B544_42D1_8469_898C_C517_01B8);

    /// A turn, τ = 6.28318…
    // TAU = 6.283185307179586476925286766559005768394...
    pub const TAU: F128 = F128::from_bits(0x4001_921F_B544_42D1_8469_898C_C517_01B8);

    /// π/2 = 1.57079…
    // FRAC_PI_2 = 1.570796326794896619231321691639751442098...
    pub const FRAC_PI_2: F128 = F128::from_bits(0x3FFF_921F_B544_42D1_8469_898C_C517_01B8);

    /// π/3 = 1.04719…
    // FRAC_PI_3 = 1.047197551196597746154214461093167628065...
    pub const FRAC_PI_3: F128 = F128::from_bits(0x3FFF_0C15_2382_D736_5846_5BB3_2E0F_567B);

    /// π/4 = 0.785398…
    // FRAC_PI_4 = 0.7853981633974483096156608458198757210492...
    pub const FRAC_PI_4: F128 = F128::from_bits(0x3FFE_921F_B544_42D1_8469_898C_C517_01B8);

    /// π/6 = 0.523598…
    // FRAC_PI_6 = 0.5235987755982988730771072305465838140328...
    pub const FRAC_PI_6: F128 = F128::from_bits(0x3FFE_0C15_2382_D736_5846_5BB3_2E0F_567B);

    /// π/8 = 0.392699…
    // FRAC_PI_8 = 0.3926990816987241548078304229099378605246...
    pub const FRAC_PI_8: F128 = F128::from_bits(0x3FFD_921F_B544_42D1_8469_898C_C517_01B8);

    /// 1/π = 0.318309…
    // FRAC_1_PI = 0.3183098861837906715377675267450287240689...
    pub const FRAC_1_PI: F128 = F128::from_bits(0x3FFD_45F3_06DC_9C88_2A53_F84E_AFA3_EA6A);

    /// 2/π = 0.636619…
    // FRAC_2_PI = 0.6366197723675813430755350534900574481378...
    pub const FRAC_2_PI: F128 = F128::from_bits(0x3FFE_45F3_06DC_9C88_2A53_F84E_AFA3_EA6A);

    /// 2/√π = 1.12837…
    // FRAC_2_SQRT_PI = 1.128379167095512573896158903121545171688...
    pub const FRAC_2_SQRT_PI: F128 = F128::from_bits(0x3FFF_20DD_7504_29B6_D11A_E3A9_14FE_D7FE);

    /// √2 = 1.41421…
    // SQRT_2 = 1.414213562373095048801688724209698078569...
    pub const SQRT_2: F128 = F128::from_bits(0x3FFF_6A09_E667_F3BC_C908_B2FB_1366_EA95);

    /// 1/√2 = 0.707106…
    // FRAC_1_SQRT_2 = 0.7071067811865475244008443621048490392848...
    pub const FRAC_1_SQRT_2: F128 = F128::from_bits(0x3FFE_6A09_E667_F3BC_C908_B2FB_1366_EA95);

    /// Euler’s number, e = 2.71828…
    // E = 2.718281828459045235360287471352662497757...
    pub const E: F128 = F128::from_bits(0x4000_5BF0_A8B1_4576_9535_5FB8_AC40_4E7A);

    /// log<sub>2</sub> 10 = 3.32192…
    // LOG2_10 = 3.321928094887362347870319429489390175864...
    pub const LOG2_10: F128 = F128::from_bits(0x4000_A934_F097_9A37_15FC_9257_EDFE_9B60);

    /// log<sub>2</sub> e = 1.44269…
    // LOG2_E = 1.442695040888963407359924681001892137426...
    pub const LOG2_E: F128 = F128::from_bits(0x3FFF_7154_7652_B82F_E177_7D0F_FDA0_D23A);

    /// log<sub>10</sub> 2 = 0.301029…
    // LOG10_2 = 0.3010299956639811952137388947244930267681...
    pub const LOG10_2: F128 = F128::from_bits(0x3FFD_3441_3509_F79F_EF31_1F12_B358_16F9);

    /// log<sub>10</sub> e = 0.434294…
    // LOG10_E = 0.4342944819032518276511289189166050822943...
    pub const LOG10_E: F128 = F128::from_bits(0x3FFD_BCB7_B152_6E50_E32A_6AB7_555F_5A68);

    /// ln 2 = 0.693147…
    // LN_2 = 0.6931471805599453094172321214581765680755...
    pub const LN_2: F128 = F128::from_bits(0x3FFE_62E4_2FEF_A39E_F357_93C7_6730_07E6);

    /// ln 10 = 2.30258…
    // LN_10 = 2.302585092994045684017991454684364207601...
    pub const LN_10: F128 = F128::from_bits(0x4000_26BB_1BBB_5551_582D_D4AD_AC57_05A6);
}
993
994#[cfg(test)]
995mod tests {
996    use crate::F128;
997    use crate::traits::FromFixed;
998    use half::{bf16 as half_bf16, f16 as half_f16};
999
1000    // Apart from F128 include f16, bf16, f32, f64 as a sanity check for the tests.
1001
1002    struct Params {
1003        mantissa_digits: u32,
1004        min_exp: i32,
1005        max_exp: i32,
1006        digits: u32,
1007        min_10_exp: i32,
1008        max_10_exp: i32,
1009    }
1010
1011    impl Params {
1012        #[track_caller]
1013        fn check(self) {
1014            let p = f64::from(self.mantissa_digits);
1015            let e_min = f64::from(self.min_exp);
1016            let e_max = f64::from(self.max_exp);
1017            assert_eq!(self.digits, ((p - 1.) * 2f64.log10()).floor() as u32);
1018            assert_eq!(self.min_10_exp, ((e_min - 1.) * 2f64.log10()).ceil() as i32);
1019            assert_eq!(
1020                self.max_10_exp,
1021                ((-(-p).exp2()).ln_1p() / 10f64.ln() + e_max * 2f64.log10()).floor() as i32
1022            );
1023        }
1024    }
1025
1026    #[test]
1027    fn decimal_constants_f16() {
1028        let params = Params {
1029            mantissa_digits: half_f16::MANTISSA_DIGITS,
1030            min_exp: half_f16::MIN_EXP,
1031            max_exp: half_f16::MAX_EXP,
1032            digits: half_f16::DIGITS,
1033            min_10_exp: half_f16::MIN_10_EXP,
1034            max_10_exp: half_f16::MAX_10_EXP,
1035        };
1036        params.check();
1037    }
1038
1039    #[test]
1040    fn decimal_constants_bf16() {
1041        let params = Params {
1042            mantissa_digits: half_bf16::MANTISSA_DIGITS,
1043            min_exp: half_bf16::MIN_EXP,
1044            max_exp: half_bf16::MAX_EXP,
1045            digits: half_bf16::DIGITS,
1046            min_10_exp: half_bf16::MIN_10_EXP,
1047            max_10_exp: half_bf16::MAX_10_EXP,
1048        };
1049        params.check();
1050    }
1051
1052    #[test]
1053    fn decimal_constants_f32() {
1054        let params = Params {
1055            mantissa_digits: f32::MANTISSA_DIGITS,
1056            min_exp: f32::MIN_EXP,
1057            max_exp: f32::MAX_EXP,
1058            digits: f32::DIGITS,
1059            min_10_exp: f32::MIN_10_EXP,
1060            max_10_exp: f32::MAX_10_EXP,
1061        };
1062        params.check();
1063    }
1064
1065    #[test]
1066    fn decimal_constants_f64() {
1067        let params = Params {
1068            mantissa_digits: f64::MANTISSA_DIGITS,
1069            min_exp: f64::MIN_EXP,
1070            max_exp: f64::MAX_EXP,
1071            digits: f64::DIGITS,
1072            min_10_exp: f64::MIN_10_EXP,
1073            max_10_exp: f64::MAX_10_EXP,
1074        };
1075        params.check();
1076    }
1077
1078    #[test]
1079    fn decimal_constants_f128() {
1080        let params = Params {
1081            mantissa_digits: F128::MANTISSA_DIGITS,
1082            min_exp: F128::MIN_EXP,
1083            max_exp: F128::MAX_EXP,
1084            digits: F128::DIGITS,
1085            min_10_exp: F128::MIN_10_EXP,
1086            max_10_exp: F128::MAX_10_EXP,
1087        };
1088        params.check();
1089    }
1090
1091    #[test]
1092    fn math_constants() {
1093        use crate::consts as fix;
1094        use crate::f128::consts as f128;
1095        assert_eq!(f128::PI, F128::from_fixed(fix::PI));
1096        assert_eq!(f128::TAU, F128::from_fixed(fix::TAU));
1097        assert_eq!(f128::FRAC_PI_2, F128::from_fixed(fix::FRAC_PI_2));
1098        assert_eq!(f128::FRAC_PI_3, F128::from_fixed(fix::FRAC_PI_3));
1099        assert_eq!(f128::FRAC_PI_4, F128::from_fixed(fix::FRAC_PI_4));
1100        assert_eq!(f128::FRAC_PI_6, F128::from_fixed(fix::FRAC_PI_6));
1101        assert_eq!(f128::FRAC_PI_8, F128::from_fixed(fix::FRAC_PI_8));
1102        assert_eq!(f128::FRAC_1_PI, F128::from_fixed(fix::FRAC_1_PI));
1103        assert_eq!(f128::FRAC_2_PI, F128::from_fixed(fix::FRAC_2_PI));
1104        assert_eq!(f128::FRAC_2_SQRT_PI, F128::from_fixed(fix::FRAC_2_SQRT_PI));
1105        assert_eq!(f128::SQRT_2, F128::from_fixed(fix::SQRT_2));
1106        assert_eq!(f128::FRAC_1_SQRT_2, F128::from_fixed(fix::FRAC_1_SQRT_2));
1107        assert_eq!(f128::E, F128::from_fixed(fix::E));
1108        assert_eq!(f128::LOG2_10, F128::from_fixed(fix::LOG2_10));
1109        assert_eq!(f128::LOG2_E, F128::from_fixed(fix::LOG2_E));
1110        assert_eq!(f128::LOG10_2, F128::from_fixed(fix::LOG10_2));
1111        assert_eq!(f128::LOG10_E, F128::from_fixed(fix::LOG10_E));
1112        assert_eq!(f128::LN_2, F128::from_fixed(fix::LN_2));
1113        assert_eq!(f128::LN_10, F128::from_fixed(fix::LN_10));
1114    }
1115
1116    #[test]
1117    fn from_f64() {
1118        // normal
1119        assert_eq!(F128::from(1f64), F128::ONE);
1120        assert_eq!(F128::from(-1f64), F128::NEG_ONE);
1121        // infinity
1122        assert_eq!(F128::from(f64::INFINITY), F128::INFINITY);
1123        assert_eq!(F128::from(f64::NEG_INFINITY), F128::NEG_INFINITY);
1124        // NaN
1125        assert!(F128::from(f64::NAN).is_nan());
1126        // zero
1127        assert_eq!(F128::from(0f64), F128::ZERO);
1128        assert_eq!(F128::from(-0f64), F128::ZERO);
1129        assert!(F128::from(0f64).is_sign_positive());
1130        assert!(F128::from(-0f64).is_sign_negative());
1131
1132        // subnormal
1133        let exp_shift = F128::MANTISSA_DIGITS - 1;
1134        // minimum f64 positive subnormal = 2^(-1021 - 53)
1135        // mantissa = 0
1136        // biased exponent = 16383 - 1021 - 53
1137        let exp = (F128::MAX_EXP - 1 + f64::MIN_EXP - f64::MANTISSA_DIGITS as i32) as u128;
1138        assert_eq!(
1139            F128::from(f64::from_bits(1)),
1140            F128::from_bits(exp << exp_shift)
1141        );
1142        // minimum f64 positive subnormal * 0b1011 = 0b1.011 * 2^(-1021 - 53 + 3)
1143        // mantissa = .011 << (113 - 1) = 011 << (113 - 1 - 3)
1144        // biased exponent = 16383 - 1021 - 53 + 3
1145        let mantissa = 3u128 << (F128::MANTISSA_DIGITS - 1 - 3);
1146        let exp = exp + 3;
1147        assert_eq!(
1148            F128::from(f64::from_bits((1 << 63) | 11)),
1149            F128::from_bits((1 << 127) | (exp << exp_shift) | mantissa)
1150        );
1151    }
1152
1153    #[test]
1154    fn from_f32() {
1155        // normal
1156        assert_eq!(F128::from(1f32), F128::ONE);
1157        assert_eq!(F128::from(-1f32), F128::NEG_ONE);
1158        // infinity
1159        assert_eq!(F128::from(f32::INFINITY), F128::INFINITY);
1160        assert_eq!(F128::from(f32::NEG_INFINITY), F128::NEG_INFINITY);
1161        // NaN
1162        assert!(F128::from(f32::NAN).is_nan());
1163        // zero
1164        assert_eq!(F128::from(0f32), F128::ZERO);
1165        assert_eq!(F128::from(-0f32), F128::ZERO);
1166        assert!(F128::from(0f32).is_sign_positive());
1167        assert!(F128::from(-0f32).is_sign_negative());
1168
1169        // subnormal
1170        let exp_shift = F128::MANTISSA_DIGITS - 1;
1171        // minimum f32 positive subnormal = 2^(-125 - 24)
1172        // mantissa = 0
1173        // biased exponent = 16383 - 125 - 24
1174        let exp = (F128::MAX_EXP - 1 + f32::MIN_EXP - f32::MANTISSA_DIGITS as i32) as u128;
1175        assert_eq!(
1176            F128::from(f32::from_bits(1)),
1177            F128::from_bits(exp << exp_shift)
1178        );
1179        // minimum f32 positive subnormal * 0b1011 = 0b1.011 * 2^(-125 - 24 + 3)
1180        // mantissa = .011 << (113 - 1) = 011 << (113 - 1 - 3)
1181        // biased exponent = 16383 - 125 - 24 + 3
1182        let mantissa = 3u128 << (F128::MANTISSA_DIGITS - 1 - 3);
1183        let exp = exp + 3;
1184        assert_eq!(
1185            F128::from(f32::from_bits((1 << 31) | 11)),
1186            F128::from_bits((1 << 127) | (exp << exp_shift) | mantissa)
1187        );
1188    }
1189
1190    #[test]
1191    fn from_f16() {
1192        // normal
1193        assert_eq!(F128::from(half_f16::ONE), F128::ONE);
1194        assert_eq!(F128::from(half_f16::NEG_ONE), F128::NEG_ONE);
1195        // infinity
1196        assert_eq!(F128::from(half_f16::INFINITY), F128::INFINITY);
1197        assert_eq!(F128::from(half_f16::NEG_INFINITY), F128::NEG_INFINITY);
1198        // NaN
1199        assert!(F128::from(half_f16::NAN).is_nan());
1200        // zero
1201        assert_eq!(F128::from(half_f16::ZERO), F128::ZERO);
1202        assert_eq!(F128::from(half_f16::NEG_ZERO), F128::ZERO);
1203        assert!(F128::from(half_f16::ZERO).is_sign_positive());
1204        assert!(F128::from(half_f16::NEG_ZERO).is_sign_negative());
1205
1206        // subnormal
1207        let exp_shift = F128::MANTISSA_DIGITS - 1;
1208        // minimum f16 positive subnormal = 2^(-13 - 11)
1209        // mantissa = 0
1210        // biased exponent = 16383 - 13 - 11
1211        let exp =
1212            (F128::MAX_EXP - 1 + half_f16::MIN_EXP - half_f16::MANTISSA_DIGITS as i32) as u128;
1213        assert_eq!(
1214            F128::from(half_f16::from_bits(1)),
1215            F128::from_bits(exp << exp_shift)
1216        );
1217        // minimum f16 positive subnormal * 0b1011 = 0b1.011 * 2^(-13 - 11 + 3)
1218        // mantissa = .011 << (113 - 1) = 011 << (113 - 1 - 3)
1219        // biased exponent = 16383 - 13 - 11 + 3
1220        let mantissa = 3u128 << (F128::MANTISSA_DIGITS - 1 - 3);
1221        let exp = exp + 3;
1222        assert_eq!(
1223            F128::from(half_f16::from_bits((1 << 15) | 11)),
1224            F128::from_bits((1 << 127) | (exp << exp_shift) | mantissa)
1225        );
1226    }
1227
1228    #[test]
1229    fn from_bf16() {
1230        // normal
1231        assert_eq!(F128::from(half_bf16::ONE), F128::ONE);
1232        assert_eq!(F128::from(half_bf16::NEG_ONE), F128::NEG_ONE);
1233        // infinity
1234        assert_eq!(F128::from(half_bf16::INFINITY), F128::INFINITY);
1235        assert_eq!(F128::from(half_bf16::NEG_INFINITY), F128::NEG_INFINITY);
1236        // NaN
1237        assert!(F128::from(half_bf16::NAN).is_nan());
1238        // zero
1239        assert_eq!(F128::from(half_bf16::ZERO), F128::ZERO);
1240        assert_eq!(F128::from(half_bf16::NEG_ZERO), F128::ZERO);
1241        assert!(F128::from(half_bf16::ZERO).is_sign_positive());
1242        assert!(F128::from(half_bf16::NEG_ZERO).is_sign_negative());
1243
1244        // subnormal
1245        let exp_shift = F128::MANTISSA_DIGITS - 1;
1246        // minimum half_bf16 positive subnormal = 2^(-125 - 8)
1247        // mantissa = 0
1248        // biased exponent = 16383 - 125 - 8
1249        let exp =
1250            (F128::MAX_EXP - 1 + half_bf16::MIN_EXP - half_bf16::MANTISSA_DIGITS as i32) as u128;
1251        assert_eq!(
1252            F128::from(half_bf16::from_bits(1)),
1253            F128::from_bits(exp << exp_shift)
1254        );
1255        // minimum bf16 positive subnormal * 0b1011 = 0b1.011 * 2^(-125 - 8 + 3)
1256        // mantissa = .011 << (113 - 1) = 011 << (113 - 1 - 3)
1257        // biased exponent = 16383 - 125 - 8 + 3
1258        let mantissa = 3u128 << (F128::MANTISSA_DIGITS - 1 - 3);
1259        let exp = exp + 3;
1260        assert_eq!(
1261            F128::from(half_bf16::from_bits((1 << 15) | 11)),
1262            F128::from_bits((1 << 127) | (exp << exp_shift) | mantissa)
1263        );
1264    }
1265}
fixed/f128.rs

fixed/
f128.rs