fixed/f128.rs
1// Copyright © 2018–2025 Trevor Spiteri
2
3// This library is free software: you can redistribute it and/or
4// modify it under the terms of either
5//
6// * the Apache License, Version 2.0 or
7// * the MIT License
8//
9// at your option.
10//
// You should have received copies of the Apache License and the MIT
12// License along with the library. If not, see
13// <https://www.apache.org/licenses/LICENSE-2.0> and
14// <https://opensource.org/licenses/MIT>.
15
16//! Constants specific to the [`F128`] quadruple-precision floating-point type.
17//!
18//! Mathematically significant numbers are provided in the [`consts`] sub-module.
19//!
20//! For constants related to the floating-point representation itself, see the
21//! associated constants defined directly on the [`F128`] type.
22//!
23//! # Planned deprecation
24//!
25//! This module will be deprecated when the [`f128` feature] is ready and
26//! stabilized.
27//!
28//! [`f128` feature]: https://github.com/rust-lang/rust/issues/116909
29
30use crate::F128;
31use core::cmp::Ordering;
32use core::hash::{Hash, Hasher};
33use core::num::FpCategory;
34use core::ops::Neg;
35use half::{bf16 as half_bf16, f16 as half_f16};
36
/// Significand precision of *binary128* in bits, counting the implicit
/// leading bit.
const PREC: u32 = 113;
/// Width of the exponent field: what is left of the 128 bits after the sign
/// bit and the `PREC - 1` stored significand bits.
const EXP_BITS: u32 = u128::BITS - 1 - (PREC - 1);
/// Bias added to the unbiased exponent to obtain the stored exponent field.
const EXP_BIAS: u32 = (1 << EXP_BITS) / 2 - 1;
/// Mask selecting the sign bit (the most significant bit).
const SIGN_MASK: u128 = !(u128::MAX >> 1);
/// Mask selecting the `PREC - 1` stored significand bits (the least
/// significant bits).
const MANT_MASK: u128 = u128::MAX >> (u128::BITS - (PREC - 1));
/// Mask selecting the exponent field, that is every bit that is neither the
/// sign bit nor a significand bit.
const EXP_MASK: u128 = !(SIGN_MASK | MANT_MASK);
43
pub(crate) mod private {
    /// A *binary128* floating-point number (`f128`), stored as its raw bit
    /// pattern.
    ///
    /// The type exists so that fixed-point numbers can be
    ///
    ///   * converted to and from the bit representation of 128-bit
    ///     floating-point numbers, and
    ///   * compared against the bit representation of 128-bit floating-point
    ///     numbers.
    ///
    /// Arithmetic and general analytic functions are *not* provided.
    ///
    /// More information on *binary128* can be found in
    /// [<i>Quadruple-precision floating-point format</i> on Wikipedia][quad].
    ///
    /// *See also the <code>[fixed]::[f128]::[consts]</code> module.*
    ///
    /// # Planned deprecation
    ///
    /// This struct will be deprecated when the [`f128` feature] is ready and
    /// stabilized.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use fixed::types::I16F16;
    /// use fixed::F128;
    /// assert_eq!(I16F16::ONE.to_num::<F128>(), F128::ONE);
    /// assert_eq!(I16F16::from_num(F128::ONE), I16F16::ONE);
    ///
    /// // fixed-point numbers can be compared directly to F128 values
    /// assert!(I16F16::from_num(1.5) > F128::ONE);
    /// assert!(I16F16::from_num(0.5) < F128::ONE);
    /// ```
    ///
    /// [`f128` feature]: https://github.com/rust-lang/rust/issues/116909
    /// [consts]: crate::f128::consts
    /// [f128]: crate::f128
    /// [fixed]: crate
    /// [quad]: https://en.wikipedia.org/wiki/Quadruple-precision_floating-point_format
    #[derive(Copy, Clone, Debug, Default)]
    pub struct F128 {
        // Raw binary128 bit pattern; the default (all bits clear) is +0.
        pub(crate) bits: u128,
    }
}
89
90impl F128 {
91 /// Zero.
92 pub const ZERO: F128 = F128::from_bits(0);
93 /// Negative zero (−0).
94 pub const NEG_ZERO: F128 = F128::from_bits(SIGN_MASK);
95 /// One.
96 pub const ONE: F128 = F128::from_bits((EXP_BIAS as u128) << (PREC - 1));
97 /// Negative one (−1).
98 pub const NEG_ONE: F128 = F128::from_bits(SIGN_MASK | F128::ONE.to_bits());
99
100 /// Smallest positive subnormal number.
101 ///
102 /// Equal to 2<sup>[`MIN_EXP`] − [`MANTISSA_DIGITS`]</sup>.
103 ///
104 /// [`MANTISSA_DIGITS`]: Self::MANTISSA_DIGITS
105 /// [`MIN_EXP`]: Self::MIN_EXP
106 pub const MIN_POSITIVE_SUB: F128 = F128::from_bits(1);
107
108 /// Smallest positive normal number.
109 ///
110 /// Equal to 2<sup>[`MIN_EXP`] − 1</sup>.
111 ///
112 /// [`MIN_EXP`]: Self::MIN_EXP
113 pub const MIN_POSITIVE: F128 = F128::from_bits(MANT_MASK + 1);
114
115 /// Largest finite number.
116 ///
117 /// Equal to
118 /// (1 − 2<sup>−[`MANTISSA_DIGITS`]</sup>) 2<sup>[`MAX_EXP`]</sup>.
119 ///
120 /// [`MANTISSA_DIGITS`]: Self::MANTISSA_DIGITS
121 /// [`MAX_EXP`]: Self::MAX_EXP
122 pub const MAX: F128 = F128::from_bits(EXP_MASK - 1);
123
124 /// Smallest finite number (−[`MAX`]).
125 ///
126 /// [`MAX`]: Self::MAX
127 pub const MIN: F128 = F128::from_bits(SIGN_MASK | F128::MAX.to_bits());
128
129 /// Infinity (∞).
130 pub const INFINITY: F128 = F128::from_bits(EXP_MASK);
131
132 /// Negative infinity (−∞).
133 pub const NEG_INFINITY: F128 = F128::from_bits(SIGN_MASK | EXP_MASK);
134
135 /// NaN.
136 pub const NAN: F128 = F128::from_bits(EXP_MASK | (1u128 << (PREC - 2)));
137
138 /// The radix or base of the internal representation (2).
139 pub const RADIX: u32 = 2;
140
141 /// Number of significant digits in base 2.
142 pub const MANTISSA_DIGITS: u32 = PREC;
143
144 /// Maximum <i>x</i> such that any decimal number with <i>x</i> significant
145 /// digits can be converted to [`F128`] and back without loss.
146 ///
147 /// Equal to
148 /// floor(log<sub>10</sub> 2<sup>[`MANTISSA_DIGITS`] − 1</sup>).
149 ///
150 /// [`MANTISSA_DIGITS`]: Self::MANTISSA_DIGITS
151 pub const DIGITS: u32 = 33;
152
153 /// The difference between 1 and the next larger representable number.
154 ///
155 /// Equal to 2<sup>1 − [`MANTISSA_DIGITS`]</sup>.
156 ///
157 /// [`MANTISSA_DIGITS`]: Self::MANTISSA_DIGITS
158 pub const EPSILON: F128 = F128::from_bits(((EXP_BIAS - (PREC - 1)) as u128) << (PREC - 1));
159
160 /// If <i>x</i> = `MIN_EXP`, then normal numbers
161 /// ≥ 0.5 × 2<sup><i>x</i></sup>.
162 pub const MIN_EXP: i32 = 3 - F128::MAX_EXP;
163
164 /// If <i>x</i> = `MAX_EXP`, then normal numbers
165 /// < 1 × 2<sup><i>x</i></sup>.
166 pub const MAX_EXP: i32 = EXP_BIAS as i32 + 1;
167
168 /// Minimum <i>x</i> for which 10<sup><i>x</i></sup> is in the normal range
169 /// of [`F128`].
170 ///
171 /// Equal to ceil(log<sub>10</sub> [`MIN_POSITIVE`]).
172 ///
173 /// [`MIN_POSITIVE`]: Self::MIN_POSITIVE
174 pub const MIN_10_EXP: i32 = -4931;
175
176 /// Maximum <i>x</i> for which 10<sup><i>x</i></sup> is in the normal range
177 /// of [`F128`].
178 ///
179 /// Equal to floor(log<sub>10</sub> [`MAX`]).
180 ///
181 /// [`MAX`]: Self::MAX
182 pub const MAX_10_EXP: i32 = 4932;
183
184 /// Raw transmutation from [`u128`].
185 ///
186 /// # Examples
187 ///
188 /// ```rust
189 /// use fixed::F128;
190 /// let infinity_bits = 0x7FFF_u128 << 112;
191 /// assert!(F128::from_bits(infinity_bits - 1).is_finite());
192 /// assert!(!F128::from_bits(infinity_bits).is_finite());
193 /// ```
194 #[inline]
195 #[must_use]
196 pub const fn from_bits(bits: u128) -> F128 {
197 F128 { bits }
198 }
199
200 /// Raw transmutation to [`u128`].
201 ///
202 /// # Examples
203 ///
204 /// ```rust
205 /// use fixed::F128;
206 /// assert_eq!(F128::ONE.to_bits(), 0x3FFF_u128 << 112);
207 /// assert_ne!(F128::ONE.to_bits(), 1u128);
208 /// ```
209 #[inline]
210 #[must_use]
211 pub const fn to_bits(self) -> u128 {
212 self.bits
213 }
214
215 /// Creates a number from a byte array in big-endian byte order.
216 #[inline]
217 #[must_use]
218 pub const fn from_be_bytes(bytes: [u8; 16]) -> F128 {
219 F128::from_bits(u128::from_be_bytes(bytes))
220 }
221
222 /// Creates a number from a byte array in little-endian byte order.
223 #[inline]
224 #[must_use]
225 pub const fn from_le_bytes(bytes: [u8; 16]) -> F128 {
226 F128::from_bits(u128::from_le_bytes(bytes))
227 }
228
229 /// Creates a number from a byte array in native-endian byte order.
230 #[inline]
231 #[must_use]
232 pub const fn from_ne_bytes(bytes: [u8; 16]) -> F128 {
233 F128::from_bits(u128::from_ne_bytes(bytes))
234 }
235
236 /// Returns the memory representation of the number as a byte array in
237 /// big-endian byte order.
238 #[inline]
239 #[must_use]
240 pub const fn to_be_bytes(self) -> [u8; 16] {
241 self.to_bits().to_be_bytes()
242 }
243
244 /// Returns the memory representation of the number as a byte array in
245 /// little-endian byte order.
246 #[inline]
247 #[must_use]
248 pub const fn to_le_bytes(self) -> [u8; 16] {
249 self.to_bits().to_le_bytes()
250 }
251
252 /// Returns the memory representation of the number as a byte array in
253 /// native-endian byte order.
254 #[inline]
255 #[must_use]
256 pub const fn to_ne_bytes(self) -> [u8; 16] {
257 self.to_bits().to_ne_bytes()
258 }
259
260 /// Returns [`true`] if the number is NaN.
261 ///
262 /// # Example
263 ///
264 /// ```rust
265 /// use fixed::F128;
266 ///
267 /// assert!(F128::NAN.is_nan());
268 ///
269 /// assert!(!F128::ONE.is_nan());
270 /// assert!(!F128::INFINITY.is_nan());
271 /// assert!(!F128::NEG_INFINITY.is_nan());
272 /// ```
273 #[inline]
274 #[must_use]
275 pub const fn is_nan(self) -> bool {
276 (self.to_bits() & !SIGN_MASK) > EXP_MASK
277 }
278
279 /// Returns [`true`] if the number is infinite.
280 ///
281 /// # Example
282 ///
283 /// ```rust
284 /// use fixed::F128;
285 ///
286 /// assert!(F128::INFINITY.is_infinite());
287 /// assert!(F128::NEG_INFINITY.is_infinite());
288 ///
289 /// assert!(!F128::ONE.is_infinite());
290 /// assert!(!F128::NAN.is_infinite());
291 /// ```
292 #[inline]
293 #[must_use]
294 pub const fn is_infinite(self) -> bool {
295 (self.to_bits() & !SIGN_MASK) == EXP_MASK
296 }
297
298 /// Returns [`true`] if the number is neither infinite nor NaN.
299 ///
300 /// # Example
301 ///
302 /// ```rust
303 /// use fixed::F128;
304 ///
305 /// assert!(F128::ONE.is_finite());
306 /// assert!(F128::MAX.is_finite());
307 ///
308 /// assert!(!F128::INFINITY.is_finite());
309 /// assert!(!F128::NEG_INFINITY.is_finite());
310 /// assert!(!F128::NAN.is_finite());
311 /// ```
312 #[inline]
313 #[must_use]
314 pub const fn is_finite(self) -> bool {
315 (self.to_bits() & EXP_MASK) != EXP_MASK
316 }
317
318 /// Returns [`true`] if the number is zero.
319 ///
320 /// # Example
321 ///
322 /// ```rust
323 /// use fixed::F128;
324 ///
325 /// assert!(F128::ZERO.is_zero());
326 /// assert!(F128::NEG_ZERO.is_zero());
327 ///
328 /// assert!(!F128::MIN_POSITIVE_SUB.is_zero());
329 /// assert!(!F128::NAN.is_zero());
330 /// ```
331 #[inline]
332 #[must_use]
333 pub const fn is_zero(self) -> bool {
334 (self.to_bits() & !SIGN_MASK) == 0
335 }
336
337 /// Returns [`true`] if the number is subnormal.
338 ///
339 /// # Example
340 ///
341 /// ```rust
342 /// use fixed::F128;
343 ///
344 /// assert!(F128::MIN_POSITIVE_SUB.is_subnormal());
345 ///
346 /// assert!(!F128::ZERO.is_subnormal());
347 /// assert!(!F128::MIN_POSITIVE.is_subnormal());
348 /// ```
349 #[inline]
350 #[must_use]
351 pub const fn is_subnormal(self) -> bool {
352 let abs = self.to_bits() & !SIGN_MASK;
353 0 < abs && abs < F128::MIN_POSITIVE.to_bits()
354 }
355
356 /// Returns [`true`] if the number is neither zero, infinite, subnormal, or NaN.
357 ///
358 /// # Example
359 ///
360 /// ```rust
361 /// use fixed::F128;
362 ///
363 /// assert!(F128::MIN.is_normal());
364 /// assert!(F128::MIN_POSITIVE.is_normal());
365 /// assert!(F128::MAX.is_normal());
366 ///
367 /// assert!(!F128::ZERO.is_normal());
368 /// assert!(!F128::MIN_POSITIVE_SUB.is_normal());
369 /// assert!(!F128::INFINITY.is_normal());
370 /// assert!(!F128::NAN.is_normal());
371 /// ```
372 #[inline]
373 #[must_use]
374 pub const fn is_normal(self) -> bool {
375 let abs = self.to_bits() & !SIGN_MASK;
376 F128::MIN_POSITIVE.to_bits() <= abs && abs <= F128::MAX.to_bits()
377 }
378
379 /// Returns the floating point category of the number.
380 ///
381 /// If only one property is going to be tested, it is generally faster to
382 /// use the specific predicate instead.
383 ///
384 /// # Example
385 ///
386 /// ```rust
387 /// use core::num::FpCategory;
388 /// use fixed::F128;
389 ///
390 /// assert_eq!(F128::ZERO.classify(), FpCategory::Zero);
391 /// assert_eq!(F128::MIN_POSITIVE_SUB.classify(), FpCategory::Subnormal);
392 /// assert_eq!(F128::MIN_POSITIVE.classify(), FpCategory::Normal);
393 /// assert_eq!(F128::INFINITY.classify(), FpCategory::Infinite);
394 /// assert_eq!(F128::NAN.classify(), FpCategory::Nan);
395 /// ```
396 #[inline]
397 #[must_use]
398 pub const fn classify(self) -> FpCategory {
399 let exp = self.to_bits() & EXP_MASK;
400 let mant = self.to_bits() & MANT_MASK;
401 if exp == 0 {
402 if mant == 0 {
403 FpCategory::Zero
404 } else {
405 FpCategory::Subnormal
406 }
407 } else if exp == EXP_MASK {
408 if mant == 0 {
409 FpCategory::Infinite
410 } else {
411 FpCategory::Nan
412 }
413 } else {
414 FpCategory::Normal
415 }
416 }
417
418 /// Returns the absolute value of the number.
419 ///
420 /// The only difference possible between the input value and the returned
421 /// value is in the sign bit, which is always cleared in the return value.
422 ///
423 /// # Example
424 ///
425 /// ```rust
426 /// use fixed::F128;
427 ///
428 /// // -0 == +0, but -0 bits != +0 bits
429 /// assert_eq!(F128::NEG_ZERO, F128::ZERO);
430 /// assert_ne!(F128::NEG_ZERO.to_bits(), F128::ZERO.to_bits());
431 /// assert_eq!(F128::NEG_ZERO.abs().to_bits(), F128::ZERO.to_bits());
432 ///
433 /// assert_eq!(F128::NEG_INFINITY.abs(), F128::INFINITY);
434 /// assert_eq!(F128::MIN.abs(), F128::MAX);
435 ///
436 /// assert!(F128::NAN.abs().is_nan());
437 /// ```
438 #[inline]
439 #[must_use]
440 pub const fn abs(self) -> F128 {
441 F128::from_bits(self.to_bits() & !SIGN_MASK)
442 }
443
444 /// Returns a number that represents the sign of the input value.
445 ///
446 /// * 1 if the number is positive, +0, or +∞
447 /// * −1 if the number is negative, −0, or −∞
448 /// * NaN if the number is NaN
449 ///
450 /// # Example
451 ///
452 /// ```rust
453 /// use fixed::F128;
454 ///
455 /// assert_eq!(F128::ONE.signum(), F128::ONE);
456 /// assert_eq!(F128::INFINITY.signum(), F128::ONE);
457 /// assert_eq!(F128::NEG_ZERO.signum(), F128::NEG_ONE);
458 /// assert_eq!(F128::MIN.signum(), F128::NEG_ONE);
459 ///
460 /// assert!(F128::NAN.signum().is_nan());
461 /// ```
462 #[inline]
463 #[must_use]
464 pub const fn signum(self) -> F128 {
465 if self.is_nan() {
466 self
467 } else if self.is_sign_positive() {
468 F128::ONE
469 } else {
470 F128::NEG_ONE
471 }
472 }
473
474 /// Returns a number composed of the magnitude of `self` and the sign of `sign`.
475 ///
476 /// # Example
477 ///
478 /// ```rust
479 /// use fixed::F128;
480 ///
481 /// assert_eq!(F128::ONE.copysign(F128::NEG_ZERO), F128::NEG_ONE);
482 /// assert_eq!(F128::ONE.copysign(F128::ZERO), F128::ONE);
483 /// assert_eq!(F128::NEG_ONE.copysign(F128::NEG_INFINITY), F128::NEG_ONE);
484 /// assert_eq!(F128::NEG_ONE.copysign(F128::INFINITY), F128::ONE);
485 ///
486 /// assert!(F128::NAN.copysign(F128::ONE).is_nan());
487 /// assert!(F128::NAN.copysign(F128::ONE).is_sign_positive());
488 /// assert!(F128::NAN.copysign(F128::NEG_ONE).is_sign_negative());
489 /// ```
490 #[inline]
491 #[must_use]
492 pub const fn copysign(self, sign: F128) -> F128 {
493 F128::from_bits((self.to_bits() & !SIGN_MASK) | (sign.to_bits() & SIGN_MASK))
494 }
495
496 /// Returns [`true`] if the number has a positive sign, including +0, +∞,
497 /// and NaN without a negative sign bit.
498 ///
499 /// # Example
500 ///
501 /// ```rust
502 /// use fixed::F128;
503 ///
504 /// assert!(F128::ZERO.is_sign_positive());
505 /// assert!(F128::MAX.is_sign_positive());
506 /// assert!(F128::INFINITY.is_sign_positive());
507 ///
508 /// assert!(!F128::NEG_ZERO.is_sign_positive());
509 /// assert!(!F128::MIN.is_sign_positive());
510 /// assert!(!F128::NEG_INFINITY.is_sign_positive());
511 /// ```
512 #[inline]
513 #[must_use]
514 pub const fn is_sign_positive(self) -> bool {
515 (self.to_bits() & SIGN_MASK) == 0
516 }
517
518 /// Returns [`true`] if the number has a negative sign, including −0,
519 /// −∞, and NaN with a negative sign bit.
520 ///
521 /// # Example
522 ///
523 /// ```rust
524 /// use fixed::F128;
525 ///
526 /// assert!(F128::NEG_ZERO.is_sign_negative());
527 /// assert!(F128::MIN.is_sign_negative());
528 /// assert!(F128::NEG_INFINITY.is_sign_negative());
529 ///
530 /// assert!(!F128::ZERO.is_sign_negative());
531 /// assert!(!F128::MAX.is_sign_negative());
532 /// assert!(!F128::INFINITY.is_sign_negative());
533 /// ```
534 #[inline]
535 #[must_use]
536 pub const fn is_sign_negative(self) -> bool {
537 (self.to_bits() & SIGN_MASK) != 0
538 }
539
540 /// Returns the maximum of two numbers, ignoring NaN.
541 ///
542 /// If one of the arguments is NaN, then the other argument is returned.
543 ///
544 /// # Example
545 ///
546 /// ```rust
547 /// use fixed::F128;
548 ///
549 /// assert_eq!(F128::ZERO.max(F128::ONE), F128::ONE);
550 /// ```
551 #[inline]
552 #[must_use]
553 pub const fn max(self, other: F128) -> F128 {
554 if self.is_nan() || matches!(partial_cmp(&self, &other), Some(Ordering::Less)) {
555 other
556 } else {
557 self
558 }
559 }
560
561 /// Returns the minimum of two numbers, ignoring NaN.
562 ///
563 /// If one of the arguments is NaN, then the other argument is returned.
564 ///
565 /// # Example
566 ///
567 /// ```rust
568 /// use fixed::F128;
569 ///
570 /// assert_eq!(F128::ZERO.min(F128::ONE), F128::ZERO);
571 /// ```
572 #[inline]
573 #[must_use]
574 pub const fn min(self, other: F128) -> F128 {
575 if self.is_nan() || matches!(partial_cmp(&self, &other), Some(Ordering::Greater)) {
576 other
577 } else {
578 self
579 }
580 }
581
582 /// Clamps the value within the specified bounds.
583 ///
584 /// Returns `min` if `self` < `min`, `max` if
585 /// `self` > `max`, or `self` otherwise.
586 ///
587 /// Note that this method returns NaN if the initial value is NaN.
588 ///
589 /// # Panics
590 ///
591 /// Panics if `min` > `max`, `min` is NaN, or `max` is NaN.
592 ///
593 /// # Examples
594 ///
595 /// ```
596 /// use fixed::F128;
597 /// assert_eq!(F128::MIN.clamp(F128::NEG_ONE, F128::ONE), F128::NEG_ONE);
598 /// assert_eq!(F128::ZERO.clamp(F128::NEG_ONE, F128::ONE), F128::ZERO);
599 /// assert_eq!(F128::MAX.clamp(F128::NEG_ONE, F128::ONE), F128::ONE);
600 /// assert!(F128::NAN.clamp(F128::NEG_ONE, F128::ONE).is_nan());
601 /// ```
602 #[inline]
603 #[track_caller]
604 #[must_use]
605 pub const fn clamp(mut self, min: F128, max: F128) -> F128 {
606 match partial_cmp(&min, &max) {
607 Some(Ordering::Less | Ordering::Equal) => {}
608 _ => panic!("need min <= max"),
609 }
610 if matches!(partial_cmp(&self, &min), Some(Ordering::Less)) {
611 self = min;
612 }
613 if matches!(partial_cmp(&self, &max), Some(Ordering::Greater)) {
614 self = max;
615 }
616 self
617 }
618
619 /// Returns the ordering between `self` and `other`.
620 ///
621 /// Unlike the [`PartialOrd`] implementation, this method always returns an
622 /// order in the following sequence:
623 ///
624 /// * NaN with the sign bit set
625 /// * −∞
626 /// * negative normal numbers
627 /// * negative subnormal numbers
628 /// * −0
629 /// * +0
630 /// * positive subnormal numbers
631 /// * positive normal numbers
632 /// * +∞
633 /// * NaN with the sign bit cleared
634 ///
635 /// # Example
636 ///
637 /// ```rust
638 /// use core::cmp::Ordering;
639 /// use fixed::F128;
640 ///
641 /// let neg_nan = F128::NAN.copysign(F128::NEG_ONE);
642 /// let pos_nan = F128::NAN.copysign(F128::ONE);
643 /// let neg_inf = F128::NEG_INFINITY;
644 /// let pos_inf = F128::INFINITY;
645 /// let neg_zero = F128::NEG_ZERO;
646 /// let pos_zero = F128::ZERO;
647 ///
648 /// assert_eq!(neg_nan.total_cmp(&neg_inf), Ordering::Less);
649 /// assert_eq!(pos_nan.total_cmp(&pos_inf), Ordering::Greater);
650 /// assert_eq!(neg_zero.total_cmp(&pos_zero), Ordering::Less);
651 /// ```
652 #[inline]
653 #[must_use]
654 pub const fn total_cmp(&self, other: &F128) -> Ordering {
655 let a = self.to_bits();
656 let b = other.to_bits();
657 match (self.is_sign_negative(), other.is_sign_negative()) {
658 (false, false) => cmp_bits(a, b),
659 (true, true) => cmp_bits(b, a),
660 (false, true) => Ordering::Greater,
661 (true, false) => Ordering::Less,
662 }
663 }
664}
665
/// Three-way comparison of two raw bit patterns as unsigned integers.
///
/// Hand-written with comparison operators so that it can be called from the
/// `const fn` comparison methods of this file.
const fn cmp_bits(a: u128, b: u128) -> Ordering {
    if a == b {
        return Ordering::Equal;
    }
    if a < b {
        Ordering::Less
    } else {
        Ordering::Greater
    }
}
675
676impl PartialEq for F128 {
677 #[inline]
678 fn eq(&self, other: &F128) -> bool {
679 if self.is_nan() || other.is_nan() {
680 return false;
681 }
682 let a = self.to_bits();
683 let b = other.to_bits();
684 // handle zero
685 if ((a | b) & !SIGN_MASK) == 0 {
686 return true;
687 }
688 a == b
689 }
690}
691
impl PartialOrd for F128 {
    /// Compares two numbers as floating-point values.
    ///
    /// The work is done by the free `const fn partial_cmp` of this module,
    /// which is kept separate from the trait method so that it can also be
    /// called from the `const fn` methods `max`, `min` and `clamp`.
    #[inline]
    fn partial_cmp(&self, other: &F128) -> Option<Ordering> {
        // This path resolves to the free function, not to the trait method.
        partial_cmp(self, other)
    }
}
698
699#[inline]
700const fn partial_cmp(a: &F128, b: &F128) -> Option<Ordering> {
701 if a.is_nan() || b.is_nan() {
702 return None;
703 }
704 let a_bits = a.to_bits();
705 let b_bits = b.to_bits();
706 // handle zero
707 if ((a_bits | b_bits) & !SIGN_MASK) == 0 {
708 return Some(Ordering::Equal);
709 }
710 match (a.is_sign_negative(), b.is_sign_negative()) {
711 (false, false) => Some(cmp_bits(a_bits, b_bits)),
712 (true, true) => Some(cmp_bits(b_bits, a_bits)),
713 (false, true) => Some(Ordering::Greater),
714 (true, false) => Some(Ordering::Less),
715 }
716}
717
718impl Hash for F128 {
719 #[inline]
720 fn hash<H>(&self, state: &mut H)
721 where
722 H: Hasher,
723 {
724 let mut bits = self.to_bits();
725 if bits == F128::NEG_ZERO.to_bits() {
726 bits = 0;
727 }
728 bits.hash(state);
729 }
730}
731
732impl Neg for F128 {
733 type Output = F128;
734 #[inline]
735 fn neg(self) -> F128 {
736 F128::from_bits(self.to_bits() ^ SIGN_MASK)
737 }
738}
739
// Implements `From<$f> for F128`, where `$f` is a binary floating-point type
// narrower than `F128` and `$u` is the unsigned integer type returned by
// `$f::to_bits`.
//
// The sign is copied, the significand is widened by appending zero bits, and
// the exponent is rebiased. Subnormal source values are normalized, as they
// are always in the normal range of `F128`. A NaN source gives a NaN with the
// most significant stored significand bit set.
macro_rules! from_float {
    ($f:ident, $u:ident) => {
        impl From<$f> for F128 {
            fn from(src: $f) -> F128 {
                // Layout of the source format, mirroring the module-level
                // constants for `F128`.
                const SRC_PREC: u32 = $f::MANTISSA_DIGITS;
                const SRC_EXP_WIDTH: u32 = $u::BITS - SRC_PREC;
                const SRC_EXP_BIAS: u32 = (1 << (SRC_EXP_WIDTH - 1)) - 1;
                const SRC_SIGN_MASK: $u = 1 << ($u::BITS - 1);
                const SRC_EXP_MASK: $u = ((1 << SRC_EXP_WIDTH) - 1) << (SRC_PREC - 1);
                const SRC_MANT_MASK: $u = (1 << (SRC_PREC - 1)) - 1;

                let raw = src.to_bits();
                let src_exp = raw & SRC_EXP_MASK;
                let src_mant = raw & SRC_MANT_MASK;
                // Move the source sign bit to the top bit of the u128.
                let sign = u128::from(raw & SRC_SIGN_MASK) << (u128::BITS - $u::BITS);
                // Stored significand aligned to the top of the F128 field.
                let widened_mant = u128::from(src_mant) << (PREC - SRC_PREC);

                let magnitude = if src_exp == SRC_EXP_MASK {
                    if src_mant == 0 {
                        // infinity
                        EXP_MASK
                    } else {
                        // NaN; set most significant mantissa bit
                        EXP_MASK | widened_mant | (1 << (PREC - 2))
                    }
                } else if src_exp != 0 {
                    // normal: same significand, exponent field adjusted by
                    // the difference between the two biases
                    let bias_diff = (EXP_BIAS - SRC_EXP_BIAS) as u128;
                    let exp_field = u128::from(src_exp >> (SRC_PREC - 1)) + bias_diff;
                    (exp_field << (PREC - 1)) | widened_mant
                } else if src_mant == 0 {
                    // zero
                    0
                } else {
                    // subnormal
                    //
                    // Example: if for f64 mantissa == 0b1011 == 11, then it
                    // has 60 leading zeros, and 64 - 60 == 4 significant
                    // bits. The value is
                    //
                    //     0b1011 × 2^(-1021 - 53) == 0b1.011 × 2^(-1021 - 53 + 4 - 1)
                    //
                    // In F128, this is normal, with
                    //   * mantissa == (1011 << ((113 - 1) - (4 - 1))) & MANT_MASK_128
                    //              == (1011 << (113 - 4)) & MANT_MASK_128
                    //              == (1011 << (113 - 64 + 60)) & MANT_MASK_128
                    //   * unbiased exp == -1021 - 53 + 4 - 1
                    //                  == -1021 - 53 - 1 + 64 - 60
                    let lz = src_mant.leading_zeros();
                    // The leading one lands on the implicit bit and is
                    // masked off.
                    let mant = (u128::from(src_mant) << (PREC - $u::BITS + lz)) & MANT_MASK;
                    let unbiased_exp =
                        $f::MIN_EXP - SRC_PREC as i32 - 1 + $u::BITS as i32 - lz as i32;
                    let exp_field = (unbiased_exp + EXP_BIAS as i32) as u128;
                    (exp_field << (PREC - 1)) | mant
                };
                F128::from_bits(sign | magnitude)
            }
        }
    };
}
802
#[cfg(feature = "nightly-float")]
impl From<f128> for F128 {
    /// Wraps the bit pattern of a primitive `f128` in an [`F128`] without
    /// modifying it.
    fn from(src: f128) -> Self {
        let bits = src.to_bits();
        Self::from_bits(bits)
    }
}
809
#[cfg(feature = "nightly-float")]
impl From<F128> for f128 {
    /// Reinterprets the bit pattern held by an [`F128`] as a primitive
    /// `f128` without modifying it.
    fn from(src: F128) -> Self {
        let bits = src.to_bits();
        Self::from_bits(bits)
    }
}
816
// Generate `From<T> for F128` for each supported source floating-point type.
// The second macro argument is the unsigned integer type of the source's bit
// pattern (the type returned by its `to_bits` method).
from_float! { f64, u64 }
from_float! { f32, u32 }
from_float! { half_f16, u16 }
from_float! { half_bf16, u16 }
// The primitive `f16` is only handled when the `nightly-float` feature is
// enabled.
#[cfg(feature = "nightly-float")]
from_float! { f16, u16 }
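/*
The following program, which uses the `rug` crate, prints the source of the
`consts` module defined after this comment.
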
825```rust
826use core::cmp::Ord;
827use rug::float::{Constant, Round};
828use rug::{Assign, Float, Integer};
829
830fn decimal_string(val: &Float, prec: i32) -> String {
831 let log10 = val.clone().log10();
832 let floor_log10 = log10.to_i32_saturating_round(Round::Down).unwrap();
833 let shift = u32::try_from(prec - 1 - floor_log10).unwrap();
834 let val = val.clone() * Integer::from(Integer::u_pow_u(10, shift));
835 let int = val.to_integer_round(Round::Down).unwrap().0;
836 let padding = "0".repeat(usize::try_from(-floor_log10.min(0)).unwrap());
837 let mut s = format!("{padding}{int}");
838 s.insert(1, '.');
839 s
840}
841
842fn hex_bits(bits: u128) -> String {
843 let mut s = format!("0x{bits:016X}");
844 for i in 0..7 {
845 s.insert(6 + 5 * i, '_');
846 }
847 s
848}
849
850fn print(doc: &str, name: &str, val: Float) {
851 println!();
852 println!(" /// {} = {}…", doc, decimal_string(&val, 6));
853 println!(" // {} = {}...", name, decimal_string(&val, 40));
854 let round = Float::with_val(113, &val);
855
856 let sign_bit = if round.is_sign_negative() {
857 1u128 << 127
858 } else {
859 0
860 };
861
862 let unbiased_exp = round.get_exp().unwrap();
863 assert!(-16_381 <= unbiased_exp && unbiased_exp <= 16_384);
864 let exp_bits = u128::from((unbiased_exp + 16_382).unsigned_abs()) << 112;
865
866 let unshifted_mant = round.get_significand().unwrap();
867 let mant = unshifted_mant.clone() >> (unshifted_mant.significant_bits() - 113);
868 let mant_128 = mant.to_u128_wrapping();
869 assert_eq!(mant_128 >> 112, 1);
870 let mant_bits = mant_128 & ((1 << 112) - 1);
871
872 println!(
873 " pub const {name}: F128 = F128::from_bits({});",
874 hex_bits(sign_bit | exp_bits | mant_bits)
875 );
876}
877
878fn float<T>(t: T) -> Float
879where
880 Float: Assign<T>,
881{
882 Float::with_val(1000, t)
883}
884
885fn main() {
886 println!("/// Basic mathematical constants.");
887 println!("pub mod consts {{");
888 println!(" use crate::F128;");
889 print("Archimedes’ constant, π", "PI", float(Constant::Pi));
890 print("A turn, τ", "TAU", float(Constant::Pi) * 2);
891 print("π/2", "FRAC_PI_2", float(Constant::Pi) / 2);
892 print("π/3", "FRAC_PI_3", float(Constant::Pi) / 3);
893 print("π/4", "FRAC_PI_4", float(Constant::Pi) / 4);
894 print("π/6", "FRAC_PI_6", float(Constant::Pi) / 6);
895 print("π/8", "FRAC_PI_8", float(Constant::Pi) / 8);
896 print("1/π", "FRAC_1_PI", 1 / float(Constant::Pi));
897 print("2/π", "FRAC_2_PI", 2 / float(Constant::Pi));
898 print("2/√π", "FRAC_2_SQRT_PI", 2 / float(Constant::Pi).sqrt());
899 print("√2", "SQRT_2", float(2).sqrt());
900 print("1/√2", "FRAC_1_SQRT_2", float(0.5).sqrt());
901 print("Euler’s number, e", "E", float(1).exp());
902 print("log<sub>2</sub> 10", "LOG2_10", float(10).log2());
903 print("log<sub>2</sub> e", "LOG2_E", float(1).exp().log2());
904 print("log<sub>10</sub> 2", "LOG10_2", float(2).log10());
905 print("log<sub>10</sub> e", "LOG10_E", float(1).exp().log10());
906 print("ln 2", "LN_2", float(2).ln());
907 print("ln 10", "LN_10", float(10).ln());
908 println!("}}");
909}
910```
911*/
912
/// Basic mathematical constants.
///
/// Each constant is an [`F128`] given directly as its *binary128* bit
/// pattern.
// The definitions are in the format printed by the generator program kept in
// the block comment that precedes this module; that program rounds each value
// to 113 bits of precision before extracting the bit pattern. The plain `//`
// comment above each constant records the first 40 significant decimal digits
// of the value.
pub mod consts {
    use crate::F128;

    /// Archimedes’ constant, π = 3.14159…
    // PI = 3.141592653589793238462643383279502884197...
    pub const PI: F128 = F128::from_bits(0x4000_921F_B544_42D1_8469_898C_C517_01B8);

    /// A turn, τ = 6.28318…
    // TAU = 6.283185307179586476925286766559005768394...
    pub const TAU: F128 = F128::from_bits(0x4001_921F_B544_42D1_8469_898C_C517_01B8);

    /// π/2 = 1.57079…
    // FRAC_PI_2 = 1.570796326794896619231321691639751442098...
    pub const FRAC_PI_2: F128 = F128::from_bits(0x3FFF_921F_B544_42D1_8469_898C_C517_01B8);

    /// π/3 = 1.04719…
    // FRAC_PI_3 = 1.047197551196597746154214461093167628065...
    pub const FRAC_PI_3: F128 = F128::from_bits(0x3FFF_0C15_2382_D736_5846_5BB3_2E0F_567B);

    /// π/4 = 0.785398…
    // FRAC_PI_4 = 0.7853981633974483096156608458198757210492...
    pub const FRAC_PI_4: F128 = F128::from_bits(0x3FFE_921F_B544_42D1_8469_898C_C517_01B8);

    /// π/6 = 0.523598…
    // FRAC_PI_6 = 0.5235987755982988730771072305465838140328...
    pub const FRAC_PI_6: F128 = F128::from_bits(0x3FFE_0C15_2382_D736_5846_5BB3_2E0F_567B);

    /// π/8 = 0.392699…
    // FRAC_PI_8 = 0.3926990816987241548078304229099378605246...
    pub const FRAC_PI_8: F128 = F128::from_bits(0x3FFD_921F_B544_42D1_8469_898C_C517_01B8);

    /// 1/π = 0.318309…
    // FRAC_1_PI = 0.3183098861837906715377675267450287240689...
    pub const FRAC_1_PI: F128 = F128::from_bits(0x3FFD_45F3_06DC_9C88_2A53_F84E_AFA3_EA6A);

    /// 2/π = 0.636619…
    // FRAC_2_PI = 0.6366197723675813430755350534900574481378...
    pub const FRAC_2_PI: F128 = F128::from_bits(0x3FFE_45F3_06DC_9C88_2A53_F84E_AFA3_EA6A);

    /// 2/√π = 1.12837…
    // FRAC_2_SQRT_PI = 1.128379167095512573896158903121545171688...
    pub const FRAC_2_SQRT_PI: F128 = F128::from_bits(0x3FFF_20DD_7504_29B6_D11A_E3A9_14FE_D7FE);

    /// √2 = 1.41421…
    // SQRT_2 = 1.414213562373095048801688724209698078569...
    pub const SQRT_2: F128 = F128::from_bits(0x3FFF_6A09_E667_F3BC_C908_B2FB_1366_EA95);

    /// 1/√2 = 0.707106…
    // FRAC_1_SQRT_2 = 0.7071067811865475244008443621048490392848...
    pub const FRAC_1_SQRT_2: F128 = F128::from_bits(0x3FFE_6A09_E667_F3BC_C908_B2FB_1366_EA95);

    /// Euler’s number, e = 2.71828…
    // E = 2.718281828459045235360287471352662497757...
    pub const E: F128 = F128::from_bits(0x4000_5BF0_A8B1_4576_9535_5FB8_AC40_4E7A);

    /// log<sub>2</sub> 10 = 3.32192…
    // LOG2_10 = 3.321928094887362347870319429489390175864...
    pub const LOG2_10: F128 = F128::from_bits(0x4000_A934_F097_9A37_15FC_9257_EDFE_9B60);

    /// log<sub>2</sub> e = 1.44269…
    // LOG2_E = 1.442695040888963407359924681001892137426...
    pub const LOG2_E: F128 = F128::from_bits(0x3FFF_7154_7652_B82F_E177_7D0F_FDA0_D23A);

    /// log<sub>10</sub> 2 = 0.301029…
    // LOG10_2 = 0.3010299956639811952137388947244930267681...
    pub const LOG10_2: F128 = F128::from_bits(0x3FFD_3441_3509_F79F_EF31_1F12_B358_16F9);

    /// log<sub>10</sub> e = 0.434294…
    // LOG10_E = 0.4342944819032518276511289189166050822943...
    pub const LOG10_E: F128 = F128::from_bits(0x3FFD_BCB7_B152_6E50_E32A_6AB7_555F_5A68);

    /// ln 2 = 0.693147…
    // LN_2 = 0.6931471805599453094172321214581765680755...
    pub const LN_2: F128 = F128::from_bits(0x3FFE_62E4_2FEF_A39E_F357_93C7_6730_07E6);

    /// ln 10 = 2.30258…
    // LN_10 = 2.302585092994045684017991454684364207601...
    pub const LN_10: F128 = F128::from_bits(0x4000_26BB_1BBB_5551_582D_D4AD_AC57_05A6);
}
993
994#[cfg(test)]
995mod tests {
996 use crate::F128;
997 use crate::traits::FromFixed;
998 use half::{bf16 as half_bf16, f16 as half_f16};
999
1000 // Apart from F128 include f16, bf16, f32, f64 as a sanity check for the tests.
1001
    // The constants of one floating-point type that `Params::check` tests
    // for mutual consistency: the decimal values are recomputed from the
    // binary ones and compared.
    struct Params {
        // Binary inputs of the check.
        mantissa_digits: u32,
        min_exp: i32,
        max_exp: i32,
        // Decimal values under test.
        digits: u32,
        min_10_exp: i32,
        max_10_exp: i32,
    }
1010
1011 impl Params {
1012 #[track_caller]
1013 fn check(self) {
1014 let p = f64::from(self.mantissa_digits);
1015 let e_min = f64::from(self.min_exp);
1016 let e_max = f64::from(self.max_exp);
1017 assert_eq!(self.digits, ((p - 1.) * 2f64.log10()).floor() as u32);
1018 assert_eq!(self.min_10_exp, ((e_min - 1.) * 2f64.log10()).ceil() as i32);
1019 assert_eq!(
1020 self.max_10_exp,
1021 ((-(-p).exp2()).ln_1p() / 10f64.ln() + e_max * 2f64.log10()).floor() as i32
1022 );
1023 }
1024 }
1025
1026 #[test]
1027 fn decimal_constants_f16() {
1028 let params = Params {
1029 mantissa_digits: half_f16::MANTISSA_DIGITS,
1030 min_exp: half_f16::MIN_EXP,
1031 max_exp: half_f16::MAX_EXP,
1032 digits: half_f16::DIGITS,
1033 min_10_exp: half_f16::MIN_10_EXP,
1034 max_10_exp: half_f16::MAX_10_EXP,
1035 };
1036 params.check();
1037 }
1038
1039 #[test]
1040 fn decimal_constants_bf16() {
1041 let params = Params {
1042 mantissa_digits: half_bf16::MANTISSA_DIGITS,
1043 min_exp: half_bf16::MIN_EXP,
1044 max_exp: half_bf16::MAX_EXP,
1045 digits: half_bf16::DIGITS,
1046 min_10_exp: half_bf16::MIN_10_EXP,
1047 max_10_exp: half_bf16::MAX_10_EXP,
1048 };
1049 params.check();
1050 }
1051
1052 #[test]
1053 fn decimal_constants_f32() {
1054 let params = Params {
1055 mantissa_digits: f32::MANTISSA_DIGITS,
1056 min_exp: f32::MIN_EXP,
1057 max_exp: f32::MAX_EXP,
1058 digits: f32::DIGITS,
1059 min_10_exp: f32::MIN_10_EXP,
1060 max_10_exp: f32::MAX_10_EXP,
1061 };
1062 params.check();
1063 }
1064
1065 #[test]
1066 fn decimal_constants_f64() {
1067 let params = Params {
1068 mantissa_digits: f64::MANTISSA_DIGITS,
1069 min_exp: f64::MIN_EXP,
1070 max_exp: f64::MAX_EXP,
1071 digits: f64::DIGITS,
1072 min_10_exp: f64::MIN_10_EXP,
1073 max_10_exp: f64::MAX_10_EXP,
1074 };
1075 params.check();
1076 }
1077
1078 #[test]
1079 fn decimal_constants_f128() {
1080 let params = Params {
1081 mantissa_digits: F128::MANTISSA_DIGITS,
1082 min_exp: F128::MIN_EXP,
1083 max_exp: F128::MAX_EXP,
1084 digits: F128::DIGITS,
1085 min_10_exp: F128::MIN_10_EXP,
1086 max_10_exp: F128::MAX_10_EXP,
1087 };
1088 params.check();
1089 }
1090
1091 #[test]
1092 fn math_constants() {
1093 use crate::consts as fix;
1094 use crate::f128::consts as f128;
1095 assert_eq!(f128::PI, F128::from_fixed(fix::PI));
1096 assert_eq!(f128::TAU, F128::from_fixed(fix::TAU));
1097 assert_eq!(f128::FRAC_PI_2, F128::from_fixed(fix::FRAC_PI_2));
1098 assert_eq!(f128::FRAC_PI_3, F128::from_fixed(fix::FRAC_PI_3));
1099 assert_eq!(f128::FRAC_PI_4, F128::from_fixed(fix::FRAC_PI_4));
1100 assert_eq!(f128::FRAC_PI_6, F128::from_fixed(fix::FRAC_PI_6));
1101 assert_eq!(f128::FRAC_PI_8, F128::from_fixed(fix::FRAC_PI_8));
1102 assert_eq!(f128::FRAC_1_PI, F128::from_fixed(fix::FRAC_1_PI));
1103 assert_eq!(f128::FRAC_2_PI, F128::from_fixed(fix::FRAC_2_PI));
1104 assert_eq!(f128::FRAC_2_SQRT_PI, F128::from_fixed(fix::FRAC_2_SQRT_PI));
1105 assert_eq!(f128::SQRT_2, F128::from_fixed(fix::SQRT_2));
1106 assert_eq!(f128::FRAC_1_SQRT_2, F128::from_fixed(fix::FRAC_1_SQRT_2));
1107 assert_eq!(f128::E, F128::from_fixed(fix::E));
1108 assert_eq!(f128::LOG2_10, F128::from_fixed(fix::LOG2_10));
1109 assert_eq!(f128::LOG2_E, F128::from_fixed(fix::LOG2_E));
1110 assert_eq!(f128::LOG10_2, F128::from_fixed(fix::LOG10_2));
1111 assert_eq!(f128::LOG10_E, F128::from_fixed(fix::LOG10_E));
1112 assert_eq!(f128::LN_2, F128::from_fixed(fix::LN_2));
1113 assert_eq!(f128::LN_10, F128::from_fixed(fix::LN_10));
1114 }
1115
1116 #[test]
1117 fn from_f64() {
1118 // normal
1119 assert_eq!(F128::from(1f64), F128::ONE);
1120 assert_eq!(F128::from(-1f64), F128::NEG_ONE);
1121 // infinity
1122 assert_eq!(F128::from(f64::INFINITY), F128::INFINITY);
1123 assert_eq!(F128::from(f64::NEG_INFINITY), F128::NEG_INFINITY);
1124 // NaN
1125 assert!(F128::from(f64::NAN).is_nan());
1126 // zero
1127 assert_eq!(F128::from(0f64), F128::ZERO);
1128 assert_eq!(F128::from(-0f64), F128::ZERO);
1129 assert!(F128::from(0f64).is_sign_positive());
1130 assert!(F128::from(-0f64).is_sign_negative());
1131
1132 // subnormal
1133 let exp_shift = F128::MANTISSA_DIGITS - 1;
1134 // minimum f64 positive subnormal = 2^(-1021 - 53)
1135 // mantissa = 0
1136 // biased exponent = 16383 - 1021 - 53
1137 let exp = (F128::MAX_EXP - 1 + f64::MIN_EXP - f64::MANTISSA_DIGITS as i32) as u128;
1138 assert_eq!(
1139 F128::from(f64::from_bits(1)),
1140 F128::from_bits(exp << exp_shift)
1141 );
1142 // minimum f64 positive subnormal * 0b1011 = 0b1.011 * 2^(-1021 - 53 + 3)
1143 // mantissa = .011 << (113 - 1) = 011 << (113 - 1 - 3)
1144 // biased exponent = 16383 - 1021 - 53 + 3
1145 let mantissa = 3u128 << (F128::MANTISSA_DIGITS - 1 - 3);
1146 let exp = exp + 3;
1147 assert_eq!(
1148 F128::from(f64::from_bits((1 << 63) | 11)),
1149 F128::from_bits((1 << 127) | (exp << exp_shift) | mantissa)
1150 );
1151 }
1152
1153 #[test]
1154 fn from_f32() {
1155 // normal
1156 assert_eq!(F128::from(1f32), F128::ONE);
1157 assert_eq!(F128::from(-1f32), F128::NEG_ONE);
1158 // infinity
1159 assert_eq!(F128::from(f32::INFINITY), F128::INFINITY);
1160 assert_eq!(F128::from(f32::NEG_INFINITY), F128::NEG_INFINITY);
1161 // NaN
1162 assert!(F128::from(f32::NAN).is_nan());
1163 // zero
1164 assert_eq!(F128::from(0f32), F128::ZERO);
1165 assert_eq!(F128::from(-0f32), F128::ZERO);
1166 assert!(F128::from(0f32).is_sign_positive());
1167 assert!(F128::from(-0f32).is_sign_negative());
1168
1169 // subnormal
1170 let exp_shift = F128::MANTISSA_DIGITS - 1;
1171 // minimum f32 positive subnormal = 2^(-125 - 24)
1172 // mantissa = 0
1173 // biased exponent = 16383 - 125 - 24
1174 let exp = (F128::MAX_EXP - 1 + f32::MIN_EXP - f32::MANTISSA_DIGITS as i32) as u128;
1175 assert_eq!(
1176 F128::from(f32::from_bits(1)),
1177 F128::from_bits(exp << exp_shift)
1178 );
1179 // minimum f32 positive subnormal * 0b1011 = 0b1.011 * 2^(-125 - 24 + 3)
1180 // mantissa = .011 << (113 - 1) = 011 << (113 - 1 - 3)
1181 // biased exponent = 16383 - 125 - 24 + 3
1182 let mantissa = 3u128 << (F128::MANTISSA_DIGITS - 1 - 3);
1183 let exp = exp + 3;
1184 assert_eq!(
1185 F128::from(f32::from_bits((1 << 31) | 11)),
1186 F128::from_bits((1 << 127) | (exp << exp_shift) | mantissa)
1187 );
1188 }
1189
1190 #[test]
1191 fn from_f16() {
1192 // normal
1193 assert_eq!(F128::from(half_f16::ONE), F128::ONE);
1194 assert_eq!(F128::from(half_f16::NEG_ONE), F128::NEG_ONE);
1195 // infinity
1196 assert_eq!(F128::from(half_f16::INFINITY), F128::INFINITY);
1197 assert_eq!(F128::from(half_f16::NEG_INFINITY), F128::NEG_INFINITY);
1198 // NaN
1199 assert!(F128::from(half_f16::NAN).is_nan());
1200 // zero
1201 assert_eq!(F128::from(half_f16::ZERO), F128::ZERO);
1202 assert_eq!(F128::from(half_f16::NEG_ZERO), F128::ZERO);
1203 assert!(F128::from(half_f16::ZERO).is_sign_positive());
1204 assert!(F128::from(half_f16::NEG_ZERO).is_sign_negative());
1205
1206 // subnormal
1207 let exp_shift = F128::MANTISSA_DIGITS - 1;
1208 // minimum f16 positive subnormal = 2^(-13 - 11)
1209 // mantissa = 0
1210 // biased exponent = 16383 - 13 - 11
1211 let exp =
1212 (F128::MAX_EXP - 1 + half_f16::MIN_EXP - half_f16::MANTISSA_DIGITS as i32) as u128;
1213 assert_eq!(
1214 F128::from(half_f16::from_bits(1)),
1215 F128::from_bits(exp << exp_shift)
1216 );
1217 // minimum f16 positive subnormal * 0b1011 = 0b1.011 * 2^(-13 - 11 + 3)
1218 // mantissa = .011 << (113 - 1) = 011 << (113 - 1 - 3)
1219 // biased exponent = 16383 - 13 - 11 + 3
1220 let mantissa = 3u128 << (F128::MANTISSA_DIGITS - 1 - 3);
1221 let exp = exp + 3;
1222 assert_eq!(
1223 F128::from(half_f16::from_bits((1 << 15) | 11)),
1224 F128::from_bits((1 << 127) | (exp << exp_shift) | mantissa)
1225 );
1226 }
1227
1228 #[test]
1229 fn from_bf16() {
1230 // normal
1231 assert_eq!(F128::from(half_bf16::ONE), F128::ONE);
1232 assert_eq!(F128::from(half_bf16::NEG_ONE), F128::NEG_ONE);
1233 // infinity
1234 assert_eq!(F128::from(half_bf16::INFINITY), F128::INFINITY);
1235 assert_eq!(F128::from(half_bf16::NEG_INFINITY), F128::NEG_INFINITY);
1236 // NaN
1237 assert!(F128::from(half_bf16::NAN).is_nan());
1238 // zero
1239 assert_eq!(F128::from(half_bf16::ZERO), F128::ZERO);
1240 assert_eq!(F128::from(half_bf16::NEG_ZERO), F128::ZERO);
1241 assert!(F128::from(half_bf16::ZERO).is_sign_positive());
1242 assert!(F128::from(half_bf16::NEG_ZERO).is_sign_negative());
1243
1244 // subnormal
1245 let exp_shift = F128::MANTISSA_DIGITS - 1;
1246 // minimum half_bf16 positive subnormal = 2^(-125 - 8)
1247 // mantissa = 0
1248 // biased exponent = 16383 - 125 - 8
1249 let exp =
1250 (F128::MAX_EXP - 1 + half_bf16::MIN_EXP - half_bf16::MANTISSA_DIGITS as i32) as u128;
1251 assert_eq!(
1252 F128::from(half_bf16::from_bits(1)),
1253 F128::from_bits(exp << exp_shift)
1254 );
1255 // minimum bf16 positive subnormal * 0b1011 = 0b1.011 * 2^(-125 - 8 + 3)
1256 // mantissa = .011 << (113 - 1) = 011 << (113 - 1 - 3)
1257 // biased exponent = 16383 - 125 - 8 + 3
1258 let mantissa = 3u128 << (F128::MANTISSA_DIGITS - 1 - 3);
1259 let exp = exp + 3;
1260 assert_eq!(
1261 F128::from(half_bf16::from_bits((1 << 15) | 11)),
1262 F128::from_bits((1 << 127) | (exp << exp_shift) | mantissa)
1263 );
1264 }
1265}