pxfm/
common.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use num_traits::MulAdd;
30use std::ops::{Add, Mul};
31
32#[inline(always)]
33pub(crate) fn is_integerf(x: f32) -> bool {
34    #[cfg(any(
35        all(
36            any(target_arch = "x86", target_arch = "x86_64"),
37            target_feature = "sse4.1"
38        ),
39        target_arch = "aarch64"
40    ))]
41    {
42        x.round_ties_even() == x
43    }
44    #[cfg(not(any(
45        all(
46            any(target_arch = "x86", target_arch = "x86_64"),
47            target_feature = "sse4.1"
48        ),
49        target_arch = "aarch64"
50    )))]
51    {
52        let x_u = x.to_bits();
53        let x_e = (x_u & EXP_MASK_F32) >> 23;
54        let lsb = (x_u | EXP_MASK_F32).trailing_zeros();
55        const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
56        const UNIT_EXPONENT: u32 = E_BIAS + 23;
57        x_e + lsb >= UNIT_EXPONENT
58    }
59}
60
/// Returns `true` when `x` is an odd integer.
///
/// NOTE(review): the aarch64 fast path truncates with `as i32`, so it only
/// agrees with the generic bit-pattern path when `x` is already an in-range
/// integer (e.g. 5.5 would report `true` there but `false` here) — confirm
/// that call sites establish integrality first.
#[inline(always)]
pub(crate) fn is_odd_integerf(x: f32) -> bool {
    #[cfg(target_arch = "aarch64")]
    {
        let truncated = x as i32;
        (truncated & 1) == 1
    }
    #[cfg(not(target_arch = "aarch64"))]
    {
        // Exponent-field mask for binary32 (same value as EXP_MASK_F32).
        const EXP_MASK_F32: u32 = 0xff << 23;
        const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
        const UNIT_EXPONENT: u32 = E_BIAS + 23;
        let bits = x.to_bits();
        let biased_exp = (bits & EXP_MASK_F32) >> 23;
        let trailing = (bits | EXP_MASK_F32).trailing_zeros();
        // Odd integers have their lowest set bit exactly at the unit place.
        biased_exp + trailing == UNIT_EXPONENT
    }
}
78
79#[inline(always)]
80pub(crate) fn is_integer(n: f64) -> bool {
81    #[cfg(any(
82        all(
83            any(target_arch = "x86", target_arch = "x86_64"),
84            target_feature = "sse4.1"
85        ),
86        target_arch = "aarch64"
87    ))]
88    {
89        n == n.round_ties_even()
90    }
91    #[cfg(not(any(
92        all(
93            any(target_arch = "x86", target_arch = "x86_64"),
94            target_feature = "sse4.1"
95        ),
96        target_arch = "aarch64"
97    )))]
98    {
99        use crate::bits::EXP_MASK;
100        let x_u = n.to_bits();
101        let x_e = (x_u & EXP_MASK) >> 52;
102        let lsb = (x_u | EXP_MASK).trailing_zeros();
103        const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
104
105        const UNIT_EXPONENT: u64 = E_BIAS + 52;
106        x_e + lsb as u64 >= UNIT_EXPONENT
107    }
108}
109
/// Fast odd test: truncates `x` toward zero and checks the low bit.
///
/// NOTE(review): a fractional input is truncated first (5.5 reports `true`),
/// so this presumably runs only on values already known to be integral —
/// confirm at call sites.
#[inline(always)]
#[allow(unused)]
pub(crate) fn is_odd_integer_fast(x: f64) -> bool {
    // SAFETY: `to_int_unchecked` is UB for NaN, infinities, or values whose
    // truncation does not fit in i64; the caller must guarantee `x` is
    // finite and within i64 range.
    let low_bit = unsafe { x.to_int_unchecked::<i64>() } & 1;
    low_bit != 0
}
115
/// Fast odd test for `f32`: truncates `x` toward zero and checks the low bit.
///
/// NOTE(review): fractional inputs are truncated first, so this presumably
/// runs only on values already known to be integral — confirm at call sites.
#[inline(always)]
#[allow(unused)]
pub(crate) fn is_odd_integerf_fast(x: f32) -> bool {
    // SAFETY: `to_int_unchecked` is UB for NaN, infinities, or values whose
    // truncation does not fit in i32; the caller must guarantee `x` is
    // finite and within i32 range.
    let low_bit = unsafe { x.to_int_unchecked::<i32>() } & 1;
    low_bit != 0
}
121
/// Returns `true` when `x` is an odd integer.
///
/// NOTE(review): the SSE4.1/aarch64 fast path truncates with `as i64`, so
/// it only matches the generic bit-pattern path when `x` is already an
/// in-range integer — confirm call sites establish integrality first.
#[inline(always)]
pub(crate) fn is_odd_integer(x: f64) -> bool {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    ))]
    {
        let truncated = x as i64;
        (truncated & 1) == 1
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "sse4.1"
        ),
        target_arch = "aarch64"
    )))]
    {
        // Exponent-field mask for binary64 (bits 52..=62).
        const EXP_MASK: u64 = 0x7ff << 52;
        const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
        const UNIT_EXPONENT: u64 = E_BIAS + 52;
        let bits = x.to_bits();
        let biased_exp = (bits & EXP_MASK) >> 52;
        let trailing = (bits | EXP_MASK).trailing_zeros() as u64;
        // Odd integers have their lowest set bit exactly at the unit place.
        biased_exp + trailing == UNIT_EXPONENT
    }
}
152
/// Generic multiply-add `acc + a * b`, compiled to a fused multiply-add on
/// targets with hardware FMA (x86 `fma` feature or aarch64).
#[cfg(any(
    all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "fma"
    ),
    target_arch = "aarch64"
))]
#[inline(always)]
pub(crate) fn mlaf<T>(acc: T, a: T, b: T) -> T
where
    T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>,
{
    // Fused form: single rounding of a * b + acc.
    MulAdd::mul_add(a, b, acc)
}
168
169#[inline(always)]
170#[cfg(not(any(
171    all(
172        any(target_arch = "x86", target_arch = "x86_64"),
173        target_feature = "fma"
174    ),
175    target_arch = "aarch64"
176)))]
177pub(crate) fn mlaf<T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>>(
178    acc: T,
179    a: T,
180    b: T,
181) -> T {
182    acc + a * b
183}
184
/// Rounds `x` to the nearest integer, halfway cases away from zero
/// (note: not ties-to-even), via an `i32` round trip.
#[inline]
pub(crate) const fn rintfk(x: f32) -> f32 {
    // Bias by half a unit toward the sign, then truncate through i32.
    let bias = if x < 0. { -0.5 } else { 0.5 };
    (x + bias) as i32 as f32
}
189
/// Non-fused multiply-add for `f32`: `a * b + c` with each operation
/// rounded separately.
#[inline(always)]
pub(crate) const fn fmlaf(a: f32, b: f32, c: f32) -> f32 {
    a * b + c
}
194
/// Optional FMA for `f32`: a hardware fused multiply-add when the target
/// provides one, otherwise the separately rounded scalar `a * b + c`.
#[inline(always)]
pub(crate) fn f_fmlaf(a: f32, b: f32, c: f32) -> f32 {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    ))]
    {
        a.mul_add(b, c)
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    )))]
    {
        c + a * b
    }
}
218
/// Optional FMA for `f64`: a hardware fused multiply-add when the target
/// provides one, otherwise the separately rounded scalar `a * b + c`.
#[inline(always)]
pub(crate) fn f_fmla(a: f64, b: f64, c: f64) -> f64 {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    ))]
    {
        a.mul_add(b, c)
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    )))]
    {
        c + a * b
    }
}
243
/// Non-fused multiply-add for `f64`: `a * b + c` with each operation
/// rounded separately.
#[inline(always)]
pub(crate) const fn fmla(a: f64, b: f64, c: f64) -> f64 {
    a * b + c
}
248
/// Executes a mandatory FMA: always produces the fused `a * b + c`.
///
/// With hardware FMA (x86 with the `fma` feature, or aarch64) this lowers
/// to a single fused instruction via [`f_fmla`]; otherwise the fused result
/// is emulated in extended precision with Dekker/Veltkamp double-double
/// arithmetic.
#[inline(always)]
pub(crate) fn dd_fmla(a: f64, b: f64, c: f64) -> f64 {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    ))]
    {
        // Hardware path: f_fmla compiles to a fused mul-add under this cfg.
        f_fmla(a, b, c)
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    )))]
    {
        // Software path: double-double emulation of the fused result.
        use crate::double_double::DoubleDouble;
        DoubleDouble::dd_f64_mul_add(a, b, c)
    }
}
275
/// Executes a mandatory FMA: always produces the fused `a * b + c`.
///
/// With hardware FMA (x86 with the `fma` feature, or aarch64) this lowers
/// to a single fused instruction via [`f_fmla`]; otherwise the fused result
/// is simulated with 128-bit dyadic floating point.
#[inline(always)]
pub(crate) fn dyad_fmla(a: f64, b: f64, c: f64) -> f64 {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    ))]
    {
        // Hardware path: f_fmla compiles to a fused mul-add under this cfg.
        f_fmla(a, b, c)
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    )))]
    {
        // Software path: widen to 128-bit dyadic floats, do the multiply-add
        // there, then round back to f64 once.
        use crate::dyadic_float::DyadicFloat128;
        let z = DyadicFloat128::new_from_f64(a);
        let k = DyadicFloat128::new_from_f64(b);
        let p = z * k + DyadicFloat128::new_from_f64(c);
        p.fast_as_f64()
    }
}
305
/// Executes a mandatory FMA for `f32`: always produces the fused
/// `a * b + c`.
///
/// NOTE(review): the software fallback widens to `f64` (exact product, one
/// `f64` rounding, then one `f32` rounding) rather than using
/// Dekker/Veltkamp as the original header claimed; the double rounding can
/// deviate from a true single-rounded fuse in rare cases — confirm this is
/// acceptable for callers.
#[inline(always)]
#[allow(unused)]
pub(crate) fn dd_fmlaf(a: f32, b: f32, c: f32) -> f32 {
    #[cfg(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    ))]
    {
        // Hardware path: f_fmlaf compiles to a fused mul-add under this cfg.
        f_fmlaf(a, b, c)
    }
    #[cfg(not(any(
        all(
            any(target_arch = "x86", target_arch = "x86_64"),
            target_feature = "fma"
        ),
        target_arch = "aarch64"
    )))]
    {
        // Software path: the f64 product of two f32 values is exact.
        let wide = f64::from(a) * f64::from(b) + f64::from(c);
        wide as f32
    }
}
332
333#[allow(dead_code)]
334#[inline(always)]
335pub(crate) fn c_mlaf<T: Copy + Mul<T, Output = T> + Add<T, Output = T> + MulAdd<T, Output = T>>(
336    a: T,
337    b: T,
338    c: T,
339) -> T {
340    mlaf(c, a, b)
341}
342
/// Copies sign from `y` to `x`
#[inline]
pub const fn copysignfk(x: f32, y: f32) -> f32 {
    // Merge the magnitude bits of x with the sign bit of y.
    const SIGN_BIT: u32 = 1u32 << 31;
    let magnitude = x.to_bits() & !SIGN_BIT;
    let sign = y.to_bits() & SIGN_BIT;
    f32::from_bits(magnitude | sign)
}
348
349// #[inline]
350// // Founds n in ln(𝑥)=ln(𝑎)+𝑛ln(2)
351// pub(crate) const fn ilogb2kf(d: f32) -> i32 {
352//     (((d.to_bits() as i32) >> 23) & 0xff) - 0x7f
353// }
354//
355// #[inline]
356// // Founds a in x=a+𝑛ln(2)
357// pub(crate) const fn ldexp3kf(d: f32, n: i32) -> f32 {
358//     f32::from_bits(((d.to_bits() as i32) + (n << 23)) as u32)
359// }
360
/// Computes `2^q` as an `f32` by writing the biased exponent field directly.
/// Only meaningful when `q + 127` fits the normal 8-bit biased range.
#[inline]
pub(crate) const fn pow2if(q: i32) -> f32 {
    let biased = q.wrapping_add(0x7f) as u32;
    f32::from_bits(biased << 23)
}
365
/// Rounds `x` to the nearest integer, halfway cases away from zero
/// (note: not ties-to-even), via an `i64` round trip.
#[inline]
pub(crate) const fn rintk(x: f64) -> f64 {
    // Bias by half a unit toward the sign, then truncate through i64.
    let bias = if x < 0. { -0.5 } else { 0.5 };
    (x + bias) as i64 as f64
}
371
/// Computes `2^q` as an `f64` by writing the biased exponent field directly.
/// Only meaningful when `q + 1023` fits the normal 11-bit biased range.
#[inline(always)]
pub(crate) const fn pow2i(q: i32) -> f64 {
    let biased = q.wrapping_add(0x3ff) as u64;
    f64::from_bits(biased << 52)
}
377
378// #[inline]
379// pub(crate) const fn ilogb2k(d: f64) -> i32 {
380//     (((d.to_bits() >> 52) & 0x7ff) as i32) - 0x3ff
381// }
382//
383// #[inline]
384// pub(crate) const fn ldexp3k(d: f64, e: i32) -> f64 {
385//     f64::from_bits(((d.to_bits() as i64) + ((e as i64) << 52)) as u64)
386// }
387
/// Copies sign from `y` to `x`
#[inline]
pub const fn copysignk(x: f64, y: f64) -> f64 {
    // Merge the magnitude bits of x with the sign bit of y.
    const SIGN_BIT: u64 = 1u64 << 63;
    let magnitude = x.to_bits() & !SIGN_BIT;
    let sign = y.to_bits() & SIGN_BIT;
    f64::from_bits(magnitude | sign)
}
393
/// Smallest positive normal `f64` (2^-1022, i.e. `f64::MIN_POSITIVE`):
/// exponent field = 1, mantissa = 0.
#[inline]
pub(crate) const fn min_normal_f64() -> f64 {
    f64::from_bits(1u64 << 52)
}
401
/// Returns a `u32` with the low `len` bits set; `len >= 32` saturates to
/// all ones (a plain shift by >= 32 would overflow).
#[inline]
const fn mask_trailing_ones_u32(len: u32) -> u32 {
    if len >= 32 {
        u32::MAX // All ones if length is 32 or more (original comment said 64 — wrong for u32)
    } else {
        (1u32 << len).wrapping_sub(1)
    }
}
410
/// Exponent-field mask for IEEE-754 binary32: bits 23..=30 (`0x7f80_0000`).
pub(crate) const EXP_MASK_F32: u32 = mask_trailing_ones_u32(8) << 23;
412
/// Returns the bits of `x` with the 8-bit exponent field replaced by
/// `new_exp` (only the bits of `new_exp` that land in the exponent field
/// after shifting are used); sign and mantissa are preserved.
#[inline]
pub(crate) fn set_exponent_f32(x: u32, new_exp: u32) -> u32 {
    // Same value as the module-level EXP_MASK_F32; local for self-containment.
    const EXP_MASK_F32: u32 = 0xff << 23;
    let exponent_bits = new_exp.wrapping_shl(23) & EXP_MASK_F32;
    (x & !EXP_MASK_F32) | exponent_bits
}
418
#[cfg(test)]
mod tests {
    use super::*;

    /// Integer / odd-integer classification across the f32 and f64 helpers.
    #[test]
    fn test_is_integer() {
        assert!(is_integer(5.));
        assert!(is_integer(6.));
        assert!(!is_integer(6.01));
        assert!(is_odd_integer(5.));
        assert!(!is_odd_integer(6.));
        assert!(!is_odd_integer(6.01));
        assert!(is_integerf(5.));
        assert!(is_integerf(6.));
        assert!(!is_integerf(6.01));
        assert!(is_odd_integerf(5.));
        assert!(!is_odd_integerf(6.));
        assert!(!is_odd_integerf(6.01));
    }
}
437}