pxfm/compound/
compound_m1f.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::*;
30use crate::compound::compoundf::{
31    COMPOUNDF_EXP2_T, COMPOUNDF_EXP2_U, compoundf_exp2_poly2, compoundf_log2p1_accurate,
32    compoundf_log2p1_fast,
33};
34use crate::double_double::DoubleDouble;
35use crate::exponents::exp2m1_accurate_tiny;
36use crate::rounding::CpuRoundTiesEven;
37use std::hint::black_box;
38
39// INVLOG2 = 1/log(2) * (1 + eps1) with |eps1| < 2^-55.976
40const INVLOG2: f64 = f64::from_bits(0x3ff71547652b82fe);
41
42#[cold]
43#[inline(never)]
44fn as_compoundm1f_special(x: f32, y: f32) -> f32 {
45    let nx = x.to_bits();
46    let ny = y.to_bits();
47    let ax: u32 = nx.wrapping_shl(1);
48    let ay: u32 = ny.wrapping_shl(1);
49
50    if ax == 0 || ay == 0 {
51        // x or y is 0
52        if ax == 0 {
53            // compound(0,y) = 1 except for y = sNaN
54            return if y.is_nan() { x + y } else { 0.0 };
55        }
56
57        if ay == 0 {
58            // compound (x, 0)
59            if x.is_nan() {
60                return x + y;
61            } // x = sNaN
62            return if x < -1.0 {
63                f32::NAN // rule (g)
64            } else {
65                0.0
66            }; // rule (a)
67        }
68    }
69
70    let mone = (-1.0f32).to_bits();
71    if ay >= 0xffu32 << 24 {
72        // y=Inf/NaN
73        // the case x=0 was already checked above
74        if ax > 0xffu32 << 24 {
75            return x + y;
76        } // x=NaN
77        if ay == 0xffu32 << 24 {
78            // y = +/-Inf
79            if nx > mone {
80                return f32::NAN;
81            } // rule (g)
82            let sy = ny >> 31; // sign bit of y
83            if nx == mone {
84                return if sy == 0 {
85                    -1. // Rule (c)
86                } else {
87                    f32::INFINITY // Rule (b)
88                };
89            }
90            if x < 0.0 {
91                return if sy == 0 { -1. } else { f32::INFINITY };
92            }
93            if x > 0.0 {
94                return if sy != 0 { -1. } else { f32::INFINITY };
95            }
96            return 0.0;
97        }
98        return x + y; // case y=NaN
99    }
100
101    if nx >= mone || nx >= 0xffu32 << 23 {
102        // x is Inf, NaN or <= -1
103        if ax == 0xffu32 << 24 {
104            // x is +Inf or -Inf
105            if (nx >> 31) != 0 {
106                return f32::NAN;
107            } // x = -Inf, rule (g)
108            // (1 + Inf)^y = +Inf for y > 0, +0 for y < 0
109            return (if (ny >> 31) != 0 { 1.0 / x } else { x }) - 1.;
110        }
111        if ax > 0xffu32 << 24 {
112            return x + y;
113        } // x is NaN
114        if nx > mone {
115            return f32::NAN; // x < -1.0: rule (g)
116        }
117        // now x = -1
118        return if (ny >> 31) != 0 {
119            // y < 0
120            f32::INFINITY
121        } else {
122            // y > 0
123            -1.0
124        };
125    }
126    -1.
127}
128
129/* for |z| <= 2^-6, returns an approximation of 2^z
130with absolute error < 2^-43.540  */
131#[inline]
132pub(crate) fn compoundf_expf_poly(z: f64) -> f64 {
133    /* Q is a degree-4 polynomial generated by Sollya (cf compoundf_expf.sollya)
134    with absolute error < 2^-43.549 */
135    const Q: [u64; 5] = [
136        0x3fe62e42fefa39ef,
137        0x3fcebfbdff8098eb,
138        0x3fac6b08d7045dc3,
139        0x3f83b2b276ce985d,
140        0x3f55d8849c67ace4,
141    ];
142    let z2 = z * z;
143    let c3 = dd_fmla(f64::from_bits(Q[4]), z, f64::from_bits(Q[3]));
144    let c0 = dd_fmla(f64::from_bits(Q[1]), z, f64::from_bits(Q[0]));
145    let c2 = dd_fmla(c3, z, f64::from_bits(Q[2]));
146    dd_fmla(c2, z2, c0) * z
147}
148
149/* return the correct rounding of (1+x)^y, otherwise -1.0
150where t is an approximation of y*log2(1+x) with absolute error < 2^-40.680,
151assuming 0x1.7154759a0df53p-24 <= |t| <= 150
152exact is non-zero iff (1+x)^y is exact or midpoint */
153fn exp2m1_fast(t: f64) -> f64 {
154    let k = t.cpu_round_ties_even(); // 0 <= |k| <= 150
155    let mut r = t - k; // |r| <= 1/2, exact
156    let mut v: u64 = (3.015625 + r).to_bits(); // 2.5 <= v <= 3.5015625
157    // we add 2^-6 so that i is rounded to nearest
158    let i: i32 = (v >> 46) as i32 - 0x10010; // 0 <= i <= 32
159    r -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact
160    // now |r| <= 2^-6
161    // 2^t = 2^k * exp2_U[i][0] * 2^r
162    let mut s = f64::from_bits(COMPOUNDF_EXP2_U[i as usize].1);
163    let su = unsafe { ((k.to_int_unchecked::<i64>() as u64).wrapping_add(0x3ffu64)) << 52 }; // k is already integer
164    s *= f64::from_bits(su);
165    let q_poly = compoundf_expf_poly(r);
166    v = q_poly.to_bits();
167    /* the absolute error on exp2_U[i][0] is bounded by 2^-53.092, with
168    exp2_U[i][0] < 2^0.5, and that on q1(r) is bounded by 2^-43.540,
169    with |q1(r)| < 1.011, thus |v| < 1.43, and the absolute error on v is
170    bounded by ulp(v) + 2^0.5s * 2^-43.540 + 2^-53.092 * 1.011 < 2^-43.035.
171    Now t approximates u := y*log2(1+x) with |t-u| < 2^-40.680 thus
172    2^u = 2^t * (1 + eps) with eps < 2^(2^-40.680)-1 < 2^-41.208.
173    The total absolute error is thus bounded by 2^-43.035 + 2^-41.208
174    < 2^-40.849. */
175    let mut err: u64 = 0x3d61d00000000000; // 2^-40.849 < 0x1.1dp-41
176
177    #[cfg(any(
178        all(
179            any(target_arch = "x86", target_arch = "x86_64"),
180            target_feature = "fma"
181        ),
182        target_arch = "aarch64"
183    ))]
184    {
185        v = f_fmla(f64::from_bits(v), s, s - 1f64).to_bits();
186    }
187    #[cfg(not(any(
188        all(
189            any(target_arch = "x86", target_arch = "x86_64"),
190            target_feature = "fma"
191        ),
192        target_arch = "aarch64"
193    )))]
194    {
195        let p0 = DoubleDouble::from_full_exact_add(s, -1.);
196        let z = DoubleDouble::from_exact_mult(f64::from_bits(v), s);
197        v = DoubleDouble::add(z, p0).to_f64().to_bits();
198    }
199
200    // in case of potential underflow, we defer to the accurate path
201    if f64::from_bits(v) < f64::from_bits(0x3d61d00000000000) {
202        return -1.0;
203    }
204    err = unsafe { err.wrapping_add((k.to_int_unchecked::<i64>() << 52) as u64) }; // scale the error by 2^k too
205    let lb = (f64::from_bits(v) - f64::from_bits(err)) as f32;
206    let rb = (f64::from_bits(v) + f64::from_bits(err)) as f32;
207    if lb != rb {
208        return -1.0;
209    } // rounding test failed
210
211    f64::from_bits(v)
212}
213
214fn compoundf_exp2m1_accurate(x_dd: DoubleDouble, x: f32, y: f32) -> f32 {
215    if y == 1.0 {
216        let res = x;
217        return res;
218    }
219
220    // check easy cases h+l is tiny thus 2^(h+l) rounds to 1, 1- or 1+
221    // if x_dd.hi.abs() <= f64::from_bits(0x3fc0000000000000u64) {
222    //     /* the relative error between h and y*log2(1+x) is bounded by
223    //     (1 + 2^-48.445) * (1 + 2^-91.120) - 1 < 2^-48.444.
224    //     2^h rounds to 1 to nearest for |h| <= H0 := 0x1.715476af0d4d9p-25.
225    //     The above threshold is such that h*(1+2^-48.444) < H0. */
226    //     return  exp2m1_accurate_tiny(x_dd.to_f64()) as f32;
227    // }
228
229    let k = x_dd.hi.cpu_round_ties_even(); // |k| <= 150
230
231    // check easy cases h+l is tiny thus 2^(h+l) rounds to 1, 1- or 1+
232    if k == 0. && x_dd.hi.abs() <= f64::from_bits(0x3e6715476af0d4c8) {
233        /* the relative error between h and y*log2(1+x) is bounded by
234        (1 + 2^-48.445) * (1 + 2^-91.120) - 1 < 2^-48.444.
235        2^h rounds to 1 to nearest for |h| <= H0 := 0x1.715476af0d4d9p-25.
236        The above threshold is such that h*(1+2^-48.444) < H0. */
237        // let z0 = 1.0 + x_dd.hi * 0.5;
238        // let k = Dekker::from_exact_sub(z0, 1.);
239        // return k.to_f64() as f32;
240
241        use crate::exponents::GenericExpfBackend;
242        return exp2m1_accurate_tiny(x_dd.to_f64(), &GenericExpfBackend {}) as f32;
243    }
244
245    let r = x_dd.hi - k; // |r| <= 1/2, exact
246    // since r is an integer multiple of ulp(h), fast_two_sum() below is exact
247    let mut v_dd = DoubleDouble::from_exact_add(r, x_dd.lo);
248    let mut v = (3.015625 + v_dd.hi).to_bits(); // 2.5 <= v <= 3.5015625
249    // we add 2^-6 so that i is rounded to nearest
250    let i: i32 = ((v >> 46) as i32).wrapping_sub(0x10010); // 0 <= i <= 32
251    // h is near (i-16)/2^5
252    v_dd.hi -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); // exact
253
254    // now |h| <= 2^-6
255    // 2^(h+l) = 2^k * exp2_U[i] * 2^(h+l)
256    v_dd = DoubleDouble::from_exact_add(v_dd.hi, v_dd.lo);
257    let q = compoundf_exp2_poly2(v_dd);
258
259    /* we have 0.989 < qh < 1.011, |ql| < 2^-51.959, and
260    |qh + ql - 2^(h+l)| < 2^-85.210 */
261    let exp2u = DoubleDouble::from_bit_pair(COMPOUNDF_EXP2_U[i as usize]);
262    let mut q = DoubleDouble::quick_mult(exp2u, q);
263
264    q = DoubleDouble::from_exact_add(q.hi, q.lo);
265
266    let mut du = unsafe {
267        k.to_int_unchecked::<i64>()
268            .wrapping_add(0x3ff)
269            .wrapping_shl(52) as u64
270    };
271    du = f64::from_bits(du).to_bits();
272    let scale = f64::from_bits(du);
273
274    q.hi *= scale;
275    q.lo *= scale;
276
277    let zf: DoubleDouble = DoubleDouble::from_full_exact_add(q.hi, -1.0);
278    q.lo += zf.lo;
279    q.hi = zf.hi;
280
281    v = q.to_f64().to_bits();
282
283    f64::from_bits(v) as f32
284}
285
286// at input, exact is non-zero iff (1+x)^y is exact
287// x,y=0x1.0f6f1ap+1,0x1.c643bp+5: 49 identical bits after round bit
288// x,y=0x1.ef272cp+15,-0x1.746ab2p+1: 55 identical bits after round bit
289// x,y=0x1.07ffcp+0,-0x1.921a8ap+4: 47 identical bits after round bit
290#[cold]
291#[inline(never)]
292fn compoundm1f_accurate(x: f32, y: f32) -> f32 {
293    let mut v = compoundf_log2p1_accurate(x as f64);
294    v = DoubleDouble::quick_mult_f64(v, y as f64);
295    compoundf_exp2m1_accurate(v, x, y)
296}
297
298/// Computes compound (1.0 + x)^y - 1
299///
300/// Max ULP 0.5
301#[inline]
302pub fn f_compound_m1f(x: f32, y: f32) -> f32 {
303    /* Rules from IEEE 754-2019 for compound (x, n) with n integer:
304       (a) compound (x, 0) is 1 for x >= -1 or quiet NaN
305       (b) compound (-1, n) is +Inf and signals the divideByZero exception for n < 0
306       (c) compound (-1, n) is +0 for n > 0
307       (d) compound (+/-0, n) is 1
308       (e) compound (+Inf, n) is +Inf for n > 0
309       (f) compound (+Inf, n) is +0 for n < 0
310       (g) compound (x, n) is qNaN and signals the invalid exception for x < -1
311       (h) compound (qNaN, n) is qNaN for n <> 0.
312    */
313    let mone = (-1.0f32).to_bits();
314    let nx = x.to_bits();
315    let ny = y.to_bits();
316    if nx >= mone {
317        return as_compoundm1f_special(x, y);
318    } // x <= -1 
319    // now x > -1
320
321    let ax: u32 = nx.wrapping_shl(1);
322    let ay: u32 = ny.wrapping_shl(1);
323
324    if ax == 0 || ax >= 0xffu32 << 24 || ay == 0 || ay >= 0xffu32 << 24 {
325        return as_compoundm1f_special(x, y);
326    } // x=+-0 || x=+-inf/nan || y=+-0 || y=+-inf/nan
327
328    // evaluate (1+x)^y explicitly for integer y in [-16,16] range and |x|<2^64
329    if is_integerf(y) && ay <= 0x83000000u32 && ax <= 0xbefffffeu32 {
330        if ax <= 0x62000000u32 {
331            return 1.0 + y * x;
332        } // does it work for |x|<2^-29 and |y|<=16?
333        let mut s = x as f64 + 1.;
334        let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
335
336        // exponentiation by squaring: O(log(y)) complexity
337        let mut acc = if iter_count % 2 != 0 { s } else { 1. };
338
339        while {
340            iter_count >>= 1;
341            iter_count
342        } != 0
343        {
344            s = s * s;
345            if iter_count % 2 != 0 {
346                acc *= s;
347            }
348        }
349
350        let dz = if y.is_sign_negative() { 1. / acc } else { acc };
351        return DoubleDouble::from_full_exact_add(dz, -1.).to_f64() as f32;
352    }
353
354    let xd = x as f64;
355    let yd = y as f64;
356    let tx = xd.to_bits();
357    let ty = yd.to_bits();
358
359    let l: f64 = if ax < 0x62000000u32 {
360        // |x| < 2^-29
361        /* |log2(1+x) - 1/log(2) * (x - x^2/2)| < 2^-59.584 * |log2(1+x)|
362        (cf compoundf.sollya) */
363        let t = xd - (xd * xd) * 0.5;
364        /* since x is epresentable in binary32, x*x is exact, and so is (x * x) * 0.5.
365           Thus the only error in the computation of t is the final rounding, which
366           is bounded by ulp(t): t = (x - x^2/2) * (1 + eps2) with |eps2| < 2^-52
367        */
368        INVLOG2 * t
369        /* since INVLOG2 = 1/log(2) * (1 + eps1) and
370        and   t = (x - x^2/2) * (1 + eps2)
371        let u = o(INVLOG2 * t) then u = INVLOG2 * t * (1 + eps3) with |eps3|<2^-53
372        thus u = 1/log(2) * (x - x^2/2) * (1 + eps1)*(1 + eps2)*(1 + eps3)
373        = 1/log(2) * (x - x^2/2) * (1 + eps4) with |eps4| < 2^-50.954
374        Now Sollya says the relative error by approximating log2(1+x) by
375        1/log(2) * (x - x^2/2) for |x| < 2^-29 is bounded by 2^-59.584
376        (file compoundf.sollya), thus:
377        u = log2(1+x) * (1+eps4)*(1+eps5) with |eps5| < 2^-59.584
378        = log2(1+x) * (1+eps6) with |eps6| < 2^-50.950 */
379    } else {
380        compoundf_log2p1_fast(f64::from_bits(tx))
381    };
382
383    /* l approximates log2(1+x) with relative error < 2^-47.997,
384    and 2^-149 <= |l| < 128 */
385
386    let t: u64 = (l * f64::from_bits(ty)).to_bits();
387    /* since 2^-149 <= |l| < 128 and 2^-149 <= |y| < 2^128, we have
388    2^-298 <= |t| < 2^135, thus no underflow/overflow in double is possible.
389    The relative error is bounded by (1+2^-47.997)*(1+2^-52)-1 < 2^-47.909 */
390
391    // detect overflow/underflow
392    if (t.wrapping_shl(1)) >= (0x406u64 << 53) {
393        // |t| >= 128
394        if t >= 0x3018bu64 << 46 {
395            // t <= -150
396            return black_box(f32::from_bits(0x00800000)) * black_box(f32::from_bits(0x00800000));
397        } else if (t >> 63) == 0 {
398            // t >= 128: overflow
399            return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000));
400        }
401    }
402
403    let res = exp2m1_fast(f64::from_bits(t));
404    if res != -1.0 {
405        return res as f32;
406    }
407    compoundm1f_accurate(x, y)
408}
409
410#[cfg(test)]
411mod tests {
412    use super::*;
413    use crate::compound::compound_m1f::{compoundf_exp2m1_accurate, exp2m1_fast};
414    use crate::double_double::DoubleDouble;
415
416    #[test]
417    fn test_compoundf() {
418        assert_eq!(
419            f_compound_m1f(-0.000000000000001191123, -0.000000000000001191123),
420            0.0000000000000000000000000000014187741
421        );
422        assert_eq!(f_compound_m1f(-0.000000000000001191123, 16.), 1.0);
423        assert_eq!(f_compound_m1f(0.91123, 16.), 31695.21);
424        assert_eq!(f_compound_m1f(0.91123, -16.), -0.99996847);
425    }
426
427    #[test]
428    fn test_compoundf_expm1_fast() {
429        assert_eq!(exp2m1_fast(3.764), 12.585539943149435);
430    }
431
432    #[test]
433    fn test_compoundf_expm1_accurate() {
434        assert_eq!(
435            compoundf_exp2m1_accurate(DoubleDouble::new(0., 2.74), 12., 53.),
436            5.680703,
437        );
438    }
439}