pxfm/exponents/
exp2f.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{f_fmla, f_fmlaf, pow2if};
30use crate::exponents::expf::{ExpfBackend, GenericExpfBackend};
31use std::hint::black_box;
32
33const TBLSIZE: usize = 64;
34
35#[repr(align(64))]
36struct Exp2Table([(u32, u32); TBLSIZE]);
37
38#[rustfmt::skip]
39static EXP2FT: Exp2Table = Exp2Table([(0x3F3504F3, 0xB2D4175E),(0x3F36FD92, 0x3268D5EF),(0x3F38FBAF, 0xB30E8719),(0x3F3AFF5B, 0x3319E7DA),(0x3F3D08A4, 0x333CD82F),(0x3F3F179A, 0x330E1902),(0x3F412C4D, 0x32CCF4D7),(0x3F4346CD, 0x328F330E),(0x3F45672A, 0xB201B5B7),(0x3F478D75, 0x32CCCE34),(0x3F49B9BE, 0x335E937C),(0x3F4BEC15, 0x2FF41909),(0x3F4E248C, 0xB21760EA),(0x3F506334, 0x3283628B),(0x3F52A81E, 0x3340F500),(0x3F54F35B, 0x331202BD),(0x3F5744FD, 0x32B66A3E),(0x3F599D16, 0x32D0D9B1),(0x3F5BFBB8, 0x332ED93F),(0x3F5E60F5, 0x3350A709),(0x3F60CCDF, 0x32025744),(0x3F633F89, 0xB33A7C4D),(0x3F65B907, 0x321DA4E9),(0x3F68396A, 0xB2FF36A7),(0x3F6AC0C7, 0x3217E40E),(0x3F6D4F30, 0xB2400CBB),(0x3F6FE4BA, 0x331A2ACC),(0x3F728177, 0xB2B7D3E5),(0x3F75257D, 0xB1FED2BE),(0x3F77D0DF, 0xB32B73BA),(0x3F7A83B3, 0x32579081),(0x3F7D3E0C, 0xB19726B5),(0x3F800000, 0x00000000),(0x3F8164D2, 0x320C09FB),(0x3F82CD87, 0x3391E031),(0x3F843A29, 0x33287EEF),(0x3F85AAC3, 0xB38F6665),(0x3F871F62, 0x339004AB),(0x3F88980F, 0x33AC4561),(0x3F8A14D5, 0xB39CDAEA),(0x3F8B95C2, 0x32949D5C),(0x3F8D1ADF, 0xB36F79FA),(0x3F8EA43A, 0x33971DC2),(0x3F9031DC, 0xB32BD022),(0x3F91C3D3, 0xB3928952),(0x3F935A2B, 0xB2EBFECF),(0x3F94F4F0, 0x3357B8BB),(0x3F96942D, 0xB307353B),(0x3F9837F0, 0xB345DFE9),(0x3F99E046, 0x3382A804),(0x3F9B8D3A, 0x3326993E),(0x3F9D3EDA, 0x3350A029),(0x3F9EF532, 0xB3605F62),(0x3FA0B051, 0xB210909B),(0x3FA27043, 0xB0DDC369),(0x3FA43516, 0x33385844),(0x3FA5FED7, 0x33400757),(0x3FA7CD94, 0x3325446E),(0x3FA9A15B, 0x33237A50),(0x3FAB7A3A, 0x33201CA4),(0x3FAD583F, 0x32394687),(0x3FAF3B79, 0x332E1225),(0x3FB123F6, 0x33838969),(0x3FB311C4, 0xB219F2BA)]);
40
41/**
42Generated by SageMath:
43```python
44print("[")
45for k in range(64):
46    k = RealField(150)(2)**(RealField(150)(k) / RealField(150)(64))
47    print(double_to_hex(k) + ",")
48print("];")
49```
50**/
51pub(crate) static EXP2F_TABLE: [u64; 64] = [
52    0x3ff0000000000000,
53    0x3ff02c9a3e778061,
54    0x3ff059b0d3158574,
55    0x3ff0874518759bc8,
56    0x3ff0b5586cf9890f,
57    0x3ff0e3ec32d3d1a2,
58    0x3ff11301d0125b51,
59    0x3ff1429aaea92de0,
60    0x3ff172b83c7d517b,
61    0x3ff1a35beb6fcb75,
62    0x3ff1d4873168b9aa,
63    0x3ff2063b88628cd6,
64    0x3ff2387a6e756238,
65    0x3ff26b4565e27cdd,
66    0x3ff29e9df51fdee1,
67    0x3ff2d285a6e4030b,
68    0x3ff306fe0a31b715,
69    0x3ff33c08b26416ff,
70    0x3ff371a7373aa9cb,
71    0x3ff3a7db34e59ff7,
72    0x3ff3dea64c123422,
73    0x3ff4160a21f72e2a,
74    0x3ff44e086061892d,
75    0x3ff486a2b5c13cd0,
76    0x3ff4bfdad5362a27,
77    0x3ff4f9b2769d2ca7,
78    0x3ff5342b569d4f82,
79    0x3ff56f4736b527da,
80    0x3ff5ab07dd485429,
81    0x3ff5e76f15ad2148,
82    0x3ff6247eb03a5585,
83    0x3ff6623882552225,
84    0x3ff6a09e667f3bcd,
85    0x3ff6dfb23c651a2f,
86    0x3ff71f75e8ec5f74,
87    0x3ff75feb564267c9,
88    0x3ff7a11473eb0187,
89    0x3ff7e2f336cf4e62,
90    0x3ff82589994cce13,
91    0x3ff868d99b4492ed,
92    0x3ff8ace5422aa0db,
93    0x3ff8f1ae99157736,
94    0x3ff93737b0cdc5e5,
95    0x3ff97d829fde4e50,
96    0x3ff9c49182a3f090,
97    0x3ffa0c667b5de565,
98    0x3ffa5503b23e255d,
99    0x3ffa9e6b5579fdbf,
100    0x3ffae89f995ad3ad,
101    0x3ffb33a2b84f15fb,
102    0x3ffb7f76f2fb5e47,
103    0x3ffbcc1e904bc1d2,
104    0x3ffc199bdd85529c,
105    0x3ffc67f12e57d14b,
106    0x3ffcb720dcef9069,
107    0x3ffd072d4a07897c,
108    0x3ffd5818dcfba487,
109    0x3ffda9e603db3285,
110    0x3ffdfc97337b9b5f,
111    0x3ffe502ee78b3ff6,
112    0x3ffea4afa2a490da,
113    0x3ffefa1bee615a27,
114    0x3fff50765b6e4540,
115    0x3fffa7c1819e90d8,
116];
117
118/* ULP 0.508 method
119  let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32;
120
121  let ui = f32::to_bits(d + redux);
122  let mut i0 = ui;
123  i0 = i0.wrapping_add(TBLSIZE as u32 / 2);
124  let k = i0 / TBLSIZE as u32;
125  i0 &= TBLSIZE as u32 - 1;
126  let mut uf = f32::from_bits(ui);
127  uf -= redux;
128
129  let item = EXP2FT.0[i0 as usize];
130  let z0: f32 = f32::from_bits(item.0);
131  let z1: f32 = f32::from_bits(item.1);
132
133  let f: f32 = d - uf - z1;
134
135  let mut u = 0.055504108664458832;
136  u = f_fmlaf(u, f, 0.24022650695908768);
137  u = f_fmlaf(u, f, 0.69314718055994973);
138  u *= f;
139
140  let i2 = pow2if(k as i32);
141  f_fmlaf(u, z0, z0) * i2
142*/
143
144#[inline(always)]
145fn exp2f_gen<B: ExpfBackend>(x: f32, backend: B) -> f32 {
146    let mut t = x.to_bits();
147
148    if (t & 0xffff) == 0 {
149        // x maybe integer
150        let k: i32 = (((t >> 23) & 0xff) as i32).wrapping_sub(127); // 2^k <= |x| < 2^(k+1)
151        if k >= 0 && k < 9 && (t << (9i32.wrapping_add(k))) == 0 {
152            // x integer, with 1 <= |x| < 2^9
153            let msk = (t as i32) >> 31;
154            let mut m: i32 = (((t & 0x7fffff) | (1 << 23)) >> (23 - k)) as i32;
155            m = (m ^ msk).wrapping_sub(msk).wrapping_add(127);
156            if m > 0 && m < 255 {
157                t = (m as u32).wrapping_shl(23);
158                return f32::from_bits(t);
159            } else if m <= 0 && m > -23 {
160                t = 1i32.wrapping_shl(22i32.wrapping_add(m) as u32) as u32;
161                return f32::from_bits(t);
162            }
163        }
164    }
165    let ux = t.wrapping_shl(1);
166
167    if ux >= 0x86000000u32 || ux < 0x65000000u32 {
168        // |x| >= 128 or x=nan or |x| < 0x1p-26
169        if ux < 0x65000000u32 {
170            return 1.0 + x;
171        } // |x| < 0x1p-26
172        // if x < -149 or 128 <= x is special
173        if !(t >= 0xc3000000u32 && t < 0xc3150000u32) {
174            if ux >= 0xffu32 << 24 {
175                // x is inf or nan
176                if ux > (0xffu32 << 24) {
177                    return x + x;
178                } // x = nan
179                static IR: [f32; 2] = [f32::INFINITY, 0.];
180                return IR[(t >> 31) as usize]; // x = +-inf
181            }
182            if t >= 0xc3150000u32 {
183                // x < -149
184                let z = x;
185                let mut y = f_fmla(
186                    z as f64 + 149.,
187                    f64::from_bits(0x3690000000000000),
188                    f64::from_bits(0x36a0000000000000),
189                );
190                y = y.max(f64::from_bits(0x3680000000000000));
191                return y as f32;
192            }
193            // now x >= 128
194            let r = black_box(f64::from_bits(0x47e0000000000000))
195                * black_box(f64::from_bits(0x47e0000000000000));
196            return r as f32;
197        }
198    }
199
200    if ux <= 0x7a000000u32 {
201        // |x| < 1/32
202
203        // Generated by Sollya exp2 on range [-1/32;1/32]:
204        // d = [-1/32, 1/32];
205        // f_exp2f = (2^y - 1)/y;
206        // Q = fpminimax(f_exp2f, 5, [|D...|], d, relative, floating);
207
208        // See ./notes/exp2f_small.sollya
209        const C: [u64; 6] = [
210            0x3fe62e42fefa39f3,
211            0x3fcebfbdff82c57b,
212            0x3fac6b08d6f2d7aa,
213            0x3f83b2ab6fc92f5d,
214            0x3f55d897cfe27125,
215            0x3f243090e61e6af1,
216        ];
217        let xd = x as f64;
218        let p = backend.polyeval6(
219            xd,
220            f64::from_bits(C[0]),
221            f64::from_bits(C[1]),
222            f64::from_bits(C[2]),
223            f64::from_bits(C[3]),
224            f64::from_bits(C[4]),
225            f64::from_bits(C[5]),
226        );
227        return backend.fma(p, xd, 1.) as f32;
228    }
229
230    let x_d = x as f64;
231    let kf = backend.round(x_d * 64.);
232    let k = unsafe { kf.to_int_unchecked::<i32>() }; // it's already not indeterminate.
233    // dx = lo = x - (hi + mid) = x - kf * 2^(-6)
234    let dx = backend.fma(f64::from_bits(0xbf90000000000000), kf, x_d);
235
236    const TABLE_BITS: u32 = 6;
237    const TABLE_MASK: u64 = (1u64 << TABLE_BITS) - 1;
238
239    // hi = floor(kf * 2^(-5))
240    // exp_hi = shift hi to the exponent field of double precision.
241    let exp_hi: i64 = ((k >> TABLE_BITS) as i64).wrapping_shl(52);
242
243    // mh = 2^hi * 2^mid
244    // mh_bits = bit field of mh
245    let mh_bits = (EXP2F_TABLE[((k as u64) & TABLE_MASK) as usize] as i64).wrapping_add(exp_hi);
246    let mh = f64::from_bits(mh_bits as u64);
247
248    // Degree-4 polynomial approximating (2^x - 1)/x generated by Sollya with:
249    // > P = fpminimax((2^y - 1)/y, 4, [|D...|], [-1/64. 1/64]);
250    // see ./notes/exp2f.sollya
251    const C: [u64; 5] = [
252        0x3fe62e42fefa39ef,
253        0x3fcebfbdff8131c4,
254        0x3fac6b08d7061695,
255        0x3f83b2b1bee74b2a,
256        0x3f55d88091198529,
257    ];
258    let dx_sq = dx * dx;
259    let c1 = backend.fma(dx, f64::from_bits(C[0]), 1.0);
260    let c2 = backend.fma(dx, f64::from_bits(C[2]), f64::from_bits(C[1]));
261    let c3 = backend.fma(dx, f64::from_bits(C[4]), f64::from_bits(C[3]));
262    let p = backend.fma(dx_sq, c3, c2);
263    // 2^x = 2^(hi + mid + lo)
264    //     = 2^(hi + mid) * 2^lo
265    //     ~ mh * (1 + lo * P(lo))
266    //     = mh + (mh*lo) * P(lo)
267    backend.fma(p, dx_sq * mh, c1 * mh) as f32
268}
269
270#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
271#[target_feature(enable = "avx", enable = "fma")]
272unsafe fn exp2f_fma_impl(x: f32) -> f32 {
273    use crate::exponents::expf::FmaBackend;
274    exp2f_gen(x, FmaBackend {})
275}
276
277/// Computing exp2f
278///
279/// ULP 0.4999994
280#[inline]
281pub fn f_exp2f(x: f32) -> f32 {
282    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
283    {
284        exp2f_gen(x, GenericExpfBackend {})
285    }
286    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
287    {
288        use std::sync::OnceLock;
289        static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
290        let q = EXECUTOR.get_or_init(|| {
291            if std::arch::is_x86_feature_detected!("avx")
292                && std::arch::is_x86_feature_detected!("fma")
293            {
294                exp2f_fma_impl
295            } else {
296                fn def_exp2f(x: f32) -> f32 {
297                    exp2f_gen(x, GenericExpfBackend {})
298                }
299                def_exp2f
300            }
301        });
302        unsafe { q(x) }
303    }
304}
305
306#[inline]
307pub(crate) fn dirty_exp2f(d: f32) -> f32 {
308    let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32;
309
310    let ui = f32::to_bits(d + redux);
311    let mut i0 = ui;
312    i0 = i0.wrapping_add(TBLSIZE as u32 / 2);
313    let k = i0 / TBLSIZE as u32;
314    i0 &= TBLSIZE as u32 - 1;
315    let mut uf = f32::from_bits(ui);
316    uf -= redux;
317
318    let item = EXP2FT.0[i0 as usize];
319    let z0: f32 = f32::from_bits(item.0);
320
321    let f: f32 = d - uf;
322
323    let mut u = 0.24022650695908768;
324    u = f_fmlaf(u, f, 0.69314718055994973);
325    u *= f;
326
327    let i2 = pow2if(k as i32);
328    f_fmlaf(u, z0, z0) * i2
329}
330
331#[cfg(test)]
332mod tests {
333    use super::*;
334
335    #[test]
336    fn test_exp2f() {
337        assert_eq!(f_exp2f(1. / 64.), 1.0108893);
338        assert_eq!(f_exp2f(2.0), 4.0);
339        assert_eq!(f_exp2f(3.0), 8.0);
340        assert_eq!(f_exp2f(4.0), 16.0);
341        assert_eq!(f_exp2f(10.0), 1024.0);
342        assert_eq!(f_exp2f(-10.0), 0.0009765625);
343        assert!(f_exp2f(f32::NAN).is_nan());
344        assert_eq!(f_exp2f(-0.35), 0.7845841);
345        assert_eq!(f_exp2f(0.35), 1.2745606);
346        assert!(f_exp2f(f32::INFINITY).is_infinite());
347        assert_eq!(f_exp2f(f32::NEG_INFINITY), 0.0);
348    }
349
350    #[test]
351    fn test_dirty_exp2f() {
352        assert!((dirty_exp2f(0.35f32) - 0.35f32.exp2()).abs() < 1e-5);
353        assert!((dirty_exp2f(-0.6f32) - (-0.6f32).exp2()).abs() < 1e-5);
354    }
355}