pxfm/logs/
log2f.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{f_fmla, f_fmlaf, set_exponent_f32};
30use crate::logs::logf::{GenLogfBackend, LogfBackend};
31use crate::polyeval::f_polyeval3;
32
33pub(crate) static LOG2_R: [u64; 128] = [
34    0x0000000000000000,
35    0x3f872c7ba20f7327,
36    0x3f9743ee861f3556,
37    0x3fa184b8e4c56af8,
38    0x3fa77394c9d958d5,
39    0x3fad6ebd1f1febfe,
40    0x3fb1bb32a600549d,
41    0x3fb4c560fe68af88,
42    0x3fb7d60496cfbb4c,
43    0x3fb960caf9abb7ca,
44    0x3fbc7b528b70f1c5,
45    0x3fbf9c95dc1d1165,
46    0x3fc097e38ce60649,
47    0x3fc22dadc2ab3497,
48    0x3fc3c6fb650cde51,
49    0x3fc494f863b8df35,
50    0x3fc633a8bf437ce1,
51    0x3fc7046031c79f85,
52    0x3fc8a8980abfbd32,
53    0x3fc97c1cb13c7ec1,
54    0x3fcb2602497d5346,
55    0x3fcbfc67a7fff4cc,
56    0x3fcdac22d3e441d3,
57    0x3fce857d3d361368,
58    0x3fd01d9bbcfa61d4,
59    0x3fd08bce0d95fa38,
60    0x3fd169c05363f158,
61    0x3fd1d982c9d52708,
62    0x3fd249cd2b13cd6c,
63    0x3fd32bfee370ee68,
64    0x3fd39de8e1559f6f,
65    0x3fd4106017c3eca3,
66    0x3fd4f6fbb2cec598,
67    0x3fd56b22e6b578e5,
68    0x3fd5dfdcf1eeae0e,
69    0x3fd6552b49986277,
70    0x3fd6cb0f6865c8ea,
71    0x3fd7b89f02cf2aad,
72    0x3fd8304d90c11fd3,
73    0x3fd8a8980abfbd32,
74    0x3fd921800924dd3b,
75    0x3fd99b072a96c6b2,
76    0x3fda8ff971810a5e,
77    0x3fdb0b67f4f46810,
78    0x3fdb877c57b1b070,
79    0x3fdc043859e2fdb3,
80    0x3fdc819dc2d45fe4,
81    0x3fdcffae611ad12b,
82    0x3fdd7e6c0abc3579,
83    0x3fddfdd89d586e2b,
84    0x3fde7df5fe538ab3,
85    0x3fdefec61b011f85,
86    0x3fdf804ae8d0cd02,
87    0x3fe0014332be0033,
88    0x3fe042bd4b9a7c99,
89    0x3fe08494c66b8ef0,
90    0x3fe0c6caaf0c5597,
91    0x3fe1096015dee4da,
92    0x3fe14c560fe68af9,
93    0x3fe18fadb6e2d3c2,
94    0x3fe1d368296b5255,
95    0x3fe217868b0c37e8,
96    0x3fe25c0a0463beb0,
97    0x3fe2a0f3c340705c,
98    0x3fe2e644fac04fd8,
99    0x3fe2e644fac04fd8,
100    0x3fe32bfee370ee68,
101    0x3fe37222bb70747c,
102    0x3fe3b8b1c68fa6ed,
103    0x3fe3ffad4e74f1d6,
104    0x3fe44716a2c08262,
105    0x3fe44716a2c08262,
106    0x3fe48eef19317991,
107    0x3fe4d7380dcc422d,
108    0x3fe51ff2e30214bc,
109    0x3fe5692101d9b4a6,
110    0x3fe5b2c3da19723b,
111    0x3fe5b2c3da19723b,
112    0x3fe5fcdce2727ddb,
113    0x3fe6476d98ad990a,
114    0x3fe6927781d932a8,
115    0x3fe6927781d932a8,
116    0x3fe6ddfc2a78fc63,
117    0x3fe729fd26b707c8,
118    0x3fe7767c12967a45,
119    0x3fe7767c12967a45,
120    0x3fe7c37a9227e7fb,
121    0x3fe810fa51bf65fd,
122    0x3fe810fa51bf65fd,
123    0x3fe85efd062c656d,
124    0x3fe8ad846cf369a4,
125    0x3fe8ad846cf369a4,
126    0x3fe8fc924c89ac84,
127    0x3fe94c287492c4db,
128    0x3fe94c287492c4db,
129    0x3fe99c48be2063c8,
130    0x3fe9ecf50bf43f13,
131    0x3fe9ecf50bf43f13,
132    0x3fea3e2f4ac43f60,
133    0x3fea8ff971810a5e,
134    0x3fea8ff971810a5e,
135    0x3feae255819f022d,
136    0x3feb35458761d479,
137    0x3feb35458761d479,
138    0x3feb88cb9a2ab521,
139    0x3feb88cb9a2ab521,
140    0x3febdce9dcc96187,
141    0x3fec31a27dd00b4a,
142    0x3fec31a27dd00b4a,
143    0x3fec86f7b7ea4a89,
144    0x3fec86f7b7ea4a89,
145    0x3fecdcebd2373995,
146    0x3fed338120a6dd9d,
147    0x3fed338120a6dd9d,
148    0x3fed8aba045b01c8,
149    0x3fed8aba045b01c8,
150    0x3fede298ec0bac0d,
151    0x3fede298ec0bac0d,
152    0x3fee3b20546f554a,
153    0x3fee3b20546f554a,
154    0x3fee9452c8a71028,
155    0x3fee9452c8a71028,
156    0x3feeee32e2aeccbf,
157    0x3feeee32e2aeccbf,
158    0x3fef48c34bd1e96f,
159    0x3fef48c34bd1e96f,
160    0x3fefa406bd2443df,
161    0x3ff0000000000000,
162];
163
164#[inline(always)]
165fn log2f_gen<B: LogfBackend>(x: f32, backend: B) -> f32 {
166    let mut x_u = x.to_bits();
167
168    const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
169
170    let mut m = -(E_BIAS as i32);
171    if x_u < f32::MIN_POSITIVE.to_bits() || x_u > f32::MAX.to_bits() {
172        if x == 0.0 {
173            return f32::NEG_INFINITY;
174        }
175        if x_u == 0x80000000u32 {
176            return f32::NEG_INFINITY;
177        }
178        if x.is_sign_negative() && !x.is_nan() {
179            return f32::NAN + x;
180        }
181        // x is +inf or nan
182        if x.is_nan() || x.is_infinite() {
183            return x + x;
184        }
185        // Normalize denormal inputs.
186        x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits();
187        m -= 23;
188    }
189
190    m = m.wrapping_add(x_u.wrapping_shr(23) as i32);
191    let mant = x_u & 0x007F_FFFF;
192    let index = mant.wrapping_shr(16);
193
194    x_u = set_exponent_f32(x_u, 0x7F);
195
196    let u = f32::from_bits(x_u);
197
198    let v = if B::HAS_FMA {
199        use crate::logs::logf::LOG_REDUCTION_F32;
200        backend.fmaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64 // Exact.
201    } else {
202        use crate::logs::LOG_RANGE_REDUCTION;
203        backend.fma(
204            u as f64,
205            f64::from_bits(LOG_RANGE_REDUCTION[index as usize]),
206            -1.0,
207        ) // Exact
208    };
209    // Degree-5 polynomial approximation of log2 generated by Sollya with:
210    // > P = fpminimax(log2(1 + x)/x, 4, [|1, D...|], [-2^-8, 2^-7]);
211
212    let extra_factor = m as f64 + f64::from_bits(LOG2_R[index as usize]);
213
214    const COEFFS: [u64; 5] = [
215        0x3ff71547652b8133,
216        0xbfe71547652d1e33,
217        0x3fdec70a098473de,
218        0xbfd7154c5ccdf121,
219        0x3fd2514fd90a130a,
220    ];
221    let v2 = v * v; // Exact
222    let c0 = backend.fma(v, f64::from_bits(COEFFS[0]), extra_factor);
223    let c1 = backend.fma(v, f64::from_bits(COEFFS[2]), f64::from_bits(COEFFS[1]));
224    let c2 = backend.fma(v, f64::from_bits(COEFFS[4]), f64::from_bits(COEFFS[3]));
225
226    let r = backend.polyeval3(v2, c0, c1, c2);
227    r as f32
228}
229
230#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
231#[target_feature(enable = "avx", enable = "fma")]
232unsafe fn log2f_fma_impl(x: f32) -> f32 {
233    use crate::logs::logf::FmaLogfBackend;
234    log2f_gen(x, FmaLogfBackend {})
235}
236
237/// Logarithm of base 2
238///
239/// Max found ULP 0.4999996
240pub fn f_log2f(x: f32) -> f32 {
241    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
242    {
243        log2f_gen(x, GenLogfBackend {})
244    }
245    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
246    {
247        use std::sync::OnceLock;
248        static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
249        let q = EXECUTOR.get_or_init(|| {
250            if std::arch::is_x86_feature_detected!("avx")
251                && std::arch::is_x86_feature_detected!("fma")
252            {
253                log2f_fma_impl
254            } else {
255                fn def_log2f(x: f32) -> f32 {
256                    log2f_gen(x, GenLogfBackend {})
257                }
258                def_log2f
259            }
260        });
261        unsafe { q(x) }
262    }
263}
264
265/// Natural logarithm using FMA
266///
267/// Max found ULP 0.4999996
268#[inline(always)]
269#[allow(dead_code)]
270pub(crate) fn f_log2fx(x: f32) -> f64 {
271    let mut x_u = x.to_bits();
272
273    const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
274
275    let mut m = -(E_BIAS as i32);
276    if x_u == 0x3f80_0000u32 {
277        return 0.0;
278    }
279
280    if x_u < f32::MIN_POSITIVE.to_bits() || x_u > f32::MAX.to_bits() {
281        if x == 0.0 {
282            return f64::NEG_INFINITY;
283        }
284        if x_u == 0x80000000u32 {
285            return f64::NEG_INFINITY;
286        }
287        if x.is_sign_negative() && !x.is_nan() {
288            return f64::NAN + x as f64;
289        }
290        // x is +inf or nan
291        if x.is_nan() || x.is_infinite() {
292            return (x + x) as f64;
293        }
294        // Normalize denormal inputs.
295        x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits();
296        m -= 23;
297    }
298
299    m = m.wrapping_add(x_u.wrapping_shr(23) as i32);
300    let mant = x_u & 0x007F_FFFF;
301    let index = mant.wrapping_shr(16);
302
303    x_u = set_exponent_f32(x_u, 0x7F);
304
305    let v;
306    let u = f32::from_bits(x_u);
307
308    #[cfg(any(
309        all(
310            any(target_arch = "x86", target_arch = "x86_64"),
311            target_feature = "fma"
312        ),
313        target_arch = "aarch64"
314    ))]
315    {
316        use crate::logs::logf::LOG_REDUCTION_F32;
317        v = f_fmlaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64; // Exact.
318    }
319    #[cfg(not(any(
320        all(
321            any(target_arch = "x86", target_arch = "x86_64"),
322            target_feature = "fma"
323        ),
324        target_arch = "aarch64"
325    )))]
326    {
327        use crate::logs::log2::LOG_RANGE_REDUCTION;
328        v = f_fmla(
329            u as f64,
330            f64::from_bits(LOG_RANGE_REDUCTION[index as usize]),
331            -1.0,
332        ); // Exact
333    }
334    // Degree-5 polynomial approximation of log2 generated by Sollya with:
335    // > P = fpminimax(log2(1 + x)/x, 4, [|1, D...|], [-2^-8, 2^-7]);
336
337    let extra_factor = m as f64 + f64::from_bits(LOG2_R[index as usize]);
338
339    const COEFFS: [u64; 5] = [
340        0x3ff71547652b8133,
341        0xbfe71547652d1e33,
342        0x3fdec70a098473de,
343        0xbfd7154c5ccdf121,
344        0x3fd2514fd90a130a,
345    ];
346    let v2 = v * v; // Exact
347    let c0 = f_fmla(v, f64::from_bits(COEFFS[0]), extra_factor);
348    let c1 = f_fmla(v, f64::from_bits(COEFFS[2]), f64::from_bits(COEFFS[1]));
349    let c2 = f_fmla(v, f64::from_bits(COEFFS[4]), f64::from_bits(COEFFS[3]));
350
351    f_polyeval3(v2, c0, c1, c2)
352}
353
354/// Dirty log2 using FMA
355#[inline(always)]
356#[allow(dead_code)]
357pub(crate) fn dirty_log2f(d: f32) -> f32 {
358    let mut ix = d.to_bits();
359    /* reduce x into [sqrt(2)/2, sqrt(2)] */
360    ix = ix.wrapping_add(0x3f800000 - 0x3f3504f3);
361    let n = (ix >> 23) as i32 - 0x7f;
362    ix = (ix & 0x007fffff).wrapping_add(0x3f3504f3);
363    let a = f32::from_bits(ix);
364
365    let x = (a - 1.) / (a + 1.);
366
367    let x2 = x * x;
368    let mut u = 0.4121985850084821691e+0;
369    u = f_fmlaf(u, x2, 0.5770780163490337802e+0);
370    u = f_fmlaf(u, x2, 0.9617966939259845749e+0);
371    f_fmlaf(x2 * x, u, f_fmlaf(x, 0.2885390081777926802e+1, n as f32))
372}
373
374#[cfg(test)]
375mod tests {
376    use super::*;
377
378    #[test]
379    fn test_log2f() {
380        assert!((f_log2f(0.35f32) - 0.35f32.log2()).abs() < 1e-5);
381        assert!((f_log2f(0.9f32) - 0.9f32.log2()).abs() < 1e-5);
382        assert_eq!(f_log2f(0.), f32::NEG_INFINITY);
383        assert_eq!(f_log2f(1.0), 0.0);
384        assert!(f_log2f(-1.).is_nan());
385        assert!(f_log2f(f32::NAN).is_nan());
386        assert!(f_log2f(f32::NEG_INFINITY).is_nan());
387        assert_eq!(f_log2f(f32::INFINITY), f32::INFINITY);
388    }
389
390    #[test]
391    fn test_dirty_log2f() {
392        assert!((dirty_log2f(0.35f32) - 0.35f32.log2()).abs() < 1e-5);
393        assert!((dirty_log2f(0.9f32) - 0.9f32.log2()).abs() < 1e-5);
394    }
395}