// pxfm/sincpi.rs

/*
 * // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::polyeval::f_estrin_polyeval5;
use crate::sincospi::{GenSinCosPiBackend, SinCosPiBackend, reduce_pi_64};
use crate::sincospi_tables::SINPI_K_PI_OVER_64;

/**
Sinpi on range [0.0, 0.03515625]

Generated poly by Sollya:
```text
d = [0, 0.03515625];
f_sincpi = sin(y*pi)/(y*pi);
Q = fpminimax(f_sincpi, [|0, 2, 4, 6, 8, 10, 12|], [|107...|], d, relative, floating);
```
See ./notes/sincpi_at_zero_dd.sollya
**/
#[cold]
fn as_sincpi_zero<B: SinCosPiBackend>(x: f64, backend: &B) -> f64 {
    // Minimax coefficients for the even powers x^0..x^12 of
    // sin(pi*x)/(pi*x), stored as double-double (lo_bits, hi_bits) pairs.
    const C: [(u64, u64); 7] = [
        (0xb9d3080000000000, 0x3ff0000000000000),
        (0xbc81873d86314302, 0xbffa51a6625307d3),
        (0x3c84871b4ffeefae, 0x3fe9f9cb402bc46c),
        (0xbc5562d6ae037010, 0xbfc86a8e4720db66),
        (0xbc386c93f4549bac, 0x3f9ac6805cf31ffd),
        (0x3c0dbda368edfa40, 0xbf633816a3399d4e),
        (0xbbcf22ccc18f27a9, 0x3f23736e6a59edd9),
    ];
    // x^2 as an exact double-double product; the polynomial only contains
    // even powers, so it is evaluated in x^2.
    let x2 = backend.exact_mult(x, x);
    // Horner scheme in double-double arithmetic, highest degree first.
    let mut p = backend.quick_mul_add(
        x2,
        DoubleDouble::from_bit_pair(C[6]),
        DoubleDouble::from_bit_pair(C[5]),
    );
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[4]));
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[3]));
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0]));
    // Collapse the double-double result to a single f64.
    p.to_f64()
}

/// Generic (no required CPU features) computation of sin(pi*x)/(pi*x).
///
/// Fast path uses double-double arithmetic with a Ziv-style rounding test;
/// hard-to-round cases fall back to higher precision.
#[inline(always)]
fn sincpi_gen_impl(x: f64) -> f64 {
    let ix = x.to_bits();
    // |x| bits (sign cleared).
    let ax = ix & 0x7fff_ffff_ffff_ffff;
    if ax == 0 {
        // Normalized sinc: sincpi(0) = 1.
        return 1.;
    }
    // Biased exponent of |x|.
    let e: i32 = (ax >> 52) as i32;
    // Mantissa with the implicit leading bit restored.
    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52);
    // sgn = -1 for negative x, 0 for positive; m = mantissa with x's sign.
    let sgn: i64 = (ix as i64) >> 63;
    let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn);
    // s < 0 exactly when |x| >= 2^41 (or x is Inf/NaN).
    let mut s: i32 = 1063i32.wrapping_sub(e);
    if s < 0 {
        if e == 0x7ff {
            // Exponent all-ones: Inf or NaN.
            if (ix << 12) == 0 {
                // +/-Inf: sin(pi*x) undefined -> NaN.
                return f64::NAN;
            }
            return x + x; // case x=NaN
        }
        s = -s - 1;
        if s > 10 {
            // |x| >= 2^52: every such double is an integer, so sin(pi*x) = 0
            // and the sinc quotient is a signed zero.
            return f64::copysign(0.0, x);
        }
        let iq: u64 = (m as u64).wrapping_shl(s as u32);
        if (iq & 2047) == 0 {
            // No fractional mantissa bits survive: x is an integer.
            return f64::copysign(0.0, x);
        }
    }

    if ax <= 0x3fa2000000000000u64 {
        // |x| <= 0.03515625

        if ax < 0x3c90000000000000u64 {
            // |x| < f64::EPSILON
            if ax <= 0x3b05798ee2308c3au64 {
                // |x| <= 2.2204460492503131e-24
                return 1.;
            }
            // Small values approximated with Taylor poly
            // sincpi(x) ~ 1 - x^2*Pi^2/6 + O(x^4)
            #[cfg(any(
                all(
                    any(target_arch = "x86", target_arch = "x86_64"),
                    target_feature = "fma"
                ),
                target_arch = "aarch64"
            ))]
            {
                // Constant below is -Pi^2/6 as raw f64 bits.
                const M_SQR_PI_OVER_6: f64 = f64::from_bits(0xbffa51a6625307d3);
                let p = f_fmla(x, M_SQR_PI_OVER_6 * x, 1.);
                return p;
            }
            #[cfg(not(any(
                all(
                    any(target_arch = "x86", target_arch = "x86_64"),
                    target_feature = "fma"
                ),
                target_arch = "aarch64"
            )))]
            {
                use crate::common::min_normal_f64;
                // Without a hardware FMA: 1 - MIN_NORMAL rounds back to 1 in
                // round-to-nearest (presumably kept to raise inexact — TODO
                // confirm against the crate's other small-argument paths).
                return 1. - min_normal_f64();
            }
        }

        // Poly generated by Sollya:
        // d = [0, 0.03515625];
        // f_sincpi = sin(y*pi)/(y*pi);
        // Q = fpminimax(f_sincpi, [|0, 2, 4, 6, 8, 10|], [|107, D...|], d, relative, floating);
        // See ./notes/sincpi_at_zero.sollya

        let x2 = x * x;

        // Error bound for the fast path: |err| <= |x| * (x2*2^-47 + 2^-66).
        let eps = x * f_fmla(
            x2,
            f64::from_bits(0x3d00000000000000), // 2^-47
            f64::from_bits(0x3bd0000000000000), // 2^-66
        );

        // Coefficients for x^2..x^10; the ~1 constant term is F below.
        const C: [u64; 5] = [
            0xbffa51a6625307d3,
            0x3fe9f9cb402bbeaa,
            0xbfc86a8e466bbb5b,
            0x3f9ac66d887e2f38,
            0xbf628473a38d289a,
        ];

        // Constant term held in extra precision as a double-double.
        const F: DoubleDouble =
            DoubleDouble::from_bit_pair((0xbb93f0a925810000, 0x3ff0000000000000));

        let p = f_estrin_polyeval5(
            x2,
            f64::from_bits(C[0]),
            f64::from_bits(C[1]),
            f64::from_bits(C[2]),
            f64::from_bits(C[3]),
            f64::from_bits(C[4]),
        );
        // v = p * x2 exactly, then z = F + v in double-double.
        let v = DoubleDouble::from_exact_mult(p, x2);
        let z = DoubleDouble::add(F, v);

        // Ziv rounding test: if the result +/- the error bound rounds to the
        // same double, the fast result is correctly rounded.
        let lb = z.hi + (z.lo - eps);
        let ub = z.hi + (z.lo + eps);
        if lb == ub {
            return lb;
        }
        // Ambiguous rounding: use the higher-precision polynomial.
        return as_sincpi_zero(x, &GenSinCosPiBackend {});
    }

    // si >= 0 from here on (|x| > 0.03515625 >= 2^-12); the shift is zero
    // exactly when 2*x has no fractional bits.
    let si = e.wrapping_sub(1011);
    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
        // x is integer or half-integer
        if (m0.wrapping_shl(si as u32)) == 0 {
            return f64::copysign(0.0, x); // x is integer
        }
        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
        // t = 0 if |x| = 1/2 mod 2, t = 1 if |x| = 3/2 mod 2
        #[cfg(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            target_arch = "aarch64"
        ))]
        {
            // Half-integer: sin(pi*x) = +/-1, hence sincpi(x) = num/(pi*x).
            let num = if t == 0 {
                f64::copysign(1.0, x)
            } else {
                -f64::copysign(1.0, x)
            };
            // Pi as a double-double (lo_bits, hi_bits).
            const PI: DoubleDouble =
                DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18));
            let r = DoubleDouble::quick_mult_f64(PI, x);
            let v = DoubleDouble::from_f64_div_dd(num, r);
            return v.to_f64();
        }

        #[cfg(not(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            target_arch = "aarch64"
        )))]
        {
            use crate::double_double::two_product_compatible;
            return if two_product_compatible(x) {
                // Double-double division path is valid for this x.
                let num = if t == 0 {
                    f64::copysign(1.0, x)
                } else {
                    -f64::copysign(1.0, x)
                };
                const PI: DoubleDouble =
                    DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18));
                let r = DoubleDouble::quick_mult_f64(PI, x);
                let v = DoubleDouble::from_f64_div_dd(num, r);
                v.to_f64()
            } else {
                // Otherwise compute +/-1/(pi*x) in 128-bit dyadic arithmetic.
                use crate::dyadic_float::{DyadicFloat128, DyadicSign};
                let num = DyadicFloat128::new_from_f64(if t == 0 {
                    f64::copysign(1.0, x)
                } else {
                    -f64::copysign(1.0, x)
                });
                // Pi with a 128-bit mantissa.
                const PI: DyadicFloat128 = DyadicFloat128 {
                    sign: DyadicSign::Pos,
                    exponent: -126,
                    mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
                };
                let dx = DyadicFloat128::new_from_f64(x);
                let r = (PI * dx).reciprocal();
                (num * r).fast_as_f64()
            };
        }
    }

    // General case: split pi*x into k*(pi/64) plus a small remainder y.
    let (y, k) = reduce_pi_64(x);

    // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64).
    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
    let cos_k = DoubleDouble::from_bit_pair(
        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
    );

    // sin/cos of the reduced argument, plus an evaluation error bound.
    let r_sincos = crate::sincospi::sincospi_eval(y, &GenSinCosPiBackend {});

    const PI: DoubleDouble = DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18));
    // Denominator pi*x of the sinc quotient.
    let scale = DoubleDouble::quick_mult_f64(PI, x);

    // Angle addition: sin(pi*x) = sin_k*cos_y + cos_k*sin_y.
    let sin_k_cos_y = DoubleDouble::quick_mult(sin_k, r_sincos.v_cos);
    let cos_k_sin_y = DoubleDouble::quick_mult(cos_k, r_sincos.v_sin);

    // sin_k_cos_y is always >> cos_k_sin_y
    let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
    rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
    rr = DoubleDouble::div(rr, scale);

    // Ziv rounding test against the propagated error bound.
    let ub = rr.hi + (rr.lo + r_sincos.err); // (rr.lo + ERR);
    let lb = rr.hi + (rr.lo - r_sincos.err); // (rr.lo - ERR);

    if ub == lb {
        return rr.to_f64();
    }
    // Hard-to-round case: redo the tail entirely in double-double.
    sincpi_dd(y, sin_k, cos_k, scale, &GenSinCosPiBackend {})
}

/// AVX/FMA specialization of the sincpi algorithm; mirrors
/// `sincpi_gen_impl` but uses the FMA-based double-double primitives.
///
/// # Safety
///
/// The caller must have verified at runtime that the CPU supports both
/// `avx` and `fma` (see the dispatcher in `f_sincpi`).
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx", enable = "fma")]
unsafe fn sincpi_fma_impl(x: f64) -> f64 {
    use crate::sincospi::FmaSinCosPiBackend;
    let ix = x.to_bits();
    // |x| bits (sign cleared).
    let ax = ix & 0x7fff_ffff_ffff_ffff;
    if ax == 0 {
        // Normalized sinc: sincpi(0) = 1.
        return 1.;
    }
    // Biased exponent of |x|.
    let e: i32 = (ax >> 52) as i32;
    // Mantissa with the implicit leading bit restored.
    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52);
    // sgn = -1 for negative x, 0 for positive; m = mantissa with x's sign.
    let sgn: i64 = (ix as i64) >> 63;
    let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn);
    // s < 0 exactly when |x| >= 2^41 (or x is Inf/NaN).
    let mut s: i32 = 1063i32.wrapping_sub(e);
    if s < 0 {
        if e == 0x7ff {
            // Exponent all-ones: Inf or NaN.
            if (ix << 12) == 0 {
                // +/-Inf: sin(pi*x) undefined -> NaN.
                return f64::NAN;
            }
            return x + x; // case x=NaN
        }
        s = -s - 1;
        if s > 10 {
            // |x| >= 2^52: every such double is an integer -> signed zero.
            return f64::copysign(0.0, x);
        }
        let iq: u64 = (m as u64).wrapping_shl(s as u32);
        if (iq & 2047) == 0 {
            // No fractional mantissa bits survive: x is an integer.
            return f64::copysign(0.0, x);
        }
    }

    if ax <= 0x3fa2000000000000u64 {
        // |x| <= 0.03515625

        if ax < 0x3c90000000000000u64 {
            // |x| < f64::EPSILON
            if ax <= 0x3b05798ee2308c3au64 {
                // |x| <= 2.2204460492503131e-24
                return 1.;
            }
            // Small values approximated with Taylor poly
            // sincpi(x) ~ 1 - x^2*Pi^2/6 + O(x^4)
            // Constant below is -Pi^2/6 as raw f64 bits.
            const M_SQR_PI_OVER_6: f64 = f64::from_bits(0xbffa51a6625307d3);
            return f64::mul_add(x, M_SQR_PI_OVER_6 * x, 1.);
        }

        // Poly generated by Sollya:
        // d = [0, 0.03515625];
        // f_sincpi = sin(y*pi)/(y*pi);
        // Q = fpminimax(f_sincpi, [|0, 2, 4, 6, 8, 10|], [|107, D...|], d, relative, floating);
        // See ./notes/sincpi_at_zero.sollya

        let x2 = x * x;

        // Error bound for the fast path: |err| <= |x| * (x2*2^-47 + 2^-66).
        let eps = x * f64::mul_add(
            x2,
            f64::from_bits(0x3d00000000000000), // 2^-47
            f64::from_bits(0x3bd0000000000000), // 2^-66
        );

        // Coefficients for x^2..x^10; the ~1 constant term is F below.
        const C: [u64; 5] = [
            0xbffa51a6625307d3,
            0x3fe9f9cb402bbeaa,
            0xbfc86a8e466bbb5b,
            0x3f9ac66d887e2f38,
            0xbf628473a38d289a,
        ];

        // Constant term held in extra precision as a double-double.
        const F: DoubleDouble =
            DoubleDouble::from_bit_pair((0xbb93f0a925810000, 0x3ff0000000000000));

        use crate::polyeval::d_estrin_polyeval5;
        let p = d_estrin_polyeval5(
            x2,
            f64::from_bits(C[0]),
            f64::from_bits(C[1]),
            f64::from_bits(C[2]),
            f64::from_bits(C[3]),
            f64::from_bits(C[4]),
        );
        // v = p * x2 exactly (via FMA), then z = F + v in double-double.
        let v = DoubleDouble::from_exact_mult_fma(p, x2);
        let z = DoubleDouble::add(F, v);

        // Ziv rounding test: identical rounding of both bounds means the
        // fast result is correctly rounded.
        let lb = z.hi + (z.lo - eps);
        let ub = z.hi + (z.lo + eps);
        if lb == ub {
            return lb;
        }
        // Ambiguous rounding: use the higher-precision polynomial.
        return as_sincpi_zero(x, &FmaSinCosPiBackend {});
    }

    // si >= 0 from here on; the shift is zero exactly when 2*x is integral.
    let si = e.wrapping_sub(1011);
    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
        // x is integer or half-integer
        if (m0.wrapping_shl(si as u32)) == 0 {
            return f64::copysign(0.0, x); // x is integer
        }
        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
        // t = 0 if |x| = 1/2 mod 2, t = 1 if |x| = 3/2 mod 2
        // Half-integer: sin(pi*x) = +/-1, hence sincpi(x) = num/(pi*x).
        let num = if t == 0 {
            f64::copysign(1.0, x)
        } else {
            -f64::copysign(1.0, x)
        };
        // Pi as a double-double (lo_bits, hi_bits).
        const PI: DoubleDouble =
            DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18));
        let r = DoubleDouble::quick_mult_f64_fma(PI, x);
        let v = DoubleDouble::from_f64_div_dd_fma(num, r);
        return v.to_f64();
    }

    use crate::sincospi::reduce_pi_64_fma;

    // General case: split pi*x into k*(pi/64) plus a small remainder y.
    let (y, k) = reduce_pi_64_fma(x);

    // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64).
    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
    let cos_k = DoubleDouble::from_bit_pair(
        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
    );

    // sin/cos of the reduced argument, plus an evaluation error bound.
    let r_sincos = crate::sincospi::sincospi_eval(y, &FmaSinCosPiBackend {});

    const PI: DoubleDouble = DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18));
    // Denominator pi*x of the sinc quotient.
    let scale = DoubleDouble::quick_mult_f64_fma(PI, x);

    // Angle addition: sin(pi*x) = sin_k*cos_y + cos_k*sin_y.
    let sin_k_cos_y = DoubleDouble::quick_mult_fma(sin_k, r_sincos.v_cos);
    let cos_k_sin_y = DoubleDouble::quick_mult_fma(cos_k, r_sincos.v_sin);

    // sin_k_cos_y is always >> cos_k_sin_y
    let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
    rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
    rr = DoubleDouble::div_fma(rr, scale);

    // Ziv rounding test against the propagated error bound.
    let ub = rr.hi + (rr.lo + r_sincos.err); // (rr.lo + ERR);
    let lb = rr.hi + (rr.lo - r_sincos.err); // (rr.lo - ERR);

    if ub == lb {
        return rr.to_f64();
    }
    // Hard-to-round case: redo the tail entirely in double-double.
    sincpi_dd(y, sin_k, cos_k, scale, &FmaSinCosPiBackend {})
}

420/// Computes sin(PI\*x)/(PI\*x)
421///
422/// Produces normalized sinc.
423///
424/// Max ULP 0.5
425pub fn f_sincpi(x: f64) -> f64 {
426    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
427    {
428        sincpi_gen_impl(x)
429    }
430    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
431    {
432        use std::sync::OnceLock;
433        static EXECUTOR: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
434        let q = EXECUTOR.get_or_init(|| {
435            if std::arch::is_x86_feature_detected!("avx")
436                && std::arch::is_x86_feature_detected!("fma")
437            {
438                sincpi_fma_impl
439            } else {
440                fn def_sincpi(x: f64) -> f64 {
441                    sincpi_gen_impl(x)
442                }
443                def_sincpi
444            }
445        });
446        unsafe { q(x) }
447    }
448}
449
/// Slow path: recomputes sin(pi*x)/(pi*x) for the already-reduced argument
/// `x` entirely in double-double precision.
///
/// `sin_k`/`cos_k` are the octant table values selected by the caller and
/// `scale` is the precomputed denominator pi*x.
///
/// NOTE(review): `#[cold]` and `#[inline(always)]` pull in opposite
/// directions; `#[inline(always)]` is presumably kept so this generic helper
/// is codegenned inside the `#[target_feature]` caller — confirm intent.
#[cold]
#[inline(always)]
fn sincpi_dd<B: SinCosPiBackend>(
    x: f64,
    sin_k: DoubleDouble,
    cos_k: DoubleDouble,
    scale: DoubleDouble,
    backend: &B,
) -> f64 {
    // Recompute sin(pi*x) and cos(pi*x) in extended (double-double) precision.
    let r_sincos = crate::sincospi::sincospi_eval_dd(x, backend);
    // Angle addition: sin_k*cos_y + cos_k*sin_y, fused through the backend.
    let cos_k_sin_y = backend.quick_mult(cos_k, r_sincos.v_sin);
    let mut rr = backend.mul_add(sin_k, r_sincos.v_cos, cos_k_sin_y);
    // Divide by pi*x to obtain the normalized sinc.
    rr = backend.div(rr, scale);
    rr.to_f64()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sincpi_zero() {
        // Finite inputs paired with their expected correctly-rounded results.
        let cases = [
            (2.2204460492503131e-24, 1.0),
            (f64::EPSILON, 1.0),
            (0.007080019335262543, 0.9999175469662566),
            (0.05468860710998057, 0.9950875152844803),
            (0.5231231231, 0.6068750737806441),
            (1., 0.),
            (-1., 0.),
            (-2., 0.),
            (-3., 0.),
        ];
        for (input, expected) in cases {
            assert_eq!(f_sincpi(input), expected);
        }
        // Non-finite inputs must yield NaN.
        for input in [f64::INFINITY, f64::NEG_INFINITY, f64::NAN] {
            assert!(f_sincpi(input).is_nan());
        }
    }
}