pxfm/
sincospi.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{dd_fmla, dyad_fmla, f_fmla, is_odd_integer};
30use crate::double_double::DoubleDouble;
31use crate::polyeval::{f_polyeval3, f_polyeval4};
32use crate::rounding::CpuRound;
33use crate::sin::SinCos;
34use crate::sincospi_tables::SINPI_K_PI_OVER_64;
35
36/**
37Cospi(x) on [0; 0.000244140625]
38
39Generated by Sollya:
40```text
41d = [0, 0.000244140625];
42f_cos = cos(y*pi);
43Q = fpminimax(f_cos, [|0, 2, 4, 6, 8, 10|], [|107...|], d, relative, floating);
44```
45
46See ./notes/cospi_zero_dd.sollya
47**/
48#[cold]
49#[inline(always)]
50fn as_cospi_zero<B: SinCosPiBackend>(x: f64, backend: &B) -> f64 {
51    const C: [(u64, u64); 5] = [
52        (0xbcb692b71366cc04, 0xc013bd3cc9be45de),
53        (0xbcb32b33fb803bd5, 0x40103c1f081b5ac4),
54        (0xbc9f5b752e98b088, 0xbff55d3c7e3cbff9),
55        (0x3c30023d540b9350, 0x3fce1f506446cb66),
56        (0x3c1a5d47937787d2, 0xbf8a9b062a36ba1c),
57    ];
58    let x2 = backend.exact_mult(x, x);
59    let mut p = backend.quick_mul_add(
60        x2,
61        DoubleDouble::from_bit_pair(C[3]),
62        DoubleDouble::from_bit_pair(C[3]),
63    );
64    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
65    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
66    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0]));
67    p = backend.mul_add_f64(x2, p, 1.);
68    p.to_f64()
69}
70
71/**
72Sinpi on range [0.0, 0.03515625]
73
74Generated poly by Sollya:
75```text
76d = [0, 0.03515625];
77
78f_sin = sin(y*pi)/y;
79Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10|], [|107...|], d, relative, floating);
80```
81See ./notes/sinpi_zero_dd.sollya
82**/
83#[cold]
84#[inline(always)]
85fn as_sinpi_zero<B: SinCosPiBackend>(x: f64, backend: &B) -> f64 {
86    const C: [(u64, u64); 6] = [
87        (0x3ca1a626311d9056, 0x400921fb54442d18),
88        (0x3cb055f12c462211, 0xc014abbce625be53),
89        (0xbc9789ea63534250, 0x400466bc6775aae1),
90        (0xbc78b86de6962184, 0xbfe32d2cce62874e),
91        (0x3c4eddf7cd887302, 0x3fb507833e2b781f),
92        (0x3bf180c9d4af2894, 0xbf7e2ea4e143707e),
93    ];
94    let x2 = backend.exact_mult(x, x);
95    let mut p = backend.quick_mul_add(
96        x2,
97        DoubleDouble::from_bit_pair(C[5]),
98        DoubleDouble::from_bit_pair(C[4]),
99    );
100    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[3]));
101    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
102    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
103    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0]));
104    p = backend.quick_mult_f64(p, x);
105    p.to_f64()
106}
107
108// Return k and y, where
109// k = round(x * 64 / pi) and y = (x * 64 / pi) - k.
110#[inline]
111pub(crate) fn reduce_pi_64(x: f64) -> (f64, i64) {
112    let kd = (x * 64.).cpu_round();
113    let y = dd_fmla(kd, -1. / 64., x);
114    (y, unsafe {
115        kd.to_int_unchecked::<i64>() // indeterminate values is always filtered out before this call, as well only lowest bits are used
116    })
117}
118
119// Return k and y, where
120// k = round(x * 64 / pi) and y = (x * 64 / pi) - k.
121#[inline(always)]
122#[allow(unused)]
123pub(crate) fn reduce_pi_64_fma(x: f64) -> (f64, i64) {
124    let kd = (x * 64.).round();
125    let y = f64::mul_add(kd, -1. / 64., x);
126    (y, unsafe {
127        kd.to_int_unchecked::<i64>() // indeterminate values is always filtered out before this call, as well only lowest bits are used
128    })
129}
130
131pub(crate) trait SinCosPiBackend {
132    fn fma(&self, x: f64, y: f64, z: f64) -> f64;
133    fn dd_fma(&self, x: f64, y: f64, z: f64) -> f64;
134    fn dyad_fma(&self, x: f64, y: f64, z: f64) -> f64;
135    fn polyeval3(&self, x: f64, a0: f64, a1: f64, a2: f64) -> f64;
136    fn arg_reduce_pi_64(&self, x: f64) -> (f64, i64);
137    fn quick_mult_f64(&self, x: DoubleDouble, y: f64) -> DoubleDouble;
138    fn quick_mult(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble;
139    fn odd_integer(&self, x: f64) -> bool;
140    fn div(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble;
141    fn mul_add_f64(&self, a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble;
142    fn quick_mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble;
143    fn mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble;
144    fn exact_mult(&self, x: f64, y: f64) -> DoubleDouble;
145}
146
147pub(crate) struct GenSinCosPiBackend {}
148
149impl SinCosPiBackend for GenSinCosPiBackend {
150    #[inline(always)]
151    fn fma(&self, x: f64, y: f64, z: f64) -> f64 {
152        f_fmla(x, y, z)
153    }
154    #[inline(always)]
155    fn dd_fma(&self, x: f64, y: f64, z: f64) -> f64 {
156        dd_fmla(x, y, z)
157    }
158    #[inline(always)]
159    fn dyad_fma(&self, x: f64, y: f64, z: f64) -> f64 {
160        dyad_fmla(x, y, z)
161    }
162    #[inline(always)]
163    fn polyeval3(&self, x: f64, a0: f64, a1: f64, a2: f64) -> f64 {
164        use crate::polyeval::f_polyeval3;
165        f_polyeval3(x, a0, a1, a2)
166    }
167    #[inline(always)]
168    fn arg_reduce_pi_64(&self, x: f64) -> (f64, i64) {
169        reduce_pi_64(x)
170    }
171    #[inline(always)]
172    fn quick_mult_f64(&self, x: DoubleDouble, y: f64) -> DoubleDouble {
173        DoubleDouble::quick_mult_f64(x, y)
174    }
175    #[inline(always)]
176    fn quick_mult(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble {
177        DoubleDouble::quick_mult(x, y)
178    }
179
180    #[inline(always)]
181    fn odd_integer(&self, x: f64) -> bool {
182        is_odd_integer(x)
183    }
184
185    #[inline(always)]
186    fn div(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble {
187        DoubleDouble::div(x, y)
188    }
189
190    #[inline(always)]
191    fn mul_add_f64(&self, a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble {
192        DoubleDouble::mul_add_f64(a, b, c)
193    }
194
195    #[inline(always)]
196    fn quick_mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble {
197        DoubleDouble::quick_mul_add(a, b, c)
198    }
199
200    #[inline(always)]
201    fn mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble {
202        DoubleDouble::mul_add(a, b, c)
203    }
204
205    #[inline(always)]
206    fn exact_mult(&self, x: f64, y: f64) -> DoubleDouble {
207        DoubleDouble::from_exact_mult(x, y)
208    }
209}
210
211#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
212pub(crate) struct FmaSinCosPiBackend {}
213
214#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
215impl SinCosPiBackend for FmaSinCosPiBackend {
216    #[inline(always)]
217    fn fma(&self, x: f64, y: f64, z: f64) -> f64 {
218        f64::mul_add(x, y, z)
219    }
220    #[inline(always)]
221    fn dd_fma(&self, x: f64, y: f64, z: f64) -> f64 {
222        f64::mul_add(x, y, z)
223    }
224    #[inline(always)]
225    fn dyad_fma(&self, x: f64, y: f64, z: f64) -> f64 {
226        f64::mul_add(x, y, z)
227    }
228    #[inline(always)]
229    fn polyeval3(&self, x: f64, a0: f64, a1: f64, a2: f64) -> f64 {
230        use crate::polyeval::d_polyeval3;
231        d_polyeval3(x, a0, a1, a2)
232    }
233    #[inline(always)]
234    fn arg_reduce_pi_64(&self, x: f64) -> (f64, i64) {
235        reduce_pi_64_fma(x)
236    }
237    #[inline(always)]
238    fn quick_mult_f64(&self, x: DoubleDouble, y: f64) -> DoubleDouble {
239        DoubleDouble::quick_mult_f64_fma(x, y)
240    }
241    #[inline(always)]
242    fn quick_mult(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble {
243        DoubleDouble::quick_mult_fma(x, y)
244    }
245
246    #[inline(always)]
247    fn odd_integer(&self, x: f64) -> bool {
248        use crate::common::is_odd_integer_fast;
249        is_odd_integer_fast(x)
250    }
251
252    #[inline(always)]
253    fn div(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble {
254        DoubleDouble::div_fma(x, y)
255    }
256
257    #[inline(always)]
258    fn mul_add_f64(&self, a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble {
259        DoubleDouble::mul_add_f64_fma(a, b, c)
260    }
261
262    #[inline(always)]
263    fn quick_mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble {
264        DoubleDouble::quick_mul_add_fma(a, b, c)
265    }
266
267    #[inline(always)]
268    fn mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble {
269        DoubleDouble::mul_add_fma(a, b, c)
270    }
271
272    #[inline(always)]
273    fn exact_mult(&self, x: f64, y: f64) -> DoubleDouble {
274        DoubleDouble::from_exact_mult_fma(x, y)
275    }
276}
277
278#[inline(always)]
279pub(crate) fn sincospi_eval<B: SinCosPiBackend>(x: f64, backend: &B) -> SinCos {
280    let x2 = x * x;
281    /*
282        sinpi(pi*x) poly generated by Sollya:
283        d = [0, 0.0078128];
284        f_sin = sin(y*pi)/y;
285        Q = fpminimax(f_sin, [|0, 2, 4, 6|], [|107, D...|], d, relative, floating);
286        See ./notes/sinpi.sollya
287    */
288    let sin_lop = backend.polyeval3(
289        x2,
290        f64::from_bits(0xc014abbce625be4d),
291        f64::from_bits(0x400466bc6767f259),
292        f64::from_bits(0xbfe32d176b0b3baf),
293    ) * x2;
294    // We're splitting polynomial in two parts, since first term dominates
295    // we compute: (a0_lo + a0_hi) * x + x * (a1 * x^2 + a2 + x^4) ...
296    let sin_lo = backend.dd_fma(f64::from_bits(0x3ca1a5c04563817a), x, sin_lop * x);
297    let sin_hi = x * f64::from_bits(0x400921fb54442d18);
298
299    /*
300       cospi(pi*x) poly generated by Sollya:
301       d = [0, 0.015625];
302       f_cos = cos(y*pi);
303       Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107, D...|], d, relative, floating);
304       See ./notes/cospi.sollya
305    */
306    let p = backend.polyeval3(
307        x2,
308        f64::from_bits(0xc013bd3cc9be45cf),
309        f64::from_bits(0x40103c1f08085ad1),
310        f64::from_bits(0xbff55d1e43463fc3),
311    );
312
313    // We're splitting polynomial in two parts, since first term dominates
314    // we compute: (a0_lo + a0_hi) + (a1 * x^2 + a2 + x^4)...
315    let cos_lo = backend.dd_fma(p, x2, f64::from_bits(0xbbdf72adefec0800));
316    let cos_hi = f64::from_bits(0x3ff0000000000000);
317
318    let err = backend.fma(
319        x2,
320        f64::from_bits(0x3cb0000000000000), // 2^-52
321        f64::from_bits(0x3c40000000000000), // 2^-59
322    );
323    SinCos {
324        v_sin: DoubleDouble::from_exact_add(sin_hi, sin_lo),
325        v_cos: DoubleDouble::from_exact_add(cos_hi, cos_lo),
326        err,
327    }
328}
329
330#[inline(always)]
331pub(crate) fn sincospi_eval_dd<B: SinCosPiBackend>(x: f64, backend: &B) -> SinCos {
332    let x2 = backend.exact_mult(x, x);
333    // Sin coeffs
334    // poly sin(pi*x) generated by Sollya:
335    // d = [0, 0.0078128];
336    // f_sin = sin(y*pi)/y;
337    // Q = fpminimax(f_sin, [|0, 2, 4, 6, 8|], [|107...|], d, relative, floating);
338    // see ./notes/sinpi_dd.sollya
339    const SC: [(u64, u64); 5] = [
340        (0x3ca1a626330ccf19, 0x400921fb54442d18),
341        (0x3cb05540f6323de9, 0xc014abbce625be53),
342        (0xbc9050fdd1229756, 0x400466bc6775aadf),
343        (0xbc780d406f3472e8, 0xbfe32d2cce5a7bf1),
344        (0x3c4cfcf8b6b817f2, 0x3fb5077069d8a182),
345    ];
346
347    let mut sin_y = backend.quick_mul_add(
348        x2,
349        DoubleDouble::from_bit_pair(SC[4]),
350        DoubleDouble::from_bit_pair(SC[3]),
351    );
352    sin_y = backend.quick_mul_add(x2, sin_y, DoubleDouble::from_bit_pair(SC[2]));
353    sin_y = backend.quick_mul_add(x2, sin_y, DoubleDouble::from_bit_pair(SC[1]));
354    sin_y = backend.quick_mul_add(x2, sin_y, DoubleDouble::from_bit_pair(SC[0]));
355    sin_y = backend.quick_mult_f64(sin_y, x);
356
357    // Cos coeffs
358    // d = [0, 0.0078128];
359    // f_cos = cos(y*pi);
360    // Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107...|], d, relative, floating);
361    // See ./notes/cospi_dd.sollya
362    const CC: [(u64, u64); 5] = [
363        (0xbaaa70a580000000, 0x3ff0000000000000),
364        (0xbcb69211d8dd1237, 0xc013bd3cc9be45de),
365        (0xbcbd96cfd637eeb7, 0x40103c1f081b5abf),
366        (0x3c994d75c577f029, 0xbff55d3c7e2e4ba5),
367        (0xbc5c542d998a4e48, 0x3fce1f2f5f747411),
368    ];
369
370    let mut cos_y = backend.quick_mul_add(
371        x2,
372        DoubleDouble::from_bit_pair(CC[4]),
373        DoubleDouble::from_bit_pair(CC[3]),
374    );
375    cos_y = backend.quick_mul_add(x2, cos_y, DoubleDouble::from_bit_pair(CC[2]));
376    cos_y = backend.quick_mul_add(x2, cos_y, DoubleDouble::from_bit_pair(CC[1]));
377    cos_y = backend.quick_mul_add(x2, cos_y, DoubleDouble::from_bit_pair(CC[0]));
378    SinCos {
379        v_sin: sin_y,
380        v_cos: cos_y,
381        err: 0.,
382    }
383}
384
385#[cold]
386#[inline(always)]
387fn sinpi_dd<B: SinCosPiBackend>(
388    x: f64,
389    sin_k: DoubleDouble,
390    cos_k: DoubleDouble,
391    backend: &B,
392) -> f64 {
393    let r_sincos = sincospi_eval_dd(x, backend);
394    let cos_k_sin_y = backend.quick_mult(cos_k, r_sincos.v_sin);
395    let rr = backend.mul_add(sin_k, r_sincos.v_cos, cos_k_sin_y);
396    rr.to_f64()
397}
398
399#[cold]
400#[inline(always)]
401fn sincospi_dd<B: SinCosPiBackend>(
402    x: f64,
403    sin_sin_k: DoubleDouble,
404    sin_cos_k: DoubleDouble,
405    cos_sin_k: DoubleDouble,
406    cos_cos_k: DoubleDouble,
407    backend: &B,
408) -> (f64, f64) {
409    let r_sincos = sincospi_eval_dd(x, backend);
410
411    let cos_k_sin_y = backend.quick_mult(sin_cos_k, r_sincos.v_sin);
412    let rr_sin = backend.mul_add(sin_sin_k, r_sincos.v_cos, cos_k_sin_y);
413
414    let cos_k_sin_y = backend.quick_mult(cos_cos_k, r_sincos.v_sin);
415    let rr_cos = backend.mul_add(cos_sin_k, r_sincos.v_cos, cos_k_sin_y);
416
417    (rr_sin.to_f64(), rr_cos.to_f64())
418}
419
420// [sincospi_eval] gives precision around 2^-66 what is not enough for DD case this method gives 2^-84
421#[inline]
422fn sincospi_eval_extended(x: f64) -> SinCos {
423    let x2 = DoubleDouble::from_exact_mult(x, x);
424    /*
425        sinpi(pi*x) poly generated by Sollya:
426        d = [0, 0.0078128];
427        f_sin = sin(y*pi)/y;
428        Q = fpminimax(f_sin, [|0, 2, 4, 6, 8|], [|107, 107, D...|], d, relative, floating);
429        See ./notes/sinpi.sollya
430    */
431    let sin_lop = f_polyeval3(
432        x2.hi,
433        f64::from_bits(0x400466bc67763662),
434        f64::from_bits(0xbfe32d2cce5aad86),
435        f64::from_bits(0x3fb5077099a1f35b),
436    );
437    let mut v_sin = DoubleDouble::mul_f64_add(
438        x2,
439        sin_lop,
440        DoubleDouble::from_bit_pair((0x3cb0553d6ee5e8ec, 0xc014abbce625be53)),
441    );
442    v_sin = DoubleDouble::mul_add(
443        x2,
444        v_sin,
445        DoubleDouble::from_bit_pair((0x3ca1a626330dd130, 0x400921fb54442d18)),
446    );
447    v_sin = DoubleDouble::quick_mult_f64(v_sin, x);
448
449    /*
450       cospi(pi*x) poly generated by Sollya:
451       d = [0, 0.015625];
452       f_cos = cos(y*pi);
453       Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107, 107, D...|], d, relative, floating);
454       See ./notes/cospi_fast_dd.sollya
455    */
456    let p = f_polyeval3(
457        x2.hi,
458        f64::from_bits(0x40103c1f081b5abf),
459        f64::from_bits(0xbff55d3c7e2edd89),
460        f64::from_bits(0x3fce1f2fd9d79484),
461    );
462
463    let mut v_cos = DoubleDouble::mul_f64_add(
464        x2,
465        p,
466        DoubleDouble::from_bit_pair((0xbcb69236a9b3ed73, 0xc013bd3cc9be45de)),
467    );
468    v_cos = DoubleDouble::mul_add_f64(x2, v_cos, f64::from_bits(0x3ff0000000000000));
469
470    SinCos {
471        v_sin: DoubleDouble::from_exact_add(v_sin.hi, v_sin.lo),
472        v_cos: DoubleDouble::from_exact_add(v_cos.hi, v_cos.lo),
473        err: 0.,
474    }
475}
476
477pub(crate) fn f_fast_sinpi_dd(x: f64) -> DoubleDouble {
478    let ix = x.to_bits();
479    let ax = ix & 0x7fff_ffff_ffff_ffff;
480    if ax == 0 {
481        return DoubleDouble::new(0., 0.);
482    }
483    let e: i32 = (ax >> 52) as i32;
484    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52);
485    let sgn: i64 = (ix as i64) >> 63;
486    let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn);
487    let mut s: i32 = 1063i32.wrapping_sub(e);
488    if s < 0 {
489        s = -s - 1;
490        if s > 10 {
491            return DoubleDouble::new(0., f64::copysign(0.0, x));
492        }
493        let iq: u64 = (m as u64).wrapping_shl(s as u32);
494        if (iq & 2047) == 0 {
495            return DoubleDouble::new(0., f64::copysign(0.0, x));
496        }
497    }
498
499    if ax <= 0x3fa2000000000000u64 {
500        // |x| <= 0.03515625
501        const PI: DoubleDouble = DoubleDouble::new(
502            f64::from_bits(0x3ca1a62633145c07),
503            f64::from_bits(0x400921fb54442d18),
504        );
505
506        if ax < 0x3c90000000000000 {
507            // for x near zero, sinpi(x) = pi*x + O(x^3), thus worst cases are those
508            // of the function pi*x, and if x is a worst case, then 2*x is another
509            // one in the next binade. For this reason, worst cases are only included
510            // for the binade [2^-1022, 2^-1021). For larger binades,
511            // up to [2^-54,2^-53), worst cases should be deduced by multiplying
512            // by some power of 2.
513            if ax < 0x0350000000000000 {
514                let t = x * f64::from_bits(0x4690000000000000);
515                let z = DoubleDouble::quick_mult_f64(PI, t);
516                let r = z.to_f64();
517                let rs = r * f64::from_bits(0x3950000000000000);
518                let rt = rs * f64::from_bits(0x4690000000000000);
519                return DoubleDouble::new(
520                    0.,
521                    dyad_fmla((z.hi - rt) + z.lo, f64::from_bits(0x3950000000000000), rs),
522                );
523            }
524            let z = DoubleDouble::quick_mult_f64(PI, x);
525            return z;
526        }
527
528        /*
529           Poly generated by Sollya:
530           d = [0, 0.03515625];
531           f_sin = sin(y*pi)/y;
532           Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10|], [|107, 107, D...|], d, relative, floating);
533
534           See ./notes/sinpi_zero_fast_dd.sollya
535        */
536        const C: [u64; 4] = [
537            0xbfe32d2cce62bd85,
538            0x3fb50783487eb73d,
539            0xbf7e3074f120ad1f,
540            0x3f3e8d9011340e5a,
541        ];
542
543        let x2 = DoubleDouble::from_exact_mult(x, x);
544
545        const C_PI: DoubleDouble =
546            DoubleDouble::from_bit_pair((0x3ca1a626331457a4, 0x400921fb54442d18));
547
548        let p = f_polyeval4(
549            x2.hi,
550            f64::from_bits(C[0]),
551            f64::from_bits(C[1]),
552            f64::from_bits(C[2]),
553            f64::from_bits(C[3]),
554        );
555        let mut r = DoubleDouble::mul_f64_add(
556            x2,
557            p,
558            DoubleDouble::from_bit_pair((0xbc96dd7ae221e58c, 0x400466bc6775aae2)),
559        );
560        r = DoubleDouble::mul_add(
561            x2,
562            r,
563            DoubleDouble::from_bit_pair((0x3cb05511c8a6c478, 0xc014abbce625be53)),
564        );
565        r = DoubleDouble::mul_add(r, x2, C_PI);
566        r = DoubleDouble::quick_mult_f64(r, x);
567        let k = DoubleDouble::from_exact_add(r.hi, r.lo);
568        return k;
569    }
570
571    let si = e.wrapping_sub(1011);
572    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
573        // x is integer or half-integer
574        if (m0.wrapping_shl(si as u32)) == 0 {
575            return DoubleDouble::new(0., f64::copysign(0.0, x)); // x is integer
576        }
577        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
578        // t = 0 if |x| = 1/2 mod 2, t = 1 if |x| = 3/2 mod 2
579        return DoubleDouble::new(
580            0.,
581            if t == 0 {
582                f64::copysign(1.0, x)
583            } else {
584                -f64::copysign(1.0, x)
585            },
586        );
587    }
588
589    let (y, k) = reduce_pi_64(x);
590
591    // // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64).
592    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
593    let cos_k = DoubleDouble::from_bit_pair(
594        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
595    );
596
597    let r_sincos = sincospi_eval_extended(y);
598
599    let sin_k_cos_y = DoubleDouble::quick_mult(sin_k, r_sincos.v_cos);
600    let cos_k_sin_y = DoubleDouble::quick_mult(cos_k, r_sincos.v_sin);
601
602    // sin_k_cos_y is always >> cos_k_sin_y
603    let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
604    rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
605    DoubleDouble::from_exact_add(rr.hi, rr.lo)
606}
607
608#[inline(always)]
609fn sinpi_gen_impl<B: SinCosPiBackend>(x: f64, backend: B) -> f64 {
610    let ix = x.to_bits();
611    let ax = ix & 0x7fff_ffff_ffff_ffff;
612    if ax == 0 {
613        return x;
614    }
615    let e: i32 = (ax >> 52) as i32;
616    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52);
617    let sgn: i64 = (ix as i64) >> 63;
618    let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn);
619    let mut s: i32 = 1063i32.wrapping_sub(e);
620    if s < 0 {
621        if e == 0x7ff {
622            if (ix << 12) == 0 {
623                return f64::NAN;
624            }
625            return x + x; // case x=NaN
626        }
627        s = -s - 1;
628        if s > 10 {
629            return f64::copysign(0.0, x);
630        }
631        let iq: u64 = (m as u64).wrapping_shl(s as u32);
632        if (iq & 2047) == 0 {
633            return f64::copysign(0.0, x);
634        }
635    }
636
637    if ax <= 0x3fa2000000000000u64 {
638        // |x| <= 0.03515625
639        const PI: DoubleDouble = DoubleDouble::new(
640            f64::from_bits(0x3ca1a62633145c07),
641            f64::from_bits(0x400921fb54442d18),
642        );
643
644        if ax < 0x3c90000000000000 {
645            // for x near zero, sinpi(x) = pi*x + O(x^3), thus worst cases are those
646            // of the function pi*x, and if x is a worst case, then 2*x is another
647            // one in the next binade. For this reason, worst cases are only included
648            // for the binade [2^-1022, 2^-1021). For larger binades,
649            // up to [2^-54,2^-53), worst cases should be deduced by multiplying
650            // by some power of 2.
651            if ax < 0x0350000000000000 {
652                let t = x * f64::from_bits(0x4690000000000000);
653                let z = backend.quick_mult_f64(PI, t);
654                let r = z.to_f64();
655                let rs = r * f64::from_bits(0x3950000000000000);
656                let rt = rs * f64::from_bits(0x4690000000000000);
657                return backend.dyad_fma(
658                    (z.hi - rt) + z.lo,
659                    f64::from_bits(0x3950000000000000),
660                    rs,
661                );
662            }
663            let z = backend.quick_mult_f64(PI, x);
664            return z.to_f64();
665        }
666
667        /*
668           Poly generated by Sollya:
669           d = [0, 0.03515625];
670           f_sin = sin(y*pi)/y;
671           Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10|], [|107, D...|], d, relative, floating);
672
673           See ./notes/sinpi_zero.sollya
674        */
675
676        let x2 = x * x;
677        let x3 = x2 * x;
678        let x4 = x2 * x2;
679
680        let eps = x * backend.fma(
681            x2,
682            f64::from_bits(0x3d00000000000000), // 2^-47
683            f64::from_bits(0x3bd0000000000000), // 2^-66
684        );
685
686        const C: [u64; 4] = [
687            0xc014abbce625be51,
688            0x400466bc67754b46,
689            0xbfe32d2cc12a51f4,
690            0x3fb5060540058476,
691        ];
692
693        const C_PI: DoubleDouble =
694            DoubleDouble::from_bit_pair((0x3ca1a67088eb1a46, 0x400921fb54442d18));
695
696        let mut z = backend.quick_mult_f64(C_PI, x);
697
698        let zl0 = backend.fma(x2, f64::from_bits(C[1]), f64::from_bits(C[0]));
699        let zl1 = backend.fma(x2, f64::from_bits(C[3]), f64::from_bits(C[2]));
700
701        z.lo = backend.fma(x3, backend.fma(x4, zl1, zl0), z.lo);
702        let lb = z.hi + (z.lo - eps);
703        let ub = z.hi + (z.lo + eps);
704        if lb == ub {
705            return lb;
706        }
707        return as_sinpi_zero(x, &backend);
708    }
709
710    let si = e.wrapping_sub(1011);
711    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
712        // x is integer or half-integer
713        if (m0.wrapping_shl(si as u32)) == 0 {
714            return f64::copysign(0.0, x); // x is integer
715        }
716        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
717        // t = 0 if |x| = 1/2 mod 2, t = 1 if |x| = 3/2 mod 2
718        return if t == 0 {
719            f64::copysign(1.0, x)
720        } else {
721            -f64::copysign(1.0, x)
722        };
723    }
724
725    let (y, k) = backend.arg_reduce_pi_64(x);
726
727    // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64).
728    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
729    let cos_k = DoubleDouble::from_bit_pair(
730        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
731    );
732
733    let r_sincos = sincospi_eval(y, &backend);
734
735    let sin_k_cos_y = backend.quick_mult(sin_k, r_sincos.v_cos);
736    let cos_k_sin_y = backend.quick_mult(cos_k, r_sincos.v_sin);
737
738    // sin_k_cos_y is always >> cos_k_sin_y
739    let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
740    rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
741
742    let ub = rr.hi + (rr.lo + r_sincos.err); // (rr.lo + ERR);
743    let lb = rr.hi + (rr.lo - r_sincos.err); // (rr.lo - ERR);
744
745    if ub == lb {
746        return rr.to_f64();
747    }
748    sinpi_dd(y, sin_k, cos_k, &backend)
749}
750
751#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
752#[target_feature(enable = "avx", enable = "fma")]
753unsafe fn sinpi_fma_impl(x: f64) -> f64 {
754    sinpi_gen_impl(x, FmaSinCosPiBackend {})
755}
756
757/// Computes sin(PI*x)
758///
759/// Max ULP 0.5
760pub fn f_sinpi(x: f64) -> f64 {
761    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
762    {
763        sinpi_gen_impl(x, GenSinCosPiBackend {})
764    }
765    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
766    {
767        use std::sync::OnceLock;
768        static EXECUTOR: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
769        let q = EXECUTOR.get_or_init(|| {
770            if std::arch::is_x86_feature_detected!("avx")
771                && std::arch::is_x86_feature_detected!("fma")
772            {
773                sinpi_fma_impl
774            } else {
775                fn def_sinpi(x: f64) -> f64 {
776                    sinpi_gen_impl(x, GenSinCosPiBackend {})
777                }
778                def_sinpi
779            }
780        });
781        unsafe { q(x) }
782    }
783}
784
785#[inline(always)]
786fn cospi_gen_impl<B: SinCosPiBackend>(x: f64, backend: B) -> f64 {
787    let ix = x.to_bits();
788    let ax = ix & 0x7fff_ffff_ffff_ffff;
789    if ax == 0 {
790        return 1.0;
791    }
792    let e: i32 = (ax >> 52) as i32;
793    // e is the unbiased exponent, we have 2^(e-1023) <= |x| < 2^(e-1022)
794    let m: i64 = ((ix & 0x000fffffffffffff) | (1u64 << 52)) as i64;
795    let mut s = 1063i32.wrapping_sub(e); // 2^(40-s) <= |x| < 2^(41-s)
796    if s < 0 {
797        // |x| >= 2^41
798        if e == 0x7ff {
799            // NaN or Inf
800            if ix.wrapping_shl(12) == 0 {
801                return f64::NAN;
802            }
803            return x + x; // NaN
804        }
805        s = -s - 1; // now 2^(41+s) <= |x| < 2^(42+s)
806        if s > 11 {
807            return 1.0;
808        } // |x| >= 2^53
809        let iq: u64 = (m as u64).wrapping_shl(s as u32).wrapping_add(1024);
810        if (iq & 2047) == 0 {
811            return 0.0;
812        }
813    }
814    if ax <= 0x3f30000000000000u64 {
815        // |x| <= 2^-12, |x| <= 0.000244140625
816        if ax <= 0x3e2ccf6429be6621u64 {
817            return 1.0 - f64::from_bits(0x3c80000000000000);
818        }
819        let x2 = x * x;
820        let x4 = x2 * x2;
821        let eps = x2 * f64::from_bits(0x3cfa000000000000);
822
823        /*
824            Generated by Sollya:
825            d = [0, 0.000244140625];
826            f_cos = cos(y*pi);
827            Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107, 107, D...|], d, relative, floating);
828
829            See ./notes/cospi.sollya
830        */
831
832        const C: [u64; 4] = [
833            0xc013bd3cc9be45de,
834            0x40103c1f081b5ac4,
835            0xbff55d3c7ff79b60,
836            0x3fd24c7b6f7d0690,
837        ];
838
839        let p0 = backend.fma(x2, f64::from_bits(C[3]), f64::from_bits(C[2]));
840        let p1 = backend.fma(x2, f64::from_bits(C[1]), f64::from_bits(C[0]));
841
842        let p = x2 * backend.fma(x4, p0, p1);
843        let lb = (p - eps) + 1.;
844        let ub = (p + eps) + 1.;
845        if lb == ub {
846            return lb;
847        }
848        return as_cospi_zero(x, &backend);
849    }
850
851    let si: i32 = e.wrapping_sub(1011);
852    if si >= 0 && ((m as u64).wrapping_shl(si as u32) ^ 0x8000000000000000u64) == 0 {
853        return 0.0;
854    }
855
856    let (y, k) = backend.arg_reduce_pi_64(x);
857
858    // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64).
859    let msin_k = DoubleDouble::from_bit_pair(
860        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(64) & 127) as usize],
861    );
862    let cos_k = DoubleDouble::from_bit_pair(
863        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
864    );
865
866    let r_sincos = sincospi_eval(y, &backend);
867
868    let cos_k_cos_y = backend.quick_mult(r_sincos.v_cos, cos_k);
869    let cos_k_msin_y = backend.quick_mult(r_sincos.v_sin, msin_k);
870
871    // cos_k_cos_y is always >> cos_k_msin_y
872    let mut rr = DoubleDouble::from_exact_add(cos_k_cos_y.hi, cos_k_msin_y.hi);
873    rr.lo += cos_k_cos_y.lo + cos_k_msin_y.lo;
874
875    let ub = rr.hi + (rr.lo + r_sincos.err); // (rr.lo + ERR);
876    let lb = rr.hi + (rr.lo - r_sincos.err); // (rr.lo - ERR);
877
878    if ub == lb {
879        return rr.to_f64();
880    }
881    sinpi_dd(y, cos_k, msin_k, &backend)
882}
883
884#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
885#[target_feature(enable = "avx", enable = "fma")]
886unsafe fn cospi_fma_impl(x: f64) -> f64 {
887    cospi_gen_impl(x, FmaSinCosPiBackend {})
888}
889
890/// Computes cos(PI*x)
891///
892/// Max found ULP 0.5
893pub fn f_cospi(x: f64) -> f64 {
894    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
895    {
896        cospi_gen_impl(x, GenSinCosPiBackend {})
897    }
898    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
899    {
900        use std::sync::OnceLock;
901        static EXECUTOR: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
902        let q = EXECUTOR.get_or_init(|| {
903            if std::arch::is_x86_feature_detected!("avx")
904                && std::arch::is_x86_feature_detected!("fma")
905            {
906                cospi_fma_impl
907            } else {
908                fn def_cospi(x: f64) -> f64 {
909                    cospi_gen_impl(x, GenSinCosPiBackend {})
910                }
911                def_cospi
912            }
913        });
914        unsafe { q(x) }
915    }
916}
917
918#[inline(always)]
919fn sincospi_gen_impl<B: SinCosPiBackend>(x: f64, backend: B) -> (f64, f64) {
920    let ix = x.to_bits();
921    let ax = ix & 0x7fff_ffff_ffff_ffff;
922    if ax == 0 {
923        return (x, 1.0);
924    }
925    let e: i32 = (ax >> 52) as i32;
926    // e is the unbiased exponent, we have 2^(e-1023) <= |x| < 2^(e-1022)
927    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52);
928    let m: i64 = ((ix & 0x000fffffffffffff) | (1u64 << 52)) as i64;
929    let mut s = 1063i32.wrapping_sub(e); // 2^(40-s) <= |x| < 2^(41-s)
930    if s < 0 {
931        // |x| >= 2^41
932        if e == 0x7ff {
933            // NaN or Inf
934            if ix.wrapping_shl(12) == 0 {
935                return (f64::NAN, f64::NAN);
936            }
937            return (x + x, x + x); // NaN
938        }
939        s = -s - 1;
940        if s > 10 {
941            static CF: [f64; 2] = [1., -1.];
942            let is_odd = backend.odd_integer(f64::from_bits(ax));
943            let cos_x = CF[is_odd as usize];
944            return (f64::copysign(0.0, x), cos_x);
945        } // |x| >= 2^53
946        let iq: u64 = (m as u64).wrapping_shl(s as u32);
947
948        // sinpi = 0 when multiple of 2048
949        let sin_zero = (iq & 2047) == 0;
950
951        // cospi = 0 when offset-by-half multiple of 2048
952        let cos_zero = ((m as u64).wrapping_shl(s as u32).wrapping_add(1024) & 2047) == 0;
953
954        if sin_zero && cos_zero {
955            // both zero (only possible if NaN or something degenerate)
956        } else if sin_zero {
957            static CF: [f64; 2] = [1., -1.];
958            let is_odd = backend.odd_integer(f64::from_bits(ax));
959            let cos_x = CF[is_odd as usize];
960            return (0.0, cos_x); // sin = 0, cos = ±1
961        } else if cos_zero {
962            // x = k / 2 * PI
963            let si = e.wrapping_sub(1011);
964            let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
965            // making sin decision based on quadrant
966            return if t == 0 {
967                (f64::copysign(1.0, x), 0.0)
968            } else {
969                (-f64::copysign(1.0, x), 0.0)
970            }; // sin = ±1, cos = 0
971        }
972    }
973
974    if ax <= 0x3f30000000000000u64 {
975        // |x| <= 2^-12, |x| <= 0.000244140625
976        if ax <= 0x3c90000000000000u64 {
977            const PI: DoubleDouble = DoubleDouble::new(
978                f64::from_bits(0x3ca1a62633145c07),
979                f64::from_bits(0x400921fb54442d18),
980            );
981            let sin_x = if ax < 0x0350000000000000 {
982                let t = x * f64::from_bits(0x4690000000000000);
983                let z = backend.quick_mult_f64(PI, t);
984                let r = z.to_f64();
985                let rs = r * f64::from_bits(0x3950000000000000);
986                let rt = rs * f64::from_bits(0x4690000000000000);
987                backend.dyad_fma((z.hi - rt) + z.lo, f64::from_bits(0x3950000000000000), rs)
988            } else {
989                let z = backend.quick_mult_f64(PI, x);
990                z.to_f64()
991            };
992            return (sin_x, 1.0 - f64::from_bits(0x3c80000000000000));
993        }
994        let x2 = x * x;
995        let x4 = x2 * x2;
996        let cos_eps = x2 * f64::from_bits(0x3cfa000000000000);
997
998        /*
999            Generated by Sollya:
1000            d = [0, 0.000244140625];
1001            f_cos = cos(y*pi);
1002            Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|107, 107, D...|], d, relative, floating);
1003
1004            See ./notes/cospi.sollya
1005        */
1006
1007        const COS_C: [u64; 4] = [
1008            0xc013bd3cc9be45de,
1009            0x40103c1f081b5ac4,
1010            0xbff55d3c7ff79b60,
1011            0x3fd24c7b6f7d0690,
1012        ];
1013
1014        let p0 = backend.fma(x2, f64::from_bits(COS_C[3]), f64::from_bits(COS_C[2]));
1015        let p1 = backend.fma(x2, f64::from_bits(COS_C[1]), f64::from_bits(COS_C[0]));
1016
1017        let p = x2 * backend.fma(x4, p0, p1);
1018        let cos_lb = (p - cos_eps) + 1.;
1019        let cos_ub = (p + cos_eps) + 1.;
1020        let cos_x = if cos_lb == cos_ub {
1021            cos_lb
1022        } else {
1023            as_cospi_zero(x, &backend)
1024        };
1025
1026        /*
1027            Poly generated by Sollya:
1028            d = [0, 0.03515625];
1029            f_sin = sin(y*pi)/y;
1030            Q = fpminimax(f_sin, [|0, 2, 4, 6, 8, 10|], [|107, D...|], d, relative, floating);
1031
1032            See ./notes/sinpi_zero.sollya
1033        */
1034
1035        const SIN_C: [u64; 4] = [
1036            0xc014abbce625be51,
1037            0x400466bc67754b46,
1038            0xbfe32d2cc12a51f4,
1039            0x3fb5060540058476,
1040        ];
1041
1042        const C_PI: DoubleDouble =
1043            DoubleDouble::from_bit_pair((0x3ca1a67088eb1a46, 0x400921fb54442d18));
1044
1045        let mut z = backend.quick_mult_f64(C_PI, x);
1046
1047        let x3 = x2 * x;
1048
1049        let zl0 = backend.fma(x2, f64::from_bits(SIN_C[1]), f64::from_bits(SIN_C[0]));
1050        let zl1 = backend.fma(x2, f64::from_bits(SIN_C[3]), f64::from_bits(SIN_C[2]));
1051
1052        let sin_eps = x * backend.fma(
1053            x2,
1054            f64::from_bits(0x3d00000000000000), // 2^-47
1055            f64::from_bits(0x3bd0000000000000), // 2^-66
1056        );
1057
1058        z.lo = backend.fma(x3, backend.fma(x4, zl1, zl0), z.lo);
1059        let sin_lb = z.hi + (z.lo - sin_eps);
1060        let sin_ub = z.hi + (z.lo + sin_eps);
1061        let sin_x = if sin_lb == sin_ub {
1062            sin_lb
1063        } else {
1064            as_sinpi_zero(x, &backend)
1065        };
1066        return (sin_x, cos_x);
1067    }
1068
1069    let si = e.wrapping_sub(1011);
1070    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
1071        // x is integer or half-integer
1072        if (m0.wrapping_shl(si as u32)) == 0 {
1073            static CF: [f64; 2] = [1., -1.];
1074            let is_odd = backend.odd_integer(f64::from_bits(ax));
1075            let cos_x = CF[is_odd as usize];
1076            return (f64::copysign(0.0, x), cos_x); // x is integer
1077        }
1078        // x is half-integer
1079        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
1080        // t = 0 if |x| = 1/2 mod 2, t = 1 if |x| = 3/2 mod 2
1081        return if t == 0 {
1082            (f64::copysign(1.0, x), 0.0)
1083        } else {
1084            (-f64::copysign(1.0, x), 0.0)
1085        };
1086    }
1087
1088    let (y, k) = backend.arg_reduce_pi_64(x);
1089
1090    // cos(k * pi/64) = sin(k * pi/64 + pi/2) = sin((k + 32) * pi/64).
1091    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
1092    let cos_k = DoubleDouble::from_bit_pair(
1093        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
1094    );
1095    let msin_k = -sin_k;
1096
1097    let r_sincos = sincospi_eval(y, &backend);
1098
1099    let sin_k_cos_y = backend.quick_mult(sin_k, r_sincos.v_cos);
1100    let cos_k_sin_y = backend.quick_mult(cos_k, r_sincos.v_sin);
1101
1102    let cos_k_cos_y = backend.quick_mult(r_sincos.v_cos, cos_k);
1103    let msin_k_sin_y = backend.quick_mult(r_sincos.v_sin, msin_k);
1104
1105    // sin_k_cos_y is always >> cos_k_sin_y
1106    let mut rr_sin = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
1107    rr_sin.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
1108
1109    let sin_ub = rr_sin.hi + (rr_sin.lo + r_sincos.err); // (rr.lo + ERR);
1110    let sin_lb = rr_sin.hi + (rr_sin.lo - r_sincos.err); // (rr.lo - ERR);
1111
1112    let mut rr_cos = DoubleDouble::from_exact_add(cos_k_cos_y.hi, msin_k_sin_y.hi);
1113    rr_cos.lo += cos_k_cos_y.lo + msin_k_sin_y.lo;
1114
1115    let cos_ub = rr_cos.hi + (rr_cos.lo + r_sincos.err); // (rr.lo + ERR);
1116    let cos_lb = rr_cos.hi + (rr_cos.lo - r_sincos.err); // (rr.lo - ERR);
1117
1118    if sin_ub == sin_lb && cos_lb == cos_ub {
1119        return (rr_sin.to_f64(), rr_cos.to_f64());
1120    }
1121
1122    sincospi_dd(y, sin_k, cos_k, cos_k, msin_k, &backend)
1123}
1124
1125#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1126#[target_feature(enable = "avx", enable = "fma")]
1127unsafe fn sincospi_fma_impl(x: f64) -> (f64, f64) {
1128    sincospi_gen_impl(x, FmaSinCosPiBackend {})
1129}
1130
1131/// Computes sin(PI*x) and cos(PI*x)
1132///
1133/// Max found ULP 0.5
1134pub fn f_sincospi(x: f64) -> (f64, f64) {
1135    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
1136    {
1137        sincospi_gen_impl(x, GenSinCosPiBackend {})
1138    }
1139    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
1140    {
1141        use std::sync::OnceLock;
1142        static EXECUTOR: OnceLock<unsafe fn(f64) -> (f64, f64)> = OnceLock::new();
1143        let q = EXECUTOR.get_or_init(|| {
1144            if std::arch::is_x86_feature_detected!("avx")
1145                && std::arch::is_x86_feature_detected!("fma")
1146            {
1147                sincospi_fma_impl
1148            } else {
1149                fn def_sincospi(x: f64) -> (f64, f64) {
1150                    sincospi_gen_impl(x, GenSinCosPiBackend {})
1151                }
1152                def_sincospi
1153            }
1154        });
1155        unsafe { q(x) }
1156    }
1157}
1158
1159#[cfg(test)]
1160mod tests {
1161    use super::*;
1162
1163    #[test]
1164    fn test_sinpi() {
1165        assert_eq!(f_sinpi(262143.50006870925), -0.9999999767029883);
1166        assert_eq!(f_sinpi(7124076477593855.), 0.);
1167        assert_eq!(f_sinpi(-11235582092889474000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.), -0.);
1168        assert_eq!(f_sinpi(-2.7430620343968443e303), -0.0);
1169        assert_eq!(f_sinpi(0.00003195557007273919), 0.00010039138401316004);
1170        assert_eq!(f_sinpi(-0.038357843137253766), -0.12021328061499763);
1171        assert_eq!(f_sinpi(1.0156097449358867), -0.04901980680173724);
1172        assert_eq!(f_sinpi(74.8593852519989), 0.42752597787896457);
1173        assert_eq!(f_sinpi(0.500091552734375), 0.9999999586369661);
1174        assert_eq!(f_sinpi(0.5307886532952182), 0.9953257438106751);
1175        assert_eq!(f_sinpi(3.1415926535897936), -0.43030121700009316);
1176        assert_eq!(f_sinpi(-0.5305172747685276), -0.9954077178320563);
1177        assert_eq!(f_sinpi(-0.03723630312089732), -0.1167146713267927);
1178        assert_eq!(
1179            f_sinpi(0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022946074000077123),
1180            0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007208721750737005
1181        );
1182        assert_eq!(
1183            f_sinpi(0.000000000000000000000000000000000000007413093439574428),
1184            2.3288919890141717e-38
1185        );
1186        assert_eq!(f_sinpi(0.0031909299901270445), 0.0100244343161398578);
1187        assert_eq!(f_sinpi(0.11909245901270445), 0.36547215190661003);
1188        assert_eq!(f_sinpi(0.99909245901270445), 0.0028511202357662186);
1189        assert!(f_sinpi(f64::INFINITY).is_nan());
1190        assert!(f_sinpi(f64::NEG_INFINITY).is_nan());
1191        assert!(f_sinpi(f64::NAN).is_nan());
1192    }
1193
1194    #[test]
1195    fn test_sincospi() {
1196        let v0 = f_sincospi(1.0156097449358867);
1197        assert_eq!(v0.0, f_sinpi(1.0156097449358867));
1198        assert_eq!(v0.1, f_cospi(1.0156097449358867));
1199
1200        let v1 = f_sincospi(4503599627370496.);
1201        assert_eq!(v1.0, f_sinpi(4503599627370496.));
1202        assert_eq!(v1.1, f_cospi(4503599627370496.));
1203
1204        let v1 = f_sincospi(-108.);
1205        assert_eq!(v1.0, f_sinpi(-108.));
1206        assert_eq!(v1.1, f_cospi(-108.));
1207
1208        let v1 = f_sincospi(3.);
1209        assert_eq!(v1.0, f_sinpi(3.));
1210        assert_eq!(v1.1, f_cospi(3.));
1211
1212        let v1 = f_sincospi(13.5);
1213        assert_eq!(v1.0, f_sinpi(13.5));
1214        assert_eq!(v1.1, f_cospi(13.5));
1215
1216        let v1 = f_sincospi(7124076477593855.);
1217        assert_eq!(v1.0, f_sinpi(7124076477593855.));
1218        assert_eq!(v1.1, f_cospi(7124076477593855.));
1219
1220        let v1 = f_sincospi(2533419148247186.5);
1221        assert_eq!(v1.0, f_sinpi(2533419148247186.5));
1222        assert_eq!(v1.1, f_cospi(2533419148247186.5));
1223
1224        let v1 = f_sincospi(2.2250653705240375E-308);
1225        assert_eq!(v1.0, f_sinpi(2.2250653705240375E-308));
1226        assert_eq!(v1.1, f_cospi(2.2250653705240375E-308));
1227
1228        let v1 = f_sincospi(2533420818956351.);
1229        assert_eq!(v1.0, f_sinpi(2533420818956351.));
1230        assert_eq!(v1.1, f_cospi(2533420818956351.));
1231
1232        let v1 = f_sincospi(2533822406803233.5);
1233        assert_eq!(v1.0, f_sinpi(2533822406803233.5));
1234        assert_eq!(v1.1, f_cospi(2533822406803233.5));
1235
1236        let v1 = f_sincospi(-3040685725640478.5);
1237        assert_eq!(v1.0, f_sinpi(-3040685725640478.5));
1238        assert_eq!(v1.1, f_cospi(-3040685725640478.5));
1239
1240        let v1 = f_sincospi(2533419148247186.5);
1241        assert_eq!(v1.0, f_sinpi(2533419148247186.5));
1242        assert_eq!(v1.1, f_cospi(2533419148247186.5));
1243
1244        let v1 = f_sincospi(2533420819267583.5);
1245        assert_eq!(v1.0, f_sinpi(2533420819267583.5));
1246        assert_eq!(v1.1, f_cospi(2533420819267583.5));
1247
1248        let v1 = f_sincospi(6979704728846336.);
1249        assert_eq!(v1.0, f_sinpi(6979704728846336.));
1250        assert_eq!(v1.1, f_cospi(6979704728846336.));
1251
1252        let v1 = f_sincospi(7124076477593855.);
1253        assert_eq!(v1.0, f_sinpi(7124076477593855.));
1254        assert_eq!(v1.1, f_cospi(7124076477593855.));
1255
1256        let v1 = f_sincospi(-0.00000000002728839192371484);
1257        assert_eq!(v1.0, f_sinpi(-0.00000000002728839192371484));
1258        assert_eq!(v1.1, f_cospi(-0.00000000002728839192371484));
1259
1260        let v1 = f_sincospi(0.00002465398569495569);
1261        assert_eq!(v1.0, f_sinpi(0.00002465398569495569));
1262        assert_eq!(v1.1, f_cospi(0.00002465398569495569));
1263    }
1264
1265    #[test]
1266    fn test_cospi() {
1267        assert_eq!(0.9999497540959953, f_cospi(0.0031909299901270445));
1268        assert_eq!(0.9308216542079669, f_cospi(0.11909299901270445));
1269        assert_eq!(-0.1536194873288318, f_cospi(0.54909299901270445));
1270        assert!(f_cospi(f64::INFINITY).is_nan());
1271        assert!(f_cospi(f64::NEG_INFINITY).is_nan());
1272        assert!(f_cospi(f64::NAN).is_nan());
1273    }
1274}