1use crate::common::{dd_fmla, dyad_fmla, f_fmla, is_odd_integer};
30use crate::double_double::DoubleDouble;
31use crate::polyeval::{f_polyeval3, f_polyeval4};
32use crate::rounding::CpuRound;
33use crate::sin::SinCos;
34use crate::sincospi_tables::SINPI_K_PI_OVER_64;
35
36#[cold]
49#[inline(always)]
50fn as_cospi_zero<B: SinCosPiBackend>(x: f64, backend: &B) -> f64 {
51 const C: [(u64, u64); 5] = [
52 (0xbcb692b71366cc04, 0xc013bd3cc9be45de),
53 (0xbcb32b33fb803bd5, 0x40103c1f081b5ac4),
54 (0xbc9f5b752e98b088, 0xbff55d3c7e3cbff9),
55 (0x3c30023d540b9350, 0x3fce1f506446cb66),
56 (0x3c1a5d47937787d2, 0xbf8a9b062a36ba1c),
57 ];
58 let x2 = backend.exact_mult(x, x);
59 let mut p = backend.quick_mul_add(
60 x2,
61 DoubleDouble::from_bit_pair(C[3]),
62 DoubleDouble::from_bit_pair(C[3]),
63 );
64 p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
65 p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
66 p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0]));
67 p = backend.mul_add_f64(x2, p, 1.);
68 p.to_f64()
69}
70
/// Cold fallback for sinpi on small |x|: evaluates sin(pi*x) as the odd
/// series x * P(x^2) fully in double-double arithmetic, used when the
/// fast path's rounding test fails.
#[cold]
#[inline(always)]
fn as_sinpi_zero<B: SinCosPiBackend>(x: f64, backend: &B) -> f64 {
    // Coefficients as (lo, hi) f64 bit patterns; C[0] is pi (hi part
    // 0x400921fb54442d18), so sin(pi*x) ~= x*(C[0] + C[1]*x^2 + ... + C[5]*x^10).
    const C: [(u64, u64); 6] = [
        (0x3ca1a626311d9056, 0x400921fb54442d18),
        (0x3cb055f12c462211, 0xc014abbce625be53),
        (0xbc9789ea63534250, 0x400466bc6775aae1),
        (0xbc78b86de6962184, 0xbfe32d2cce62874e),
        (0x3c4eddf7cd887302, 0x3fb507833e2b781f),
        (0x3bf180c9d4af2894, 0xbf7e2ea4e143707e),
    ];
    let x2 = backend.exact_mult(x, x);
    // Horner scheme in x^2, from the highest-order coefficient down.
    let mut p = backend.quick_mul_add(
        x2,
        DoubleDouble::from_bit_pair(C[5]),
        DoubleDouble::from_bit_pair(C[4]),
    );
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[3]));
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0]));
    // Multiply by x to restore the odd symmetry.
    p = backend.quick_mult_f64(p, x);
    p.to_f64()
}
107
/// Argument reduction for sin/cos(pi*x): writes x = k/64 + y with
/// k = round(64*x) and |y| <= 1/128, returning (y, k).
#[inline]
pub(crate) fn reduce_pi_64(x: f64) -> (f64, i64) {
    let kd = (x * 64.).cpu_round();
    // y = x - k/64; kd * (-1/64) is exact (power-of-two scale), and the
    // emulated FMA makes the subtraction exact as well.
    let y = dd_fmla(kd, -1. / 64., x);
    (y, unsafe {
        // SAFETY: callers reach this only for finite |x| small enough that
        // 64*x fits in i64 (larger inputs are handled by earlier branches) —
        // TODO(review): confirm every call site preserves this invariant.
        kd.to_int_unchecked::<i64>()
    })
}
118
/// Same reduction as [`reduce_pi_64`] but using the native round and FMA
/// instructions; used by the x86 FMA backend.
#[inline(always)]
#[allow(unused)]
pub(crate) fn reduce_pi_64_fma(x: f64) -> (f64, i64) {
    let kd = (x * 64.).round();
    let y = f64::mul_add(kd, -1. / 64., x);
    (y, unsafe {
        // SAFETY: same invariant as `reduce_pi_64` — callers only pass finite
        // x for which 64*x fits in i64.
        kd.to_int_unchecked::<i64>()
    })
}
130
/// Arithmetic backend used by the sinpi/cospi kernels.
///
/// The kernels are written once, generically, and instantiated with either
/// `GenSinCosPiBackend` (portable, software-emulated FMA) or
/// `FmaSinCosPiBackend` (x86 hardware FMA), selected at runtime dispatch.
pub(crate) trait SinCosPiBackend {
    /// Fused multiply-add: x*y + z with a single rounding.
    fn fma(&self, x: f64, y: f64, z: f64) -> f64;
    /// FMA flavour used where the portable backend calls `dd_fmla`.
    fn dd_fma(&self, x: f64, y: f64, z: f64) -> f64;
    /// FMA flavour used where the portable backend calls `dyad_fmla`.
    fn dyad_fma(&self, x: f64, y: f64, z: f64) -> f64;
    /// Three-coefficient polynomial evaluation (coefficients low-to-high —
    /// see `f_polyeval3`).
    fn polyeval3(&self, x: f64, a0: f64, a1: f64, a2: f64) -> f64;
    /// Splits x into (y, k) with x = k/64 + y and |y| <= 1/128.
    fn arg_reduce_pi_64(&self, x: f64) -> (f64, i64);
    /// Double-double times f64 product.
    fn quick_mult_f64(&self, x: DoubleDouble, y: f64) -> DoubleDouble;
    /// Double-double product (fast variant).
    fn quick_mult(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble;
    /// True when x is an odd integer.
    fn odd_integer(&self, x: f64) -> bool;
    /// Double-double division.
    fn div(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble;
    /// a*b + c with a plain f64 addend.
    fn mul_add_f64(&self, a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble;
    /// a*b + c, fast variant.
    fn quick_mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble;
    /// a*b + c, accurate variant.
    fn mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble;
    /// Exact product of two f64 values as a double-double.
    fn exact_mult(&self, x: f64, y: f64) -> DoubleDouble;
}
146
/// Portable backend: all primitives use software double-double / emulated FMA.
pub(crate) struct GenSinCosPiBackend {}
148
149impl SinCosPiBackend for GenSinCosPiBackend {
150 #[inline(always)]
151 fn fma(&self, x: f64, y: f64, z: f64) -> f64 {
152 f_fmla(x, y, z)
153 }
154 #[inline(always)]
155 fn dd_fma(&self, x: f64, y: f64, z: f64) -> f64 {
156 dd_fmla(x, y, z)
157 }
158 #[inline(always)]
159 fn dyad_fma(&self, x: f64, y: f64, z: f64) -> f64 {
160 dyad_fmla(x, y, z)
161 }
162 #[inline(always)]
163 fn polyeval3(&self, x: f64, a0: f64, a1: f64, a2: f64) -> f64 {
164 use crate::polyeval::f_polyeval3;
165 f_polyeval3(x, a0, a1, a2)
166 }
167 #[inline(always)]
168 fn arg_reduce_pi_64(&self, x: f64) -> (f64, i64) {
169 reduce_pi_64(x)
170 }
171 #[inline(always)]
172 fn quick_mult_f64(&self, x: DoubleDouble, y: f64) -> DoubleDouble {
173 DoubleDouble::quick_mult_f64(x, y)
174 }
175 #[inline(always)]
176 fn quick_mult(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble {
177 DoubleDouble::quick_mult(x, y)
178 }
179
180 #[inline(always)]
181 fn odd_integer(&self, x: f64) -> bool {
182 is_odd_integer(x)
183 }
184
185 #[inline(always)]
186 fn div(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble {
187 DoubleDouble::div(x, y)
188 }
189
190 #[inline(always)]
191 fn mul_add_f64(&self, a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble {
192 DoubleDouble::mul_add_f64(a, b, c)
193 }
194
195 #[inline(always)]
196 fn quick_mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble {
197 DoubleDouble::quick_mul_add(a, b, c)
198 }
199
200 #[inline(always)]
201 fn mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble {
202 DoubleDouble::mul_add(a, b, c)
203 }
204
205 #[inline(always)]
206 fn exact_mult(&self, x: f64, y: f64) -> DoubleDouble {
207 DoubleDouble::from_exact_mult(x, y)
208 }
209}
210
/// x86/x86_64 backend: primitives map to hardware FMA. Only dispatched to
/// after `avx` and `fma` were detected at runtime.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub(crate) struct FmaSinCosPiBackend {}
213
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
impl SinCosPiBackend for FmaSinCosPiBackend {
    // All three FMA flavours collapse onto the same hardware instruction;
    // the distinction only matters for the portable backend.
    #[inline(always)]
    fn fma(&self, x: f64, y: f64, z: f64) -> f64 {
        f64::mul_add(x, y, z)
    }
    #[inline(always)]
    fn dd_fma(&self, x: f64, y: f64, z: f64) -> f64 {
        f64::mul_add(x, y, z)
    }
    #[inline(always)]
    fn dyad_fma(&self, x: f64, y: f64, z: f64) -> f64 {
        f64::mul_add(x, y, z)
    }
    #[inline(always)]
    fn polyeval3(&self, x: f64, a0: f64, a1: f64, a2: f64) -> f64 {
        // FMA-based polynomial evaluation.
        use crate::polyeval::d_polyeval3;
        d_polyeval3(x, a0, a1, a2)
    }
    #[inline(always)]
    fn arg_reduce_pi_64(&self, x: f64) -> (f64, i64) {
        // Uses native round + mul_add instead of the emulated versions.
        reduce_pi_64_fma(x)
    }
    #[inline(always)]
    fn quick_mult_f64(&self, x: DoubleDouble, y: f64) -> DoubleDouble {
        DoubleDouble::quick_mult_f64_fma(x, y)
    }
    #[inline(always)]
    fn quick_mult(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble {
        DoubleDouble::quick_mult_fma(x, y)
    }

    #[inline(always)]
    fn odd_integer(&self, x: f64) -> bool {
        use crate::common::is_odd_integer_fast;
        is_odd_integer_fast(x)
    }

    #[inline(always)]
    fn div(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble {
        DoubleDouble::div_fma(x, y)
    }

    #[inline(always)]
    fn mul_add_f64(&self, a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble {
        DoubleDouble::mul_add_f64_fma(a, b, c)
    }

    #[inline(always)]
    fn quick_mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble {
        DoubleDouble::quick_mul_add_fma(a, b, c)
    }

    #[inline(always)]
    fn mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble {
        DoubleDouble::mul_add_fma(a, b, c)
    }

    #[inline(always)]
    fn exact_mult(&self, x: f64, y: f64) -> DoubleDouble {
        DoubleDouble::from_exact_mult_fma(x, y)
    }
}
277
/// Fast-path evaluation of sin(pi*y) and cos(pi*y) for a reduced argument
/// (|y| <= 1/128 from `arg_reduce_pi_64`), together with an error bound
/// `err` used by callers for the rounding test.
#[inline(always)]
pub(crate) fn sincospi_eval<B: SinCosPiBackend>(x: f64, backend: &B) -> SinCos {
    let x2 = x * x;
    // sin(pi*x) = pi_hi*x + x*(pi_lo + x^2*P(x^2)): the cubic-and-higher
    // terms live in the low part.
    let sin_lop = backend.polyeval3(
        x2,
        f64::from_bits(0xc014abbce625be4d),
        f64::from_bits(0x400466bc6767f259),
        f64::from_bits(0xbfe32d176b0b3baf),
    ) * x2;
    // 0x3ca1a5c04563817a is (approximately) the low part of pi.
    let sin_lo = backend.dd_fma(f64::from_bits(0x3ca1a5c04563817a), x, sin_lop * x);
    // High part: x * pi (pi's leading double, 0x400921fb54442d18).
    let sin_hi = x * f64::from_bits(0x400921fb54442d18);

    // cos(pi*x) = 1 + x^2*Q(x^2) + tiny correction.
    let p = backend.polyeval3(
        x2,
        f64::from_bits(0xc013bd3cc9be45cf),
        f64::from_bits(0x40103c1f08085ad1),
        f64::from_bits(0xbff55d1e43463fc3),
    );

    // 0xbbdf72adefec0800 is a tiny constant correction term.
    let cos_lo = backend.dd_fma(p, x2, f64::from_bits(0xbbdf72adefec0800));
    let cos_hi = f64::from_bits(0x3ff0000000000000); // exactly 1.0

    // Error bound: 2^-52 * x^2 + 2^-59 (from the constants' bit patterns).
    let err = backend.fma(
        x2,
        f64::from_bits(0x3cb0000000000000),
        f64::from_bits(0x3c40000000000000),
    );
    SinCos {
        // Renormalize so hi carries the leading, correctly-rounded part.
        v_sin: DoubleDouble::from_exact_add(sin_hi, sin_lo),
        v_cos: DoubleDouble::from_exact_add(cos_hi, cos_lo),
        err,
    }
}
329
/// Slow-path evaluation of sin(pi*y) and cos(pi*y) entirely in double-double
/// arithmetic. `err` is 0 because callers treat this result as final
/// (no further rounding test).
#[inline(always)]
pub(crate) fn sincospi_eval_dd<B: SinCosPiBackend>(x: f64, backend: &B) -> SinCos {
    let x2 = backend.exact_mult(x, x);
    // sin coefficients as (lo, hi) bit pairs; SC[0] is pi, so
    // sin(pi*x) = x * (SC[0] + SC[1]*x^2 + ... + SC[4]*x^8).
    const SC: [(u64, u64); 5] = [
        (0x3ca1a626330ccf19, 0x400921fb54442d18),
        (0x3cb05540f6323de9, 0xc014abbce625be53),
        (0xbc9050fdd1229756, 0x400466bc6775aadf),
        (0xbc780d406f3472e8, 0xbfe32d2cce5a7bf1),
        (0x3c4cfcf8b6b817f2, 0x3fb5077069d8a182),
    ];

    // Horner in x^2, highest order first, then one multiply by x.
    let mut sin_y = backend.quick_mul_add(
        x2,
        DoubleDouble::from_bit_pair(SC[4]),
        DoubleDouble::from_bit_pair(SC[3]),
    );
    sin_y = backend.quick_mul_add(x2, sin_y, DoubleDouble::from_bit_pair(SC[2]));
    sin_y = backend.quick_mul_add(x2, sin_y, DoubleDouble::from_bit_pair(SC[1]));
    sin_y = backend.quick_mul_add(x2, sin_y, DoubleDouble::from_bit_pair(SC[0]));
    sin_y = backend.quick_mult_f64(sin_y, x);

    // cos coefficients; CC[0] is ~1.0, so
    // cos(pi*x) = CC[0] + CC[1]*x^2 + ... + CC[4]*x^8.
    const CC: [(u64, u64); 5] = [
        (0xbaaa70a580000000, 0x3ff0000000000000),
        (0xbcb69211d8dd1237, 0xc013bd3cc9be45de),
        (0xbcbd96cfd637eeb7, 0x40103c1f081b5abf),
        (0x3c994d75c577f029, 0xbff55d3c7e2e4ba5),
        (0xbc5c542d998a4e48, 0x3fce1f2f5f747411),
    ];

    let mut cos_y = backend.quick_mul_add(
        x2,
        DoubleDouble::from_bit_pair(CC[4]),
        DoubleDouble::from_bit_pair(CC[3]),
    );
    cos_y = backend.quick_mul_add(x2, cos_y, DoubleDouble::from_bit_pair(CC[2]));
    cos_y = backend.quick_mul_add(x2, cos_y, DoubleDouble::from_bit_pair(CC[1]));
    cos_y = backend.quick_mul_add(x2, cos_y, DoubleDouble::from_bit_pair(CC[0]));
    SinCos {
        v_sin: sin_y,
        v_cos: cos_y,
        err: 0.,
    }
}
384
385#[cold]
386#[inline(always)]
387fn sinpi_dd<B: SinCosPiBackend>(
388 x: f64,
389 sin_k: DoubleDouble,
390 cos_k: DoubleDouble,
391 backend: &B,
392) -> f64 {
393 let r_sincos = sincospi_eval_dd(x, backend);
394 let cos_k_sin_y = backend.quick_mult(cos_k, r_sincos.v_sin);
395 let rr = backend.mul_add(sin_k, r_sincos.v_cos, cos_k_sin_y);
396 rr.to_f64()
397}
398
399#[cold]
400#[inline(always)]
401fn sincospi_dd<B: SinCosPiBackend>(
402 x: f64,
403 sin_sin_k: DoubleDouble,
404 sin_cos_k: DoubleDouble,
405 cos_sin_k: DoubleDouble,
406 cos_cos_k: DoubleDouble,
407 backend: &B,
408) -> (f64, f64) {
409 let r_sincos = sincospi_eval_dd(x, backend);
410
411 let cos_k_sin_y = backend.quick_mult(sin_cos_k, r_sincos.v_sin);
412 let rr_sin = backend.mul_add(sin_sin_k, r_sincos.v_cos, cos_k_sin_y);
413
414 let cos_k_sin_y = backend.quick_mult(cos_cos_k, r_sincos.v_sin);
415 let rr_cos = backend.mul_add(cos_sin_k, r_sincos.v_cos, cos_k_sin_y);
416
417 (rr_sin.to_f64(), rr_cos.to_f64())
418}
419
/// Extended-precision sin/cos(pi*y) used by `f_fast_sinpi_dd`: mixes plain
/// f64 for the strongly-attenuated high-order terms with double-double for
/// the leading terms, tighter than `sincospi_eval` but cheaper than
/// `sincospi_eval_dd`.
#[inline]
fn sincospi_eval_extended(x: f64) -> SinCos {
    let x2 = DoubleDouble::from_exact_mult(x, x);
    // High-order sin terms in plain f64 (coefficients of x^4..x^8 of P where
    // sin(pi*x) = x*(pi + c1*x^2 + x^4*P(x^2))).
    let sin_lop = f_polyeval3(
        x2.hi,
        f64::from_bits(0x400466bc67763662),
        f64::from_bits(0xbfe32d2cce5aad86),
        f64::from_bits(0x3fb5077099a1f35b),
    );
    // Fold in the x^2 coefficient and pi (both double-double), then * x.
    let mut v_sin = DoubleDouble::mul_f64_add(
        x2,
        sin_lop,
        DoubleDouble::from_bit_pair((0x3cb0553d6ee5e8ec, 0xc014abbce625be53)),
    );
    v_sin = DoubleDouble::mul_add(
        x2,
        v_sin,
        DoubleDouble::from_bit_pair((0x3ca1a626330dd130, 0x400921fb54442d18)),
    );
    v_sin = DoubleDouble::quick_mult_f64(v_sin, x);

    // Same scheme for cos(pi*x) = 1 + c1*x^2 + x^4*Q(x^2).
    let p = f_polyeval3(
        x2.hi,
        f64::from_bits(0x40103c1f081b5abf),
        f64::from_bits(0xbff55d3c7e2edd89),
        f64::from_bits(0x3fce1f2fd9d79484),
    );

    let mut v_cos = DoubleDouble::mul_f64_add(
        x2,
        p,
        DoubleDouble::from_bit_pair((0xbcb69236a9b3ed73, 0xc013bd3cc9be45de)),
    );
    // Final: 1.0 + x^2 * (...).
    v_cos = DoubleDouble::mul_add_f64(x2, v_cos, f64::from_bits(0x3ff0000000000000));

    SinCos {
        // Renormalize both results; no error estimate on this path.
        v_sin: DoubleDouble::from_exact_add(v_sin.hi, v_sin.lo),
        v_cos: DoubleDouble::from_exact_add(v_cos.hi, v_cos.lo),
        err: 0.,
    }
}
476
/// Computes sin(pi*x) as a double-double, for internal callers that need
/// more than f64 precision.
///
/// NOTE(review): unlike `sinpi_gen_impl` there is no NaN/Inf branch in the
/// large-exponent path — callers are presumably expected to pass finite x;
/// confirm at call sites.
pub(crate) fn f_fast_sinpi_dd(x: f64) -> DoubleDouble {
    let ix = x.to_bits();
    let ax = ix & 0x7fff_ffff_ffff_ffff; // |x| bits
    if ax == 0 {
        return DoubleDouble::new(0., 0.);
    }
    let e: i32 = (ax >> 52) as i32; // biased exponent
    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52); // mantissa + implicit bit
    let sgn: i64 = (ix as i64) >> 63; // 0 for +, -1 for -
    let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn); // mantissa with x's sign applied
    let mut s: i32 = 1063i32.wrapping_sub(e);
    if s < 0 {
        // |x| >= 2^41.
        s = -s - 1;
        if s > 10 {
            // |x| >= 2^52: every such double is an integer, so sinpi is +-0.
            return DoubleDouble::new(0., f64::copysign(0.0, x));
        }
        let iq: u64 = (m as u64).wrapping_shl(s as u32);
        if (iq & 2047) == 0 {
            // No fractional bits left: x is an integer, exact signed zero.
            return DoubleDouble::new(0., f64::copysign(0.0, x));
        }
    }

    // |x| <= 0.03515625: direct polynomial, no argument reduction needed.
    if ax <= 0x3fa2000000000000u64 {
        // pi as a double-double constant.
        const PI: DoubleDouble = DoubleDouble::new(
            f64::from_bits(0x3ca1a62633145c07),
            f64::from_bits(0x400921fb54442d18),
        );

        if ax < 0x3c90000000000000 {
            // |x| < 2^-54: sin(pi*x) == pi*x at this precision.
            if ax < 0x0350000000000000 {
                // |x| < 2^-970: compute pi*x in a 2^105-scaled domain so the
                // product doesn't lose bits near the subnormal range, then
                // fold back with one FMA at scale 2^-106.
                let t = x * f64::from_bits(0x4690000000000000); // * 2^105
                let z = DoubleDouble::quick_mult_f64(PI, t);
                let r = z.to_f64();
                let rs = r * f64::from_bits(0x3950000000000000); // * 2^-106
                let rt = rs * f64::from_bits(0x4690000000000000);
                return DoubleDouble::new(
                    0.,
                    dyad_fmla((z.hi - rt) + z.lo, f64::from_bits(0x3950000000000000), rs),
                );
            }
            let z = DoubleDouble::quick_mult_f64(PI, x);
            return z;
        }

        // Plain-f64 coefficients for the x^6.. tail of the sin series.
        const C: [u64; 4] = [
            0xbfe32d2cce62bd85,
            0x3fb50783487eb73d,
            0xbf7e3074f120ad1f,
            0x3f3e8d9011340e5a,
        ];

        let x2 = DoubleDouble::from_exact_mult(x, x);

        // pi as a double-double bit pair (constant term of the odd series).
        const C_PI: DoubleDouble =
            DoubleDouble::from_bit_pair((0x3ca1a626331457a4, 0x400921fb54442d18));

        let p = f_polyeval4(
            x2.hi,
            f64::from_bits(C[0]),
            f64::from_bits(C[1]),
            f64::from_bits(C[2]),
            f64::from_bits(C[3]),
        );
        // Fold the f64 tail into the double-double Horner chain.
        let mut r = DoubleDouble::mul_f64_add(
            x2,
            p,
            DoubleDouble::from_bit_pair((0xbc96dd7ae221e58c, 0x400466bc6775aae2)),
        );
        r = DoubleDouble::mul_add(
            x2,
            r,
            DoubleDouble::from_bit_pair((0x3cb05511c8a6c478, 0xc014abbce625be53)),
        );
        r = DoubleDouble::mul_add(r, x2, C_PI);
        r = DoubleDouble::quick_mult_f64(r, x);
        // Renormalize so hi carries the correctly-rounded leading part.
        let k = DoubleDouble::from_exact_add(r.hi, r.lo);
        return k;
    }

    // Exact multiples of 1/2: sinpi is then exactly +-0 (integer) or +-1
    // (odd multiple of 1/2).
    let si = e.wrapping_sub(1011);
    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
        if (m0.wrapping_shl(si as u32)) == 0 {
            // Integer: exact signed zero.
            return DoubleDouble::new(0., f64::copysign(0.0, x));
        }
        // Half-integer: sign alternates with floor(x)'s parity (bit t).
        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
        return DoubleDouble::new(
            0.,
            if t == 0 {
                f64::copysign(1.0, x)
            } else {
                -f64::copysign(1.0, x)
            },
        );
    }

    // General path: x = k/64 + y with |y| <= 1/128.
    let (y, k) = reduce_pi_64(x);

    // Tabulated sin(pi*k/64); cos is the same table shifted by 32 slots
    // (a quarter period), indices taken mod 128.
    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
    let cos_k = DoubleDouble::from_bit_pair(
        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
    );

    let r_sincos = sincospi_eval_extended(y);

    // Angle addition: sin(pi*x) = sin_k*cos_y + cos_k*sin_y.
    let sin_k_cos_y = DoubleDouble::quick_mult(sin_k, r_sincos.v_cos);
    let cos_k_sin_y = DoubleDouble::quick_mult(cos_k, r_sincos.v_sin);

    let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
    rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
    DoubleDouble::from_exact_add(rr.hi, rr.lo)
}
607
/// Generic sin(pi*x) kernel, parameterized over the arithmetic backend.
/// Uses a fast path with a rounding test, falling back to the cold
/// double-double paths when the test is inconclusive.
#[inline(always)]
fn sinpi_gen_impl<B: SinCosPiBackend>(x: f64, backend: B) -> f64 {
    let ix = x.to_bits();
    let ax = ix & 0x7fff_ffff_ffff_ffff; // |x| bits
    if ax == 0 {
        // sinpi(+-0) = +-0.
        return x;
    }
    let e: i32 = (ax >> 52) as i32; // biased exponent
    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52); // mantissa + implicit bit
    let sgn: i64 = (ix as i64) >> 63; // 0 for +, -1 for -
    let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn); // signed mantissa
    let mut s: i32 = 1063i32.wrapping_sub(e);
    if s < 0 {
        // |x| >= 2^41 — also covers Inf/NaN.
        if e == 0x7ff {
            if (ix << 12) == 0 {
                // +-Inf: invalid operation -> NaN.
                return f64::NAN;
            }
            // NaN: propagate (x + x quiets a signaling NaN).
            return x + x;
        }
        s = -s - 1;
        if s > 10 {
            // |x| >= 2^52: all such doubles are integers, sinpi is +-0.
            return f64::copysign(0.0, x);
        }
        let iq: u64 = (m as u64).wrapping_shl(s as u32);
        if (iq & 2047) == 0 {
            // x is an integer: exact signed zero.
            return f64::copysign(0.0, x);
        }
    }

    // |x| <= 0.03515625: direct polynomial path (no reduction).
    if ax <= 0x3fa2000000000000u64 {
        // pi as a double-double constant.
        const PI: DoubleDouble = DoubleDouble::new(
            f64::from_bits(0x3ca1a62633145c07),
            f64::from_bits(0x400921fb54442d18),
        );

        if ax < 0x3c90000000000000 {
            // |x| < 2^-54: sin(pi*x) rounds like pi*x.
            if ax < 0x0350000000000000 {
                // |x| < 2^-970: compute pi*x scaled by 2^105 to avoid losing
                // bits near the subnormal range, fold back with one FMA.
                let t = x * f64::from_bits(0x4690000000000000); // * 2^105
                let z = backend.quick_mult_f64(PI, t);
                let r = z.to_f64();
                let rs = r * f64::from_bits(0x3950000000000000); // * 2^-106
                let rt = rs * f64::from_bits(0x4690000000000000);
                return backend.dyad_fma(
                    (z.hi - rt) + z.lo,
                    f64::from_bits(0x3950000000000000),
                    rs,
                );
            }
            let z = backend.quick_mult_f64(PI, x);
            return z.to_f64();
        }

        let x2 = x * x;
        let x3 = x2 * x;
        let x4 = x2 * x2;

        // Error bound: |x| * (2^-47*x^2 + 2^-66), from the bit patterns.
        let eps = x * backend.fma(
            x2,
            f64::from_bits(0x3d00000000000000),
            f64::from_bits(0x3bd0000000000000),
        );

        // f64 coefficients for the x^3..x^9 terms of sin(pi*x).
        const C: [u64; 4] = [
            0xc014abbce625be51,
            0x400466bc67754b46,
            0xbfe32d2cc12a51f4,
            0x3fb5060540058476,
        ];

        // pi as a double-double (leading term of the odd series).
        const C_PI: DoubleDouble =
            DoubleDouble::from_bit_pair((0x3ca1a67088eb1a46, 0x400921fb54442d18));

        let mut z = backend.quick_mult_f64(C_PI, x);

        // Split the polynomial tail into two FMA pairs, then combine.
        let zl0 = backend.fma(x2, f64::from_bits(C[1]), f64::from_bits(C[0]));
        let zl1 = backend.fma(x2, f64::from_bits(C[3]), f64::from_bits(C[2]));

        z.lo = backend.fma(x3, backend.fma(x4, zl1, zl0), z.lo);
        // Rounding test: if result +- eps rounds to one double, we're done;
        // otherwise fall back to the full double-double evaluation.
        let lb = z.hi + (z.lo - eps);
        let ub = z.hi + (z.lo + eps);
        if lb == ub {
            return lb;
        }
        return as_sinpi_zero(x, &backend);
    }

    // Exact multiples of 1/2: +-0 for integers, +-1 for half-integers.
    let si = e.wrapping_sub(1011);
    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
        if (m0.wrapping_shl(si as u32)) == 0 {
            // Integer: exact signed zero.
            return f64::copysign(0.0, x);
        }
        // Half-integer: sign alternates with floor(x)'s parity (bit t).
        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
        return if t == 0 {
            f64::copysign(1.0, x)
        } else {
            -f64::copysign(1.0, x)
        };
    }

    // General path: x = k/64 + y, |y| <= 1/128.
    let (y, k) = backend.arg_reduce_pi_64(x);

    // sin(pi*k/64) from a 128-entry table; cos via a 32-slot (pi/2) shift.
    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
    let cos_k = DoubleDouble::from_bit_pair(
        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
    );

    let r_sincos = sincospi_eval(y, &backend);

    // Angle addition: sin(pi*x) = sin_k*cos_y + cos_k*sin_y.
    let sin_k_cos_y = backend.quick_mult(sin_k, r_sincos.v_cos);
    let cos_k_sin_y = backend.quick_mult(cos_k, r_sincos.v_sin);

    let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
    rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;

    // Rounding test against the evaluation error bound; if inconclusive,
    // redo the tail in full double-double precision.
    let ub = rr.hi + (rr.lo + r_sincos.err);
    let lb = rr.hi + (rr.lo - r_sincos.err);
    if ub == lb {
        return rr.to_f64();
    }
    sinpi_dd(y, sin_k, cos_k, &backend)
}
750
/// `f_sinpi` specialization compiled with AVX+FMA codegen.
///
/// # Safety
/// Must only be called when the CPU supports `avx` and `fma`
/// (checked at dispatch time in `f_sinpi`).
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx", enable = "fma")]
unsafe fn sinpi_fma_impl(x: f64) -> f64 {
    sinpi_gen_impl(x, FmaSinCosPiBackend {})
}
756
/// Computes sin(pi*x).
///
/// Special cases (see tests below): sinpi(+-0) = +-0, sinpi(n) = +-0 for
/// any integer n, sinpi(+-Inf) = NaN, sinpi(NaN) = NaN.
pub fn f_sinpi(x: f64) -> f64 {
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
    {
        sinpi_gen_impl(x, GenSinCosPiBackend {})
    }
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        use std::sync::OnceLock;
        // Pick the best implementation once per process and cache it.
        static EXECUTOR: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
        let q = EXECUTOR.get_or_init(|| {
            if std::arch::is_x86_feature_detected!("avx")
                && std::arch::is_x86_feature_detected!("fma")
            {
                sinpi_fma_impl
            } else {
                fn def_sinpi(x: f64) -> f64 {
                    sinpi_gen_impl(x, GenSinCosPiBackend {})
                }
                def_sinpi
            }
        });
        // SAFETY: the stored pointer is `sinpi_fma_impl` only when avx+fma
        // were detected above, satisfying its #[target_feature] contract;
        // the fallback is a plain safe function.
        unsafe { q(x) }
    }
}
784
/// Generic cos(pi*x) kernel, parameterized over the arithmetic backend.
/// Mirrors `sinpi_gen_impl`, shifted by a quarter period.
#[inline(always)]
fn cospi_gen_impl<B: SinCosPiBackend>(x: f64, backend: B) -> f64 {
    let ix = x.to_bits();
    let ax = ix & 0x7fff_ffff_ffff_ffff; // |x| bits
    if ax == 0 {
        // cospi(+-0) = 1.
        return 1.0;
    }
    let e: i32 = (ax >> 52) as i32; // biased exponent
    let m: i64 = ((ix & 0x000fffffffffffff) | (1u64 << 52)) as i64; // mantissa + implicit bit
    let mut s = 1063i32.wrapping_sub(e);
    if s < 0 {
        // |x| >= 2^41 — also covers Inf/NaN.
        if e == 0x7ff {
            if ix.wrapping_shl(12) == 0 {
                // +-Inf: invalid operation -> NaN.
                return f64::NAN;
            }
            // NaN: propagate (quieted).
            return x + x;
        }
        s = -s - 1;
        // |x| >= 2^53: all such doubles are even integers, so cospi is 1.
        if s > 11 {
            return 1.0;
        }
        // The +1024 bias shifts the zero test by a quarter period:
        // cospi is 0 exactly at odd multiples of 1/2.
        let iq: u64 = (m as u64).wrapping_shl(s as u32).wrapping_add(1024);
        if (iq & 2047) == 0 {
            return 0.0;
        }
    }
    // |x| <= 2^-12: polynomial path, no argument reduction.
    if ax <= 0x3f30000000000000u64 {
        if ax <= 0x3e2ccf6429be6621u64 {
            // Below this threshold cos(pi*x) rounds to 1 from below;
            // the expression yields that rounding (and keeps it inexact).
            return 1.0 - f64::from_bits(0x3c80000000000000);
        }
        let x2 = x * x;
        let x4 = x2 * x2;
        // Error bound ~ 2^-48ish * x^2 (from the constant's bit pattern).
        let eps = x2 * f64::from_bits(0x3cfa000000000000);

        // f64 coefficients of x^2..x^8 in cos(pi*x) = 1 + x^2*Q(x^2).
        const C: [u64; 4] = [
            0xc013bd3cc9be45de,
            0x40103c1f081b5ac4,
            0xbff55d3c7ff79b60,
            0x3fd24c7b6f7d0690,
        ];

        let p0 = backend.fma(x2, f64::from_bits(C[3]), f64::from_bits(C[2]));
        let p1 = backend.fma(x2, f64::from_bits(C[1]), f64::from_bits(C[0]));

        let p = x2 * backend.fma(x4, p0, p1);
        // Rounding test; fall back to the double-double path if inconclusive.
        let lb = (p - eps) + 1.;
        let ub = (p + eps) + 1.;
        if lb == ub {
            return lb;
        }
        return as_cospi_zero(x, &backend);
    }

    // x an exact odd multiple of 1/2 -> cospi is exactly 0.
    let si: i32 = e.wrapping_sub(1011);
    if si >= 0 && ((m as u64).wrapping_shl(si as u32) ^ 0x8000000000000000u64) == 0 {
        return 0.0;
    }

    // General path: x = k/64 + y, |y| <= 1/128.
    let (y, k) = backend.arg_reduce_pi_64(x);

    // -sin(pi*k/64) via a 64-slot (half-period) table shift; cos via 32 slots.
    let msin_k = DoubleDouble::from_bit_pair(
        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(64) & 127) as usize],
    );
    let cos_k = DoubleDouble::from_bit_pair(
        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
    );

    let r_sincos = sincospi_eval(y, &backend);

    // Angle addition: cos(pi*x) = cos_k*cos_y - sin_k*sin_y.
    let cos_k_cos_y = backend.quick_mult(r_sincos.v_cos, cos_k);
    let cos_k_msin_y = backend.quick_mult(r_sincos.v_sin, msin_k);

    let mut rr = DoubleDouble::from_exact_add(cos_k_cos_y.hi, cos_k_msin_y.hi);
    rr.lo += cos_k_cos_y.lo + cos_k_msin_y.lo;

    // Rounding test against the evaluation error bound.
    let ub = rr.hi + (rr.lo + r_sincos.err);
    let lb = rr.hi + (rr.lo - r_sincos.err);
    if ub == lb {
        return rr.to_f64();
    }
    // Reuse sinpi_dd with (sin_k, cos_k) := (cos_k, -sin_k), which is
    // exactly the cosine angle-addition formula.
    sinpi_dd(y, cos_k, msin_k, &backend)
}
883
/// `f_cospi` specialization compiled with AVX+FMA codegen.
///
/// # Safety
/// Must only be called when the CPU supports `avx` and `fma`
/// (checked at dispatch time in `f_cospi`).
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx", enable = "fma")]
unsafe fn cospi_fma_impl(x: f64) -> f64 {
    cospi_gen_impl(x, FmaSinCosPiBackend {})
}
889
/// Computes cos(pi*x).
///
/// Special cases: cospi(+-0) = 1, cospi(n + 1/2) = 0 for integer n,
/// cospi(+-Inf) = NaN, cospi(NaN) = NaN.
pub fn f_cospi(x: f64) -> f64 {
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
    {
        cospi_gen_impl(x, GenSinCosPiBackend {})
    }
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        use std::sync::OnceLock;
        // Pick the best implementation once per process and cache it.
        static EXECUTOR: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
        let q = EXECUTOR.get_or_init(|| {
            if std::arch::is_x86_feature_detected!("avx")
                && std::arch::is_x86_feature_detected!("fma")
            {
                cospi_fma_impl
            } else {
                fn def_cospi(x: f64) -> f64 {
                    cospi_gen_impl(x, GenSinCosPiBackend {})
                }
                def_cospi
            }
        });
        // SAFETY: the stored pointer is `cospi_fma_impl` only when avx+fma
        // were detected above, satisfying its #[target_feature] contract.
        unsafe { q(x) }
    }
}
917
/// Generic combined (sin(pi*x), cos(pi*x)) kernel; the two results share one
/// argument reduction and one polynomial evaluation.
#[inline(always)]
fn sincospi_gen_impl<B: SinCosPiBackend>(x: f64, backend: B) -> (f64, f64) {
    let ix = x.to_bits();
    let ax = ix & 0x7fff_ffff_ffff_ffff; // |x| bits
    if ax == 0 {
        // (sinpi, cospi)(+-0) = (+-0, 1).
        return (x, 1.0);
    }
    let e: i32 = (ax >> 52) as i32; // biased exponent
    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52); // mantissa + implicit bit
    let m: i64 = ((ix & 0x000fffffffffffff) | (1u64 << 52)) as i64;
    let mut s = 1063i32.wrapping_sub(e);
    if s < 0 {
        // |x| >= 2^41 — also covers Inf/NaN.
        if e == 0x7ff {
            if ix.wrapping_shl(12) == 0 {
                // +-Inf: invalid -> NaN for both outputs.
                return (f64::NAN, f64::NAN);
            }
            // NaN: propagate (quieted).
            return (x + x, x + x);
        }
        s = -s - 1;
        if s > 10 {
            // |x| >= 2^52: integer, sin is +-0, cos is +-1 by parity.
            static CF: [f64; 2] = [1., -1.];
            let is_odd = backend.odd_integer(f64::from_bits(ax));
            let cos_x = CF[is_odd as usize];
            return (f64::copysign(0.0, x), cos_x);
        }
        let iq: u64 = (m as u64).wrapping_shl(s as u32);

        // sin is exactly zero at integers...
        let sin_zero = (iq & 2047) == 0;

        // ...and cos at odd multiples of 1/2 (quarter-period bias of 1024).
        let cos_zero = ((m as u64).wrapping_shl(s as u32).wrapping_add(1024) & 2047) == 0;

        if sin_zero && cos_zero {
            // Cannot both be zero; fall through to the general path.
        } else if sin_zero {
            static CF: [f64; 2] = [1., -1.];
            let is_odd = backend.odd_integer(f64::from_bits(ax));
            let cos_x = CF[is_odd as usize];
            return (0.0, cos_x);
        } else if cos_zero {
            // Half-integer: sin is +-1 with sign set by floor(x)'s parity.
            let si = e.wrapping_sub(1011);
            let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
            return if t == 0 {
                (f64::copysign(1.0, x), 0.0)
            } else {
                (-f64::copysign(1.0, x), 0.0)
            };
        }
    }

    // |x| <= 2^-12: polynomial path, no argument reduction.
    if ax <= 0x3f30000000000000u64 {
        if ax <= 0x3c90000000000000u64 {
            // |x| < 2^-54: sin rounds like pi*x, cos rounds to 1 from below.
            const PI: DoubleDouble = DoubleDouble::new(
                f64::from_bits(0x3ca1a62633145c07),
                f64::from_bits(0x400921fb54442d18),
            );
            let sin_x = if ax < 0x0350000000000000 {
                // |x| < 2^-970: scaled (2^105 / 2^-106) evaluation to avoid
                // losing bits near the subnormal range.
                let t = x * f64::from_bits(0x4690000000000000);
                let z = backend.quick_mult_f64(PI, t);
                let r = z.to_f64();
                let rs = r * f64::from_bits(0x3950000000000000);
                let rt = rs * f64::from_bits(0x4690000000000000);
                backend.dyad_fma((z.hi - rt) + z.lo, f64::from_bits(0x3950000000000000), rs)
            } else {
                let z = backend.quick_mult_f64(PI, x);
                z.to_f64()
            };
            return (sin_x, 1.0 - f64::from_bits(0x3c80000000000000));
        }
        let x2 = x * x;
        let x4 = x2 * x2;
        let cos_eps = x2 * f64::from_bits(0x3cfa000000000000);

        // Same cos coefficients and scheme as `cospi_gen_impl`.
        const COS_C: [u64; 4] = [
            0xc013bd3cc9be45de,
            0x40103c1f081b5ac4,
            0xbff55d3c7ff79b60,
            0x3fd24c7b6f7d0690,
        ];

        let p0 = backend.fma(x2, f64::from_bits(COS_C[3]), f64::from_bits(COS_C[2]));
        let p1 = backend.fma(x2, f64::from_bits(COS_C[1]), f64::from_bits(COS_C[0]));

        let p = x2 * backend.fma(x4, p0, p1);
        // Rounding test for cos; cold fallback if inconclusive.
        let cos_lb = (p - cos_eps) + 1.;
        let cos_ub = (p + cos_eps) + 1.;
        let cos_x = if cos_lb == cos_ub {
            cos_lb
        } else {
            as_cospi_zero(x, &backend)
        };

        // Same sin coefficients and scheme as `sinpi_gen_impl`.
        const SIN_C: [u64; 4] = [
            0xc014abbce625be51,
            0x400466bc67754b46,
            0xbfe32d2cc12a51f4,
            0x3fb5060540058476,
        ];

        const C_PI: DoubleDouble =
            DoubleDouble::from_bit_pair((0x3ca1a67088eb1a46, 0x400921fb54442d18));

        let mut z = backend.quick_mult_f64(C_PI, x);

        let x3 = x2 * x;

        let zl0 = backend.fma(x2, f64::from_bits(SIN_C[1]), f64::from_bits(SIN_C[0]));
        let zl1 = backend.fma(x2, f64::from_bits(SIN_C[3]), f64::from_bits(SIN_C[2]));

        // Error bound: |x| * (2^-47*x^2 + 2^-66).
        let sin_eps = x * backend.fma(
            x2,
            f64::from_bits(0x3d00000000000000),
            f64::from_bits(0x3bd0000000000000),
        );

        z.lo = backend.fma(x3, backend.fma(x4, zl1, zl0), z.lo);
        // Rounding test for sin; cold fallback if inconclusive.
        let sin_lb = z.hi + (z.lo - sin_eps);
        let sin_ub = z.hi + (z.lo + sin_eps);
        let sin_x = if sin_lb == sin_ub {
            sin_lb
        } else {
            as_sinpi_zero(x, &backend)
        };
        return (sin_x, cos_x);
    }

    // Exact multiples of 1/2 in the mid range.
    let si = e.wrapping_sub(1011);
    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
        if (m0.wrapping_shl(si as u32)) == 0 {
            // Integer: sin is +-0, cos is +-1 by parity.
            static CF: [f64; 2] = [1., -1.];
            let is_odd = backend.odd_integer(f64::from_bits(ax));
            let cos_x = CF[is_odd as usize];
            return (f64::copysign(0.0, x), cos_x);
        }
        // Half-integer: sin is +-1 (sign from floor parity), cos is 0.
        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
        return if t == 0 {
            (f64::copysign(1.0, x), 0.0)
        } else {
            (-f64::copysign(1.0, x), 0.0)
        };
    }

    // General path: x = k/64 + y, |y| <= 1/128.
    let (y, k) = backend.arg_reduce_pi_64(x);

    // Table lookups shared by both outputs (cos = 32-slot shift of sin).
    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
    let cos_k = DoubleDouble::from_bit_pair(
        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
    );
    let msin_k = -sin_k;

    let r_sincos = sincospi_eval(y, &backend);

    // sin(pi*x) = sin_k*cos_y + cos_k*sin_y.
    let sin_k_cos_y = backend.quick_mult(sin_k, r_sincos.v_cos);
    let cos_k_sin_y = backend.quick_mult(cos_k, r_sincos.v_sin);

    // cos(pi*x) = cos_k*cos_y - sin_k*sin_y.
    let cos_k_cos_y = backend.quick_mult(r_sincos.v_cos, cos_k);
    let msin_k_sin_y = backend.quick_mult(r_sincos.v_sin, msin_k);

    let mut rr_sin = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
    rr_sin.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;

    // Rounding tests for both results; the cold path recomputes both.
    let sin_ub = rr_sin.hi + (rr_sin.lo + r_sincos.err);
    let sin_lb = rr_sin.hi + (rr_sin.lo - r_sincos.err);
    let mut rr_cos = DoubleDouble::from_exact_add(cos_k_cos_y.hi, msin_k_sin_y.hi);
    rr_cos.lo += cos_k_cos_y.lo + msin_k_sin_y.lo;

    let cos_ub = rr_cos.hi + (rr_cos.lo + r_sincos.err);
    let cos_lb = rr_cos.hi + (rr_cos.lo - r_sincos.err);
    if sin_ub == sin_lb && cos_lb == cos_ub {
        return (rr_sin.to_f64(), rr_cos.to_f64());
    }

    sincospi_dd(y, sin_k, cos_k, cos_k, msin_k, &backend)
}
1124
/// `f_sincospi` specialization compiled with AVX+FMA codegen.
///
/// # Safety
/// Must only be called when the CPU supports `avx` and `fma`
/// (checked at dispatch time in `f_sincospi`).
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx", enable = "fma")]
unsafe fn sincospi_fma_impl(x: f64) -> (f64, f64) {
    sincospi_gen_impl(x, FmaSinCosPiBackend {})
}
1130
/// Computes (sin(pi*x), cos(pi*x)) in one pass, sharing the argument
/// reduction between the two results. Agrees with `f_sinpi`/`f_cospi`
/// (see the tests below).
pub fn f_sincospi(x: f64) -> (f64, f64) {
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
    {
        sincospi_gen_impl(x, GenSinCosPiBackend {})
    }
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        use std::sync::OnceLock;
        // Pick the best implementation once per process and cache it.
        static EXECUTOR: OnceLock<unsafe fn(f64) -> (f64, f64)> = OnceLock::new();
        let q = EXECUTOR.get_or_init(|| {
            if std::arch::is_x86_feature_detected!("avx")
                && std::arch::is_x86_feature_detected!("fma")
            {
                sincospi_fma_impl
            } else {
                fn def_sincospi(x: f64) -> (f64, f64) {
                    sincospi_gen_impl(x, GenSinCosPiBackend {})
                }
                def_sincospi
            }
        });
        // SAFETY: the stored pointer is `sincospi_fma_impl` only when
        // avx+fma were detected above, satisfying its contract.
        unsafe { q(x) }
    }
}
1158
#[cfg(test)]
mod tests {
    use super::*;

    // Regression fixtures: inputs cover zeros at huge magnitudes, tiny and
    // subnormal-range arguments, near-half-integer values, and specials.
    // Expected values are exact f64 results — presumably verified against a
    // correctly-rounded reference (TODO(review): confirm provenance).
    #[test]
    fn test_sinpi() {
        assert_eq!(f_sinpi(262143.50006870925), -0.9999999767029883);
        assert_eq!(f_sinpi(7124076477593855.), 0.);
        assert_eq!(f_sinpi(-11235582092889474000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.), -0.);
        assert_eq!(f_sinpi(-2.7430620343968443e303), -0.0);
        assert_eq!(f_sinpi(0.00003195557007273919), 0.00010039138401316004);
        assert_eq!(f_sinpi(-0.038357843137253766), -0.12021328061499763);
        assert_eq!(f_sinpi(1.0156097449358867), -0.04901980680173724);
        assert_eq!(f_sinpi(74.8593852519989), 0.42752597787896457);
        assert_eq!(f_sinpi(0.500091552734375), 0.9999999586369661);
        assert_eq!(f_sinpi(0.5307886532952182), 0.9953257438106751);
        assert_eq!(f_sinpi(3.1415926535897936), -0.43030121700009316);
        assert_eq!(f_sinpi(-0.5305172747685276), -0.9954077178320563);
        assert_eq!(f_sinpi(-0.03723630312089732), -0.1167146713267927);
        // Deep-subnormal path (|x| < 2^-970 scaled evaluation).
        assert_eq!(
            f_sinpi(0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022946074000077123),
            0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007208721750737005
        );
        assert_eq!(
            f_sinpi(0.000000000000000000000000000000000000007413093439574428),
            2.3288919890141717e-38
        );
        assert_eq!(f_sinpi(0.0031909299901270445), 0.0100244343161398578);
        assert_eq!(f_sinpi(0.11909245901270445), 0.36547215190661003);
        assert_eq!(f_sinpi(0.99909245901270445), 0.0028511202357662186);
        // Specials: infinities are invalid, NaN propagates.
        assert!(f_sinpi(f64::INFINITY).is_nan());
        assert!(f_sinpi(f64::NEG_INFINITY).is_nan());
        assert!(f_sinpi(f64::NAN).is_nan());
    }

    // `f_sincospi` must agree component-wise with `f_sinpi` and `f_cospi`,
    // including on exact-zero/one classification of large half-integers.
    #[test]
    fn test_sincospi() {
        let v0 = f_sincospi(1.0156097449358867);
        assert_eq!(v0.0, f_sinpi(1.0156097449358867));
        assert_eq!(v0.1, f_cospi(1.0156097449358867));

        let v1 = f_sincospi(4503599627370496.);
        assert_eq!(v1.0, f_sinpi(4503599627370496.));
        assert_eq!(v1.1, f_cospi(4503599627370496.));

        let v1 = f_sincospi(-108.);
        assert_eq!(v1.0, f_sinpi(-108.));
        assert_eq!(v1.1, f_cospi(-108.));

        let v1 = f_sincospi(3.);
        assert_eq!(v1.0, f_sinpi(3.));
        assert_eq!(v1.1, f_cospi(3.));

        let v1 = f_sincospi(13.5);
        assert_eq!(v1.0, f_sinpi(13.5));
        assert_eq!(v1.1, f_cospi(13.5));

        let v1 = f_sincospi(7124076477593855.);
        assert_eq!(v1.0, f_sinpi(7124076477593855.));
        assert_eq!(v1.1, f_cospi(7124076477593855.));

        let v1 = f_sincospi(2533419148247186.5);
        assert_eq!(v1.0, f_sinpi(2533419148247186.5));
        assert_eq!(v1.1, f_cospi(2533419148247186.5));

        let v1 = f_sincospi(2.2250653705240375E-308);
        assert_eq!(v1.0, f_sinpi(2.2250653705240375E-308));
        assert_eq!(v1.1, f_cospi(2.2250653705240375E-308));

        let v1 = f_sincospi(2533420818956351.);
        assert_eq!(v1.0, f_sinpi(2533420818956351.));
        assert_eq!(v1.1, f_cospi(2533420818956351.));

        let v1 = f_sincospi(2533822406803233.5);
        assert_eq!(v1.0, f_sinpi(2533822406803233.5));
        assert_eq!(v1.1, f_cospi(2533822406803233.5));

        let v1 = f_sincospi(-3040685725640478.5);
        assert_eq!(v1.0, f_sinpi(-3040685725640478.5));
        assert_eq!(v1.1, f_cospi(-3040685725640478.5));

        let v1 = f_sincospi(2533419148247186.5);
        assert_eq!(v1.0, f_sinpi(2533419148247186.5));
        assert_eq!(v1.1, f_cospi(2533419148247186.5));

        let v1 = f_sincospi(2533420819267583.5);
        assert_eq!(v1.0, f_sinpi(2533420819267583.5));
        assert_eq!(v1.1, f_cospi(2533420819267583.5));

        let v1 = f_sincospi(6979704728846336.);
        assert_eq!(v1.0, f_sinpi(6979704728846336.));
        assert_eq!(v1.1, f_cospi(6979704728846336.));

        let v1 = f_sincospi(7124076477593855.);
        assert_eq!(v1.0, f_sinpi(7124076477593855.));
        assert_eq!(v1.1, f_cospi(7124076477593855.));

        let v1 = f_sincospi(-0.00000000002728839192371484);
        assert_eq!(v1.0, f_sinpi(-0.00000000002728839192371484));
        assert_eq!(v1.1, f_cospi(-0.00000000002728839192371484));

        let v1 = f_sincospi(0.00002465398569495569);
        assert_eq!(v1.0, f_sinpi(0.00002465398569495569));
        assert_eq!(v1.1, f_cospi(0.00002465398569495569));
    }

    #[test]
    fn test_cospi() {
        assert_eq!(0.9999497540959953, f_cospi(0.0031909299901270445));
        assert_eq!(0.9308216542079669, f_cospi(0.11909299901270445));
        assert_eq!(-0.1536194873288318, f_cospi(0.54909299901270445));
        // Specials: infinities are invalid, NaN propagates.
        assert!(f_cospi(f64::INFINITY).is_nan());
        assert!(f_cospi(f64::NEG_INFINITY).is_nan());
        assert!(f_cospi(f64::NAN).is_nan());
    }
}