1use crate::common::f_fmla;
30use crate::double_double::DoubleDouble;
31use crate::polyeval::f_estrin_polyeval5;
32use crate::sincospi::{GenSinCosPiBackend, SinCosPiBackend, reduce_pi_64};
33use crate::sincospi_tables::SINPI_K_PI_OVER_64;
34
35#[cold]
47fn as_sincpi_zero<B: SinCosPiBackend>(x: f64, backend: &B) -> f64 {
48 const C: [(u64, u64); 7] = [
49 (0xb9d3080000000000, 0x3ff0000000000000),
50 (0xbc81873d86314302, 0xbffa51a6625307d3),
51 (0x3c84871b4ffeefae, 0x3fe9f9cb402bc46c),
52 (0xbc5562d6ae037010, 0xbfc86a8e4720db66),
53 (0xbc386c93f4549bac, 0x3f9ac6805cf31ffd),
54 (0x3c0dbda368edfa40, 0xbf633816a3399d4e),
55 (0xbbcf22ccc18f27a9, 0x3f23736e6a59edd9),
56 ];
57 let x2 = backend.exact_mult(x, x);
58 let mut p = backend.quick_mul_add(
59 x2,
60 DoubleDouble::from_bit_pair(C[6]),
61 DoubleDouble::from_bit_pair(C[5]),
62 );
63 p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[4]));
64 p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[3]));
65 p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
66 p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
67 p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0]));
68 p.to_f64()
69}
70
71#[inline(always)]
72fn sincpi_gen_impl(x: f64) -> f64 {
73 let ix = x.to_bits();
74 let ax = ix & 0x7fff_ffff_ffff_ffff;
75 if ax == 0 {
76 return 1.;
77 }
78 let e: i32 = (ax >> 52) as i32;
79 let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52);
80 let sgn: i64 = (ix as i64) >> 63;
81 let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn);
82 let mut s: i32 = 1063i32.wrapping_sub(e);
83 if s < 0 {
84 if e == 0x7ff {
85 if (ix << 12) == 0 {
86 return f64::NAN;
87 }
88 return x + x; }
90 s = -s - 1;
91 if s > 10 {
92 return f64::copysign(0.0, x);
93 }
94 let iq: u64 = (m as u64).wrapping_shl(s as u32);
95 if (iq & 2047) == 0 {
96 return f64::copysign(0.0, x);
97 }
98 }
99
100 if ax <= 0x3fa2000000000000u64 {
101 if ax < 0x3c90000000000000u64 {
104 if ax <= 0x3b05798ee2308c3au64 {
106 return 1.;
108 }
109 #[cfg(any(
112 all(
113 any(target_arch = "x86", target_arch = "x86_64"),
114 target_feature = "fma"
115 ),
116 target_arch = "aarch64"
117 ))]
118 {
119 const M_SQR_PI_OVER_6: f64 = f64::from_bits(0xbffa51a6625307d3);
120 let p = f_fmla(x, M_SQR_PI_OVER_6 * x, 1.);
121 return p;
122 }
123 #[cfg(not(any(
124 all(
125 any(target_arch = "x86", target_arch = "x86_64"),
126 target_feature = "fma"
127 ),
128 target_arch = "aarch64"
129 )))]
130 {
131 use crate::common::min_normal_f64;
132 return 1. - min_normal_f64();
133 }
134 }
135
136 let x2 = x * x;
143
144 let eps = x * f_fmla(
145 x2,
146 f64::from_bits(0x3d00000000000000), f64::from_bits(0x3bd0000000000000), );
149
150 const C: [u64; 5] = [
151 0xbffa51a6625307d3,
152 0x3fe9f9cb402bbeaa,
153 0xbfc86a8e466bbb5b,
154 0x3f9ac66d887e2f38,
155 0xbf628473a38d289a,
156 ];
157
158 const F: DoubleDouble =
159 DoubleDouble::from_bit_pair((0xbb93f0a925810000, 0x3ff0000000000000));
160
161 let p = f_estrin_polyeval5(
162 x2,
163 f64::from_bits(C[0]),
164 f64::from_bits(C[1]),
165 f64::from_bits(C[2]),
166 f64::from_bits(C[3]),
167 f64::from_bits(C[4]),
168 );
169 let v = DoubleDouble::from_exact_mult(p, x2);
170 let z = DoubleDouble::add(F, v);
171
172 let lb = z.hi + (z.lo - eps);
173 let ub = z.hi + (z.lo + eps);
174 if lb == ub {
175 return lb;
176 }
177 return as_sincpi_zero(x, &GenSinCosPiBackend {});
178 }
179
180 let si = e.wrapping_sub(1011);
181 if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
182 if (m0.wrapping_shl(si as u32)) == 0 {
184 return f64::copysign(0.0, x); }
186 let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
187 #[cfg(any(
189 all(
190 any(target_arch = "x86", target_arch = "x86_64"),
191 target_feature = "fma"
192 ),
193 target_arch = "aarch64"
194 ))]
195 {
196 let num = if t == 0 {
197 f64::copysign(1.0, x)
198 } else {
199 -f64::copysign(1.0, x)
200 };
201 const PI: DoubleDouble =
202 DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18));
203 let r = DoubleDouble::quick_mult_f64(PI, x);
204 let v = DoubleDouble::from_f64_div_dd(num, r);
205 return v.to_f64();
206 }
207
208 #[cfg(not(any(
209 all(
210 any(target_arch = "x86", target_arch = "x86_64"),
211 target_feature = "fma"
212 ),
213 target_arch = "aarch64"
214 )))]
215 {
216 use crate::double_double::two_product_compatible;
217 return if two_product_compatible(x) {
218 let num = if t == 0 {
219 f64::copysign(1.0, x)
220 } else {
221 -f64::copysign(1.0, x)
222 };
223 const PI: DoubleDouble =
224 DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18));
225 let r = DoubleDouble::quick_mult_f64(PI, x);
226 let v = DoubleDouble::from_f64_div_dd(num, r);
227 v.to_f64()
228 } else {
229 use crate::dyadic_float::{DyadicFloat128, DyadicSign};
230 let num = DyadicFloat128::new_from_f64(if t == 0 {
231 f64::copysign(1.0, x)
232 } else {
233 -f64::copysign(1.0, x)
234 });
235 const PI: DyadicFloat128 = DyadicFloat128 {
236 sign: DyadicSign::Pos,
237 exponent: -126,
238 mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
239 };
240 let dx = DyadicFloat128::new_from_f64(x);
241 let r = (PI * dx).reciprocal();
242 (num * r).fast_as_f64()
243 };
244 }
245 }
246
247 let (y, k) = reduce_pi_64(x);
248
249 let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
251 let cos_k = DoubleDouble::from_bit_pair(
252 SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
253 );
254
255 let r_sincos = crate::sincospi::sincospi_eval(y, &GenSinCosPiBackend {});
256
257 const PI: DoubleDouble = DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18));
258 let scale = DoubleDouble::quick_mult_f64(PI, x);
259
260 let sin_k_cos_y = DoubleDouble::quick_mult(sin_k, r_sincos.v_cos);
261 let cos_k_sin_y = DoubleDouble::quick_mult(cos_k, r_sincos.v_sin);
262
263 let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
265 rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
266 rr = DoubleDouble::div(rr, scale);
267
268 let ub = rr.hi + (rr.lo + r_sincos.err); let lb = rr.hi + (rr.lo - r_sincos.err); if ub == lb {
272 return rr.to_f64();
273 }
274 sincpi_dd(y, sin_k, cos_k, scale, &GenSinCosPiBackend {})
275}
276
277#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
278#[target_feature(enable = "avx", enable = "fma")]
279unsafe fn sincpi_fma_impl(x: f64) -> f64 {
280 use crate::sincospi::FmaSinCosPiBackend;
281 let ix = x.to_bits();
282 let ax = ix & 0x7fff_ffff_ffff_ffff;
283 if ax == 0 {
284 return 1.;
285 }
286 let e: i32 = (ax >> 52) as i32;
287 let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52);
288 let sgn: i64 = (ix as i64) >> 63;
289 let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn);
290 let mut s: i32 = 1063i32.wrapping_sub(e);
291 if s < 0 {
292 if e == 0x7ff {
293 if (ix << 12) == 0 {
294 return f64::NAN;
295 }
296 return x + x; }
298 s = -s - 1;
299 if s > 10 {
300 return f64::copysign(0.0, x);
301 }
302 let iq: u64 = (m as u64).wrapping_shl(s as u32);
303 if (iq & 2047) == 0 {
304 return f64::copysign(0.0, x);
305 }
306 }
307
308 if ax <= 0x3fa2000000000000u64 {
309 if ax < 0x3c90000000000000u64 {
312 if ax <= 0x3b05798ee2308c3au64 {
314 return 1.;
316 }
317 const M_SQR_PI_OVER_6: f64 = f64::from_bits(0xbffa51a6625307d3);
320 return f64::mul_add(x, M_SQR_PI_OVER_6 * x, 1.);
321 }
322
323 let x2 = x * x;
330
331 let eps = x * f64::mul_add(
332 x2,
333 f64::from_bits(0x3d00000000000000), f64::from_bits(0x3bd0000000000000), );
336
337 const C: [u64; 5] = [
338 0xbffa51a6625307d3,
339 0x3fe9f9cb402bbeaa,
340 0xbfc86a8e466bbb5b,
341 0x3f9ac66d887e2f38,
342 0xbf628473a38d289a,
343 ];
344
345 const F: DoubleDouble =
346 DoubleDouble::from_bit_pair((0xbb93f0a925810000, 0x3ff0000000000000));
347
348 use crate::polyeval::d_estrin_polyeval5;
349 let p = d_estrin_polyeval5(
350 x2,
351 f64::from_bits(C[0]),
352 f64::from_bits(C[1]),
353 f64::from_bits(C[2]),
354 f64::from_bits(C[3]),
355 f64::from_bits(C[4]),
356 );
357 let v = DoubleDouble::from_exact_mult_fma(p, x2);
358 let z = DoubleDouble::add(F, v);
359
360 let lb = z.hi + (z.lo - eps);
361 let ub = z.hi + (z.lo + eps);
362 if lb == ub {
363 return lb;
364 }
365 return as_sincpi_zero(x, &FmaSinCosPiBackend {});
366 }
367
368 let si = e.wrapping_sub(1011);
369 if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
370 if (m0.wrapping_shl(si as u32)) == 0 {
372 return f64::copysign(0.0, x); }
374 let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
375 let num = if t == 0 {
377 f64::copysign(1.0, x)
378 } else {
379 -f64::copysign(1.0, x)
380 };
381 const PI: DoubleDouble =
382 DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18));
383 let r = DoubleDouble::quick_mult_f64_fma(PI, x);
384 let v = DoubleDouble::from_f64_div_dd_fma(num, r);
385 return v.to_f64();
386 }
387
388 use crate::sincospi::reduce_pi_64_fma;
389
390 let (y, k) = reduce_pi_64_fma(x);
391
392 let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
394 let cos_k = DoubleDouble::from_bit_pair(
395 SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
396 );
397
398 let r_sincos = crate::sincospi::sincospi_eval(y, &FmaSinCosPiBackend {});
399
400 const PI: DoubleDouble = DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18));
401 let scale = DoubleDouble::quick_mult_f64_fma(PI, x);
402
403 let sin_k_cos_y = DoubleDouble::quick_mult_fma(sin_k, r_sincos.v_cos);
404 let cos_k_sin_y = DoubleDouble::quick_mult_fma(cos_k, r_sincos.v_sin);
405
406 let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
408 rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
409 rr = DoubleDouble::div_fma(rr, scale);
410
411 let ub = rr.hi + (rr.lo + r_sincos.err); let lb = rr.hi + (rr.lo - r_sincos.err); if ub == lb {
415 return rr.to_f64();
416 }
417 sincpi_dd(y, sin_k, cos_k, scale, &FmaSinCosPiBackend {})
418}
419
420pub fn f_sincpi(x: f64) -> f64 {
426 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
427 {
428 sincpi_gen_impl(x)
429 }
430 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
431 {
432 use std::sync::OnceLock;
433 static EXECUTOR: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
434 let q = EXECUTOR.get_or_init(|| {
435 if std::arch::is_x86_feature_detected!("avx")
436 && std::arch::is_x86_feature_detected!("fma")
437 {
438 sincpi_fma_impl
439 } else {
440 fn def_sincpi(x: f64) -> f64 {
441 sincpi_gen_impl(x)
442 }
443 def_sincpi
444 }
445 });
446 unsafe { q(x) }
447 }
448}
449
450#[cold]
451#[inline(always)]
452fn sincpi_dd<B: SinCosPiBackend>(
453 x: f64,
454 sin_k: DoubleDouble,
455 cos_k: DoubleDouble,
456 scale: DoubleDouble,
457 backend: &B,
458) -> f64 {
459 let r_sincos = crate::sincospi::sincospi_eval_dd(x, backend);
460 let cos_k_sin_y = backend.quick_mult(cos_k, r_sincos.v_sin);
461 let mut rr = backend.mul_add(sin_k, r_sincos.v_cos, cos_k_sin_y);
462 rr = backend.div(rr, scale);
463 rr.to_f64()
464}
465
466#[cfg(test)]
467mod tests {
468 use super::*;
469
470 #[test]
471 fn test_sincpi_zero() {
472 assert_eq!(f_sincpi(2.2204460492503131e-24), 1.0);
473 assert_eq!(f_sincpi(f64::EPSILON), 1.0);
474 assert_eq!(f_sincpi(0.007080019335262543), 0.9999175469662566);
475 assert_eq!(f_sincpi(0.05468860710998057), 0.9950875152844803);
476 assert_eq!(f_sincpi(0.5231231231), 0.6068750737806441);
477 assert_eq!(f_sincpi(1.), 0.);
478 assert_eq!(f_sincpi(-1.), 0.);
479 assert_eq!(f_sincpi(-2.), 0.);
480 assert_eq!(f_sincpi(-3.), 0.);
481 assert!(f_sincpi(f64::INFINITY).is_nan());
482 assert!(f_sincpi(f64::NEG_INFINITY).is_nan());
483 assert!(f_sincpi(f64::NAN).is_nan());
484 }
485}