1use crate::common::{dd_fmla, dyad_fmla, f_fmla};
30use crate::double_double::DoubleDouble;
31use crate::shared_eval::poly_dd_3;
32use crate::tangent::atan::{ATAN_CIRCLE, ATAN_REDUCE};
33
/// 1/π in double-double precision.
///
/// The second component (0x3fd45f306dc9c883) is the nearest f64 to 1/π and is
/// used below as `.hi`; the first component is the residual `.lo` term —
/// NOTE(review): `DoubleDouble::new` therefore appears to take (lo, hi); confirm
/// against the `double_double` module.
const ONE_OVER_PI_DD: DoubleDouble = DoubleDouble::new(
    f64::from_bits(0xbc76b01ec5417056),
    f64::from_bits(0x3fd45f306dc9c883),
);
39const ONE_OVER_3PI: f64 = f64::from_bits(0x3fbb2995e7b7b604); fn atanpi_small(x: f64) -> f64 {
42 if x == 0. {
43 return x;
44 }
45 if x.abs() == f64::from_bits(0x0015cba89af1f855) {
46 return if x > 0. {
47 f_fmla(
48 f64::from_bits(0x9a70000000000000),
49 f64::from_bits(0x1a70000000000000),
50 f64::from_bits(0x0006f00f7cd3a40b),
51 )
52 } else {
53 f_fmla(
54 f64::from_bits(0x1a70000000000000),
55 f64::from_bits(0x1a70000000000000),
56 f64::from_bits(0x8006f00f7cd3a40b),
57 )
58 };
59 }
60 let mut v = x.to_bits();
62 if (v & 0xfffffffffffff) == 0x59af9a1194efe
63 {
65 let e = v >> 52;
66 if (e & 0x7ff) > 2 {
67 v = ((e - 2) << 52) | 0xb824198b94a89;
68 return if x > 0. {
69 dd_fmla(
70 f64::from_bits(0x9a70000000000000),
71 f64::from_bits(0x1a70000000000000),
72 f64::from_bits(v),
73 )
74 } else {
75 dd_fmla(
76 f64::from_bits(0x1a70000000000000),
77 f64::from_bits(0x1a70000000000000),
78 f64::from_bits(v),
79 )
80 };
81 }
82 }
83 let h = x * ONE_OVER_PI_DD.hi;
84 let mut corr = dd_fmla(
87 x * f64::from_bits(0x4690000000000000),
88 ONE_OVER_PI_DD.hi,
89 -h * f64::from_bits(0x4690000000000000),
90 );
91 corr = dd_fmla(
92 x * f64::from_bits(0x4690000000000000),
93 ONE_OVER_PI_DD.lo,
94 corr,
95 );
96 dyad_fmla(corr, f64::from_bits(0x3950000000000000), h)
99}
100
101#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
102#[target_feature(enable = "avx", enable = "fma")]
103unsafe fn atanpi_small_fma(x: f64) -> f64 {
104 if x == 0. {
105 return x;
106 }
107 if x.abs() == f64::from_bits(0x0015cba89af1f855) {
108 return if x > 0. {
109 f64::mul_add(
110 f64::from_bits(0x9a70000000000000),
111 f64::from_bits(0x1a70000000000000),
112 f64::from_bits(0x0006f00f7cd3a40b),
113 )
114 } else {
115 f64::mul_add(
116 f64::from_bits(0x1a70000000000000),
117 f64::from_bits(0x1a70000000000000),
118 f64::from_bits(0x8006f00f7cd3a40b),
119 )
120 };
121 }
122 let mut v = x.to_bits();
124 if (v & 0xfffffffffffff) == 0x59af9a1194efe
125 {
127 let e = v >> 52;
128 if (e & 0x7ff) > 2 {
129 v = ((e - 2) << 52) | 0xb824198b94a89;
130 return if x > 0. {
131 f64::mul_add(
132 f64::from_bits(0x9a70000000000000),
133 f64::from_bits(0x1a70000000000000),
134 f64::from_bits(v),
135 )
136 } else {
137 f64::mul_add(
138 f64::from_bits(0x1a70000000000000),
139 f64::from_bits(0x1a70000000000000),
140 f64::from_bits(v),
141 )
142 };
143 }
144 }
145 let h = x * ONE_OVER_PI_DD.hi;
146 let mut corr = f64::mul_add(
149 x * f64::from_bits(0x4690000000000000),
150 ONE_OVER_PI_DD.hi,
151 -h * f64::from_bits(0x4690000000000000),
152 );
153 corr = f64::mul_add(
154 x * f64::from_bits(0x4690000000000000),
155 ONE_OVER_PI_DD.lo,
156 corr,
157 );
158 f64::mul_add(corr, f64::from_bits(0x3950000000000000), h)
161}
162
163fn atanpi_asympt(x: f64) -> f64 {
169 let h = f64::copysign(0.5, x);
170 let rcy = DoubleDouble::from_quick_recip(x);
172 let mut m = DoubleDouble::quick_mult(rcy, ONE_OVER_PI_DD);
173 m.hi = -m.hi;
175 m.lo = dd_fmla(ONE_OVER_3PI * rcy.hi, rcy.hi * rcy.hi, -m.lo);
176 let vh = DoubleDouble::from_exact_add(h, m.hi);
178 m.hi = vh.hi;
179 m = DoubleDouble::from_exact_add(vh.lo, m.lo);
180 if m.hi.abs() == f64::from_bits(0x3c80000000000000) {
181 m.hi = if m.hi * m.lo > 0. {
183 f64::copysign(f64::from_bits(0x3c80000000000001), m.hi)
184 } else {
185 f64::copysign(f64::from_bits(0x3c7fffffffffffff), m.hi)
186 };
187 }
188 vh.hi + m.hi
189}
190
191#[inline]
192fn atanpi_tiny(x: f64) -> f64 {
193 let mut dx = DoubleDouble::quick_mult_f64(ONE_OVER_PI_DD, x);
194 dx.lo = dd_fmla(-ONE_OVER_3PI * x, x * x, dx.lo);
195 dx.to_f64()
196}
197
/// Slow-path refinement of atan(x)/π, called when the fast path's rounding
/// test is inconclusive.
///
/// * `x` — the original argument.
/// * `a` — the fast path's approximation of atan(x); NOTE(review): it is used
///   only to select the reduction-table index via `phi`, not as a numeric
///   starting value — confirm against the callers in `atanpi_gen_impl`.
///
/// The exact sequence of exact-add / double-double operations below encodes
/// carefully analyzed error bounds; do not reorder.
#[cold]
fn as_atanpi_refine2(x: f64, a: f64) -> f64 {
    // Very large arguments: asymptotic expansion around ±1/2.
    if x.abs() > f64::from_bits(0x413be00000000000) {
        return atanpi_asympt(x);
    }
    // Very small arguments: two-term series.
    if x.abs() < f64::from_bits(0x3e4c700000000000) {
        return atanpi_tiny(x);
    }
    // High-order atan series coefficients (-1/3, 1/5, -1/7) as double-doubles…
    const CH: [(u64, u64); 3] = [
        (0xbfd5555555555555, 0xbc75555555555555),
        (0x3fc999999999999a, 0xbc6999999999bcb8),
        (0xbfc2492492492492, 0xbc6249242093c016),
    ];
    // …and lower-precision tail coefficients (≈ 1/9, -1/11, 1/13, -1/15).
    const CL: [u64; 4] = [
        0x3fbc71c71c71c71c,
        0xbfb745d1745d1265,
        0x3fb3b13b115bcbc4,
        0xbfb1107c41ad3253,
    ];
    // Index into the reduction table from |a|; the constant is ≈ 128/π, so
    // i ∈ [0, 128] spans atan values up to π/2 (128 maps to the reciprocal
    // branch below).
    let phi = ((a.abs()) * f64::from_bits(0x40545f306dc9c883) + 256.5).to_bits();
    let i: i64 = ((phi >> (52 - 8)) & 0xff) as i64;
    let dh: DoubleDouble = if i == 128 {
        // Near ±π/2: reduce via atan(x) = ±π/2 - atan(1/x).
        DoubleDouble::from_quick_recip(-x)
    } else {
        // Argument reduction: dh = (x - ta) / (1 + x*ta) with ta = tan(i·π/256)
        // from the table, all in double-double precision.
        let ta = f64::copysign(f64::from_bits(ATAN_REDUCE[i as usize].0), x);
        let dzta = DoubleDouble::from_exact_mult(x, ta);
        let zmta = x - ta;
        // Compute v = 1 + x*ta exactly (ev holds the rounding error of v).
        let v = 1. + dzta.hi;
        let d = 1. - v;
        let ev = (d + dzta.hi) - ((d + v) - 1.) + dzta.lo;
        // One Newton-style correction for the reciprocal of v.
        let r = 1.0 / v;
        let rl = f_fmla(-ev, r, dd_fmla(r, -v, 1.0)) * r;
        DoubleDouble::quick_mult_f64(DoubleDouble::new(rl, r), zmta)
    };
    // Polynomial in dh²: double-double head (CH) plus a cheap f64 tail (CL).
    let d2 = DoubleDouble::quick_mult(dh, dh);
    let h4 = d2.hi * d2.hi;
    let h3 = DoubleDouble::quick_mult(dh, d2);

    let fl0 = f_fmla(d2.hi, f64::from_bits(CL[1]), f64::from_bits(CL[0]));
    let fl1 = f_fmla(d2.hi, f64::from_bits(CL[3]), f64::from_bits(CL[2]));

    let fl = d2.hi * f_fmla(h4, fl1, fl0);
    let mut f = poly_dd_3(d2, CH, fl);
    f = DoubleDouble::quick_mult(h3, f);
    let (ah, mut al, mut at);
    if i == 0 {
        // No table offset: atan(x) ≈ dh + dh³·poly.
        ah = dh.hi;
        al = f.hi;
        at = f.lo;
    } else {
        let mut df = 0.;
        if i < 128 {
            // Low word of the tabulated atan value (absent for the i == 128
            // reciprocal branch).
            df = f64::copysign(1.0, x) * f64::from_bits(ATAN_REDUCE[i as usize].1);
        }
        // id·(π/256) in triple precision (three words: ah, al, at).
        let id = f64::copysign(i as f64, x);
        ah = f64::from_bits(0x3f8921fb54442d00) * id;
        al = f64::from_bits(0x3c88469898cc5180) * id;
        at = f64::from_bits(0xb97fc8f8cbb5bf80) * id;
        // Accumulate table low word, reduced argument, and polynomial.
        let v0 = DoubleDouble::add(DoubleDouble::new(at, al), DoubleDouble::new(0., df));
        let v1 = DoubleDouble::add(v0, dh);
        let v2 = DoubleDouble::add(v1, f);
        al = v2.hi;
        at = v2.lo;
    }

    // Renormalize the three-word sum, then divide by π.
    let v2 = DoubleDouble::from_exact_add(ah, al);
    let v1 = DoubleDouble::from_exact_add(v2.lo, at);

    let z0 = DoubleDouble::quick_mult(DoubleDouble::new(v1.hi, v2.hi), ONE_OVER_PI_DD);
    z0.to_f64()
}
270
/// Generic atan(x)/π core, parameterized over the FMA / double-double
/// primitives so the same body serves both the portable build (`f_fmla`,
/// `dd_fmla`, `DoubleDouble::quick_mult`) and the hardware-FMA build
/// (`f64::mul_add`, `DoubleDouble::quick_mult_fma`).
///
/// Strategy: classify |x| by its raw bits, pick a fast evaluation (small-x
/// polynomial, table-based reduction, or large-x reciprocal), then run a
/// Ziv-style rounding test (upper bound == lower bound); on failure fall back
/// to the cold `as_atanpi_refine2` path. Statement order encodes the error
/// analysis — do not reorder.
#[inline(always)]
fn atanpi_gen_impl<
    Q: Fn(f64, f64, f64) -> f64,
    F: Fn(f64, f64, f64) -> f64,
    DdMul: Fn(DoubleDouble, DoubleDouble) -> DoubleDouble,
    AtanpiSmall: Fn(f64) -> f64,
>(
    x: f64,
    fma: Q,
    dd_fma: F,
    dd_mul: DdMul,
    atanpi_small: AtanpiSmall,
) -> f64 {
    // atan series head coefficients (1, ≈-1/3, ≈1/5, ≈-1/7) for the
    // reduced-argument polynomial.
    const CH: [u64; 4] = [
        0x3ff0000000000000,
        0xbfd555555555552b,
        0x3fc9999999069c20,
        0xbfc248d2c8444ac6,
    ];
    let t = x.to_bits();
    // |x| as raw bits — all range checks below are integer compares.
    let at: u64 = t & 0x7fff_ffff_ffff_ffff;
    // Candidate table index from the exponent; only valid (and only used) in
    // the mid-range branch below.
    let mut i = (at >> 51).wrapping_sub(2030u64);
    if at < 0x3f7b21c475e6362au64 {
        // Small |x| (below ≈ 6.6e-3): odd polynomial in x.
        if at < 0x3c90000000000000u64 {
            // Tiny |x| (below 2^-54): dedicated underflow-aware path.
            return atanpi_small(x);
        }
        if x == 0. {
            return x;
        }
        // Correction coefficients for atan(x) - x (≈ -1/3, 1/5, -1/7, 1/9).
        const CH2: [u64; 4] = [
            0xbfd5555555555555,
            0x3fc99999999998c1,
            0xbfc249249176aec0,
            0x3fbc711fd121ae80,
        ];
        let x2 = x * x;
        let x3 = x * x2;
        let x4 = x2 * x2;

        let f0 = fma(x2, f64::from_bits(CH2[3]), f64::from_bits(CH2[2]));
        let f1 = fma(x2, f64::from_bits(CH2[1]), f64::from_bits(CH2[0]));

        // f ≈ atan(x) - x; then (x + f) / π in double-double.
        let f = x3 * dd_fma(x4, f0, f1);
        let hy = dd_mul(DoubleDouble::new(f, x), ONE_OVER_PI_DD);
        // Rounding test: perturb by ±(error bound)·x; equal results mean the
        // rounding is decided.
        let mut ub = hy.hi + dd_fma(f64::from_bits(0x3bb6f00000000000), x, hy.lo);
        let lb = hy.hi + dd_fma(f64::from_bits(0xbbb6f00000000000), x, hy.lo);
        if ub == lb {
            return ub;
        }
        // Undecided: hand the refinement an atan(x) estimate (not /π).
        ub = (f + f * f64::from_bits(0x3cd2000000000000)) + x;
        return as_atanpi_refine2(x, ub);
    }
    let h;
    let mut a: DoubleDouble;
    if at > 0x4062ded8e34a9035u64 {
        // Large |x| (above ≈ 151): atan(x) = ±π/2 - atan(1/x).
        if at >= 0x43445f306dc9c883u64 {
            // Huge |x|: result is ±1/2 up to one ulp.
            if at >= (0x7ffu64 << 52) {
                // Non-finite input.
                if at == 0x7ffu64 << 52 {
                    // atanpi(±inf) = ±1/2.
                    return f64::copysign(0.5, x);
                }
                // NaN: propagate (and quiet) via x + x.
                return x + x;
            }
            // Finite but huge: ±1/2 rounded toward zero by one ulp.
            return f64::copysign(0.5, x) - f64::copysign(f64::from_bits(0x3c70000000000000), x);
        }
        // Reduced argument h = -1/x; offset a = ±π/2 as a double-double
        // (0x3ff921fb54442d18 is π/2, the other word its low part).
        h = -1.0 / x;
        a = DoubleDouble::new(
            f64::copysign(f64::from_bits(0x3c91a62633145c07), x),
            f64::copysign(f64::from_bits(0x3ff921fb54442d18), x),
        );
    } else {
        // Mid-range: table-based reduction atan(x) = atan(ta) + atan(h),
        // h = (x - ta)/(1 + x·ta).
        if at == 0x3ff0000000000000 {
            // Exact: atanpi(±1) = ±1/4.
            return f64::copysign(f64::from_bits(0x3fd0000000000000), x);
        }
        // Quadratic fit (per-binade ATAN_CIRCLE row) on the top mantissa bits
        // picks the ATAN_REDUCE index.
        let u: u64 = t & 0x0007ffffffffffff;
        let ut = u >> (51 - 16);
        let ut2 = (ut * ut) >> 16;
        let vc = ATAN_CIRCLE[i as usize];
        i = (((vc[0] as u64).wrapping_shl(16)) + ut * (vc[1] as u64) - ut2 * (vc[2] as u64))
            >> (16 + 9);
        let va = ATAN_REDUCE[i as usize];
        let ta = f64::copysign(1.0, x) * f64::from_bits(va.0);
        let id = f64::copysign(1.0, x) * i as f64;
        h = (x - ta) / fma(x, ta, 1.);
        // a = i·(π/256) plus the table's low word, as a double-double
        // (0x3f8921fb54442d00 ≈ π/256; the fma folds in the correction terms).
        a = DoubleDouble::new(
            fma(
                f64::copysign(1.0, x),
                f64::from_bits(va.1),
                f64::from_bits(0x3c88469898cc5170) * id,
            ),
            f64::from_bits(0x3f8921fb54442d00) * id,
        );
    }
    // Shared tail: atan(h) via degree-7 odd polynomial, accumulated onto a.
    let h2 = h * h;
    let h4 = h2 * h2;

    let f0 = fma(h2, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
    let f1 = fma(h2, f64::from_bits(CH[1]), f64::from_bits(CH[0]));

    let f = fma(h4, f0, f1);
    a.lo = dd_fma(h, f, a.lo);
    // ub0: atan(x) estimate kept aside for the refinement fallback.
    let e0 = h * f64::from_bits(0x3ccf800000000000);
    let ub0 = (a.lo + e0) + a.hi;
    a = DoubleDouble::from_exact_add(a.hi, a.lo);
    a = dd_mul(a, ONE_OVER_PI_DD);
    // Ziv rounding test on atan(x)/π with error bound e.
    let e = h * f64::from_bits(0x3cb4100000000000);
    let ub = (a.lo + e) + a.hi;
    let lb = (a.lo - e) + a.hi;
    if ub == lb {
        return ub;
    }
    as_atanpi_refine2(x, ub0)
}
416
/// Hardware-FMA instantiation of [`atanpi_gen_impl`].
///
/// # Safety
/// Must only be called on a CPU supporting AVX and FMA; `f_atanpi` guarantees
/// this via `is_x86_feature_detected!` before selecting this function.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx", enable = "fma")]
unsafe fn atanpi_fma_impl(x: f64) -> f64 {
    atanpi_gen_impl(
        x,
        f64::mul_add,
        f64::mul_add,
        DoubleDouble::quick_mult_fma,
        // SAFETY: same target-feature contract as this function itself.
        |x| unsafe { atanpi_small_fma(x) },
    )
}
428
/// Computes atan(x)/π.
///
/// On x86/x86_64 the implementation is chosen once at first call: if the CPU
/// supports AVX+FMA the hardware-FMA variant is cached in a `OnceLock`,
/// otherwise the portable software-FMA variant is used. Other architectures
/// always use the portable path.
pub fn f_atanpi(x: f64) -> f64 {
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
    {
        atanpi_gen_impl(x, f_fmla, dd_fmla, DoubleDouble::quick_mult, atanpi_small)
    }
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        use std::sync::OnceLock;
        // Function pointer selected once; feature detection is not repeated
        // on subsequent calls.
        static EXECUTOR: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
        let q = EXECUTOR.get_or_init(|| {
            if std::arch::is_x86_feature_detected!("avx")
                && std::arch::is_x86_feature_detected!("fma")
            {
                atanpi_fma_impl
            } else {
                fn def_atanpi(x: f64) -> f64 {
                    atanpi_gen_impl(x, f_fmla, dd_fmla, DoubleDouble::quick_mult, atanpi_small)
                }
                def_atanpi
            }
        });
        // SAFETY: `atanpi_fma_impl` is only stored when AVX+FMA were detected
        // above; the fallback is a plain safe fn.
        unsafe { q(x) }
    }
}
456
#[cfg(test)]
mod tests {

    use super::*;

    /// Spot-checks across the implementation's branches: huge |x| (≈ ±1/2
    /// asymptote), subnormal-range tiny input, small-polynomial path (both
    /// signs), and mid-range table-reduction path.
    #[test]
    fn atanpi_test() {
        assert_eq!(f_atanpi(119895888825197.97), 0.49999999999999734);
        assert_eq!(f_atanpi(1.5610674235815122E-307), 4.969031939254545e-308);
        assert_eq!(f_atanpi(0.00004577636903266291), 0.000014571070806516354);
        assert_eq!(f_atanpi(-0.00004577636903266291), -0.000014571070806516354);
        assert_eq!(f_atanpi(-0.4577636903266291), -0.13664770469904508);
        assert_eq!(f_atanpi(0.9876636903266291), 0.24802445507819193);
    }
}