1use crate::acospi::{INV_PI_DD, INV_PI_F128};
30use crate::common::f_fmla;
31use crate::double_double::DoubleDouble;
32use crate::dyadic_float::DyadicFloat128;
33use crate::rounding::CpuRound;
34use crate::tangent::atan2::{ATAN_I, atan_eval, atan2_hard};
35
36#[cold]
39fn atan2pi_big_exp_difference_hard(
40 num: f64,
41 den: f64,
42 x_sign: usize,
43 y_sign: usize,
44 recip: bool,
45 final_sign: f64,
46) -> f64 {
47 const ZERO: DoubleDouble = DoubleDouble::new(0.0, 0.0);
48 const MZERO: DoubleDouble = DoubleDouble::new(-0.0, -0.0);
49 static CONST_ADJ_INV_PI: [[[DoubleDouble; 2]; 2]; 2] = [
50 [
51 [ZERO, DoubleDouble::new(0., -1. / 2.)],
52 [MZERO, DoubleDouble::new(0., -1. / 2.)],
53 ],
54 [
55 [DoubleDouble::new(0., -1.), DoubleDouble::new(0., 1. / 2.)],
56 [DoubleDouble::new(0., -1.), DoubleDouble::new(0., 1. / 2.)],
57 ],
58 ];
59 let const_term = CONST_ADJ_INV_PI[x_sign][y_sign][recip as usize];
60 let scaled_div = DyadicFloat128::from_div_f64(num, den) * INV_PI_F128;
61 let sign_f128 = DyadicFloat128::new_from_f64(final_sign);
62 let p = DyadicFloat128::new_from_f64(const_term.hi * final_sign);
63 let p1 = sign_f128 * (DyadicFloat128::new_from_f64(const_term.lo) + scaled_div);
64 let r = p + p1;
65 r.fast_as_f64()
66}
67
68static IS_NEG: [f64; 2] = [1.0, -1.0];
69const ZERO: DoubleDouble = DoubleDouble::new(0.0, 0.0);
70const MZERO: DoubleDouble = DoubleDouble::new(-0.0, -0.0);
71const PI: DoubleDouble = DoubleDouble::new(
72 f64::from_bits(0x3ca1a62633145c07),
73 f64::from_bits(0x400921fb54442d18),
74);
75const MPI: DoubleDouble = DoubleDouble::new(
76 f64::from_bits(0xbca1a62633145c07),
77 f64::from_bits(0xc00921fb54442d18),
78);
79const PI_OVER_2: DoubleDouble = DoubleDouble::new(
80 f64::from_bits(0x3c91a62633145c07),
81 f64::from_bits(0x3ff921fb54442d18),
82);
83const MPI_OVER_2: DoubleDouble = DoubleDouble::new(
84 f64::from_bits(0xbc91a62633145c07),
85 f64::from_bits(0xbff921fb54442d18),
86);
87const PI_OVER_4: DoubleDouble = DoubleDouble::new(
88 f64::from_bits(0x3c81a62633145c07),
89 f64::from_bits(0x3fe921fb54442d18),
90);
91const THREE_PI_OVER_4: DoubleDouble = DoubleDouble::new(
92 f64::from_bits(0x3c9a79394c9e8a0a),
93 f64::from_bits(0x4002d97c7f3321d2),
94);
95
96static CONST_ADJ: [[[DoubleDouble; 2]; 2]; 2] = [
99 [[ZERO, MPI_OVER_2], [MZERO, MPI_OVER_2]],
100 [[MPI, PI_OVER_2], [MPI, PI_OVER_2]],
101];
102
103#[inline(always)]
104fn atan2pi_gen_impl(y: f64, x: f64) -> f64 {
105 let x_sign = x.is_sign_negative() as usize;
106 let y_sign = y.is_sign_negative() as usize;
107 let x_bits = x.to_bits() & 0x7fff_ffff_ffff_ffff;
108 let y_bits = y.to_bits() & 0x7fff_ffff_ffff_ffff;
109 let x_abs = x_bits;
110 let y_abs = y_bits;
111 let recip = x_abs < y_abs;
112 let mut min_abs = if recip { x_abs } else { y_abs };
113 let mut max_abs = if !recip { x_abs } else { y_abs };
114 let mut min_exp = min_abs.wrapping_shr(52);
115 let mut max_exp = max_abs.wrapping_shr(52);
116
117 let mut num = f64::from_bits(min_abs);
118 let mut den = f64::from_bits(max_abs);
119
120 if max_exp > 0x7ffu64 - 128u64 || min_exp < 128u64 {
123 if x.is_nan() || y.is_nan() {
124 return f64::NAN;
125 }
126 let x_except = if x == 0.0 {
127 0
128 } else if x.is_infinite() {
129 2
130 } else {
131 1
132 };
133 let y_except = if y == 0.0 {
134 0
135 } else if y.is_infinite() {
136 2
137 } else {
138 1
139 };
140
141 static EXCEPTS: [[[DoubleDouble; 2]; 3]; 3] = [
148 [[ZERO, PI], [ZERO, PI], [ZERO, PI]],
149 [[PI_OVER_2, PI_OVER_2], [ZERO, ZERO], [ZERO, PI]],
150 [
151 [PI_OVER_2, PI_OVER_2],
152 [PI_OVER_2, PI_OVER_2],
153 [PI_OVER_4, THREE_PI_OVER_4],
154 ],
155 ];
156
157 if (x_except != 1) || (y_except != 1) {
158 let mut r = EXCEPTS[y_except][x_except][x_sign];
159 r = DoubleDouble::quick_mult(r, INV_PI_DD);
160 return f_fmla(IS_NEG[y_sign], r.hi, IS_NEG[y_sign] * r.lo);
161 }
162 let scale_up = min_exp < 128u64;
163 let scale_down = max_exp > 0x7ffu64 - 128u64;
164 if scale_up {
167 num *= f64::from_bits(0x43f0000000000000);
168 if !scale_down {
169 den *= f64::from_bits(0x43f0000000000000);
170 }
171 } else if scale_down {
172 den *= f64::from_bits(0x3bf0000000000000);
173 if !scale_up {
174 num *= f64::from_bits(0x3bf0000000000000);
175 }
176 }
177
178 min_abs = num.to_bits();
179 max_abs = den.to_bits();
180 min_exp = min_abs.wrapping_shr(52);
181 max_exp = max_abs.wrapping_shr(52);
182 }
183
184 let final_sign = IS_NEG[((x_sign != y_sign) != recip) as usize];
185 let const_term = CONST_ADJ[x_sign][y_sign][recip as usize];
186 let exp_diff = max_exp - min_exp;
187 if exp_diff > 54 {
190 if max_exp >= 1075 || min_exp < 970 {
191 return atan2pi_big_exp_difference_hard(num, den, x_sign, y_sign, recip, final_sign);
192 }
193 let z = DoubleDouble::from_exact_mult(final_sign, const_term.hi);
194 let mut divided = DoubleDouble::from_exact_div(num, den);
195 divided = DoubleDouble::f64_add(const_term.lo, divided);
196 divided = DoubleDouble::quick_mult_f64(divided, final_sign);
197 let r = DoubleDouble::add(z, divided);
198 let p = DoubleDouble::quick_mult(INV_PI_DD, r);
199 return p.to_f64();
200 }
201
202 let mut k = (64.0 * num / den).cpu_round();
203 let idx = k as u64;
204 k *= f64::from_bits(0x3f90000000000000);
206
207 let num_k = DoubleDouble::from_exact_mult(num, k);
211 let den_k = DoubleDouble::from_exact_mult(den, k);
212
213 let num_dd = DoubleDouble::from_exact_add(num - den_k.hi, -den_k.lo);
215 let mut den_dd = DoubleDouble::from_exact_add(den, num_k.hi);
217 den_dd.lo += num_k.lo;
218
219 let q = DoubleDouble::div(num_dd, den_dd);
221 let p = atan_eval(q);
223
224 let vl = ATAN_I[idx as usize];
225 let vlo = DoubleDouble::from_bit_pair(vl);
226 let mut r = DoubleDouble::add(const_term, DoubleDouble::add(vlo, p));
227
228 r = DoubleDouble::quick_mult(r, INV_PI_DD);
229
230 let err = f_fmla(
231 p.hi,
232 f64::from_bits(0x3bd0000000000000),
233 f64::from_bits(0x3c00000000000000),
234 );
235
236 let ub = r.hi + (r.lo + err);
237 let lb = r.hi + (r.lo - err);
238
239 if ub == lb {
240 r.hi *= final_sign;
241 r.lo *= final_sign;
242
243 return r.to_f64();
244 }
245
246 (atan2_hard(y, x) * INV_PI_F128).fast_as_f64()
247}
248
249#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
250#[target_feature(enable = "avx", enable = "fma")]
251unsafe fn atan2pi_fma_impl(y: f64, x: f64) -> f64 {
252 let x_sign = x.is_sign_negative() as usize;
253 let y_sign = y.is_sign_negative() as usize;
254 let x_bits = x.to_bits() & 0x7fff_ffff_ffff_ffff;
255 let y_bits = y.to_bits() & 0x7fff_ffff_ffff_ffff;
256 let x_abs = x_bits;
257 let y_abs = y_bits;
258 let recip = x_abs < y_abs;
259 let mut min_abs = if recip { x_abs } else { y_abs };
260 let mut max_abs = if !recip { x_abs } else { y_abs };
261 let mut min_exp = min_abs.wrapping_shr(52);
262 let mut max_exp = max_abs.wrapping_shr(52);
263
264 let mut num = f64::from_bits(min_abs);
265 let mut den = f64::from_bits(max_abs);
266
267 if max_exp > 0x7ffu64 - 128u64 || min_exp < 128u64 {
270 if x.is_nan() || y.is_nan() {
271 return f64::NAN;
272 }
273 let x_except = if x == 0.0 {
274 0
275 } else if x.is_infinite() {
276 2
277 } else {
278 1
279 };
280 let y_except = if y == 0.0 {
281 0
282 } else if y.is_infinite() {
283 2
284 } else {
285 1
286 };
287
288 static EXCEPTS: [[[DoubleDouble; 2]; 3]; 3] = [
295 [[ZERO, PI], [ZERO, PI], [ZERO, PI]],
296 [[PI_OVER_2, PI_OVER_2], [ZERO, ZERO], [ZERO, PI]],
297 [
298 [PI_OVER_2, PI_OVER_2],
299 [PI_OVER_2, PI_OVER_2],
300 [PI_OVER_4, THREE_PI_OVER_4],
301 ],
302 ];
303
304 if (x_except != 1) || (y_except != 1) {
305 let mut r = EXCEPTS[y_except][x_except][x_sign];
306 r = DoubleDouble::quick_mult_fma(r, INV_PI_DD);
307 return f64::mul_add(IS_NEG[y_sign], r.hi, IS_NEG[y_sign] * r.lo);
308 }
309 let scale_up = min_exp < 128u64;
310 let scale_down = max_exp > 0x7ffu64 - 128u64;
311 if scale_up {
314 num *= f64::from_bits(0x43f0000000000000);
315 if !scale_down {
316 den *= f64::from_bits(0x43f0000000000000);
317 }
318 } else if scale_down {
319 den *= f64::from_bits(0x3bf0000000000000);
320 if !scale_up {
321 num *= f64::from_bits(0x3bf0000000000000);
322 }
323 }
324
325 min_abs = num.to_bits();
326 max_abs = den.to_bits();
327 min_exp = min_abs.wrapping_shr(52);
328 max_exp = max_abs.wrapping_shr(52);
329 }
330
331 let final_sign = IS_NEG[((x_sign != y_sign) != recip) as usize];
332 let const_term = CONST_ADJ[x_sign][y_sign][recip as usize];
333 let exp_diff = max_exp - min_exp;
334 if exp_diff > 54 {
337 if max_exp >= 1075 || min_exp < 970 {
338 return atan2pi_big_exp_difference_hard(num, den, x_sign, y_sign, recip, final_sign);
339 }
340 let z = DoubleDouble::from_exact_mult_fma(final_sign, const_term.hi);
341 let mut divided = DoubleDouble::from_exact_div_fma(num, den);
342 divided = DoubleDouble::f64_add(const_term.lo, divided);
343 divided = DoubleDouble::quick_mult_f64_fma(divided, final_sign);
344 let r = DoubleDouble::add(z, divided);
345 let p = DoubleDouble::quick_mult_fma(INV_PI_DD, r);
346 return p.to_f64();
347 }
348
349 let mut k = (64.0 * num / den).round();
350 let idx = k as u64;
351 k *= f64::from_bits(0x3f90000000000000);
353
354 let num_k = DoubleDouble::from_exact_mult_fma(num, k);
358 let den_k = DoubleDouble::from_exact_mult_fma(den, k);
359
360 let num_dd = DoubleDouble::from_exact_add(num - den_k.hi, -den_k.lo);
362 let mut den_dd = DoubleDouble::from_exact_add(den, num_k.hi);
364 den_dd.lo += num_k.lo;
365
366 let q = DoubleDouble::div_fma(num_dd, den_dd);
368 use crate::tangent::atan2::atan_eval_fma;
370 let p = atan_eval_fma(q);
371
372 let vl = ATAN_I[idx as usize];
373 let vlo = DoubleDouble::from_bit_pair(vl);
374 let mut r = DoubleDouble::add(const_term, DoubleDouble::add(vlo, p));
375
376 r = DoubleDouble::quick_mult_fma(r, INV_PI_DD);
377
378 let err = f64::mul_add(
379 p.hi,
380 f64::from_bits(0x3bd0000000000000),
381 f64::from_bits(0x3c00000000000000),
382 );
383
384 let ub = r.hi + (r.lo + err);
385 let lb = r.hi + (r.lo - err);
386
387 if ub == lb {
388 r.hi *= final_sign;
389 r.lo *= final_sign;
390
391 return r.to_f64();
392 }
393
394 (atan2_hard(y, x) * INV_PI_F128).fast_as_f64()
395}
396
397pub fn f_atan2pi(y: f64, x: f64) -> f64 {
401 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
402 {
403 atan2pi_gen_impl(y, x)
404 }
405 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
406 {
407 use std::sync::OnceLock;
408 static EXECUTOR: OnceLock<unsafe fn(f64, f64) -> f64> = OnceLock::new();
409 let q = EXECUTOR.get_or_init(|| {
410 if std::arch::is_x86_feature_detected!("avx")
411 && std::arch::is_x86_feature_detected!("fma")
412 {
413 atan2pi_fma_impl
414 } else {
415 fn def_atan2pi(y: f64, x: f64) -> f64 {
416 atan2pi_gen_impl(y, x)
417 }
418 def_atan2pi
419 }
420 });
421 unsafe { q(y, x) }
422 }
423}
424
425#[cfg(test)]
426mod tests {
427 use super::*;
428
429 #[test]
430 fn test_atan2pi() {
431 assert_eq!(
432 f_atan2pi(-0.000000000000010659658919444194, 2088960.4374061823),
433 -0.0000000000000000000016242886924270424
434 );
435 assert_eq!(f_atan2pi(-3.9999999981625933, 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003577142133480227), -0.5);
436 assert_eq!(f_atan2pi(0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000472842255026406,
437 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008045886150098693
438 ),1.8706499392673612e-162);
439 assert_eq!(f_atan2pi(0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002670088630208647,
440 2.0000019071157054
441 ), 4.249573987697093e-307);
442 assert_eq!(f_atan2pi(-5., 2.), -0.3788810584091566);
443 assert_eq!(f_atan2pi(2., -5.), 0.8788810584091566);
444 }
445}