1use crate::common::f_fmla;
30
/// Additive offsets (in units of π) applied after the rational approximation.
///
/// Indexed by `(sign bit of y) * 4 + (sign bit of x) * 2 + (|y| > |x|)`.
static OFF: [f32; 8] = [
    0.0, 0.5, // y sign clear, x sign clear
    1.0, 0.5, // y sign clear, x sign set
    -0.0, -0.5, // y sign set, x sign clear
    -1.0, -0.5, // y sign set, x sign set
];
/// Single-precision `+1` / `-1`, indexed by a sign bit.
static SGNF: [f32; 2] = [1.0, -1.0];
/// Double-precision `+1` / `-1`, indexed by the `|y| > |x|` flag.
static SGN: [f64; 2] = [1.0, -1.0];
34
35#[inline(always)]
36fn atan2pif_gen_impl<Q: Fn(f64, f64, f64) -> f64>(y: f32, x: f32, fma: Q) -> f32 {
37 let tx = x.to_bits();
38 let ty: u32 = y.to_bits();
39 let ux: u32 = tx;
40 let uy: u32 = ty;
41 let ax: u32 = ux & 0x7fff_ffff;
42 let ay = uy & 0x7fff_ffff;
43 if ay >= (0xff << 23) || ax >= (0xff << 23) {
44 if ay > (0xff << 23) {
45 return x + y;
46 } if ax > (0xff << 23) {
48 return x + y;
49 } let yinf = ay == (0xff << 23);
51 let xinf = ax == (0xff << 23);
52 if yinf & xinf {
53 return if (ux >> 31) != 0 {
54 0.75 * SGNF[(uy >> 31) as usize]
55 } else {
56 0.25 * SGNF[(uy >> 31) as usize]
57 };
58 }
59 if xinf {
60 return if (ux >> 31) != 0 {
61 SGNF[(uy >> 31) as usize]
62 } else {
63 0.0 * SGNF[(uy >> 31) as usize]
64 };
65 }
66 if yinf {
67 return 0.5 * SGNF[(uy >> 31) as usize];
68 }
69 }
70 if ay == 0 {
71 if (ay | ax) == 0 {
72 let i: u32 = (uy >> 31) * 4 + (ux >> 31) * 2;
73 return OFF[i as usize];
74 }
75 if (ux >> 31) == 0 {
76 return 0.0 * SGNF[(uy >> 31) as usize];
77 }
78 }
79 if ax == ay {
80 static S: [f32; 4] = [0.25, 0.75, -0.25, -0.75];
81 let i = (uy >> 31) * 2 + (ux >> 31);
82 return S[i as usize];
83 }
84 let gt: usize = (ay > ax) as usize;
85 let i: u32 = (uy >> 31) * 4 + (ux >> 31) * 2 + gt as u32;
86
87 let zx = x as f64;
88 let zy = y as f64;
89 static M: [f64; 2] = [0., 1.];
90
91 let mut z = fma(M[gt], zx, M[1 - gt] * zy) / fma(M[gt], zy, M[1 - gt] * zx);
92
93 const CN: [u64; 7] = [
94 0x3fd45f306dc9c883,
95 0x3fe988d83a142ada,
96 0x3fe747bebf492057,
97 0x3fd2cc5645094ff3,
98 0x3faa0521c711ab66,
99 0x3f6881b8058b9a0d,
100 0x3efb16ff514a0af0,
101 ];
102
103 let mut r = f64::from_bits(CN[0]);
104 let z2 = z * z;
105 z *= SGN[gt];
106 if z2 > f64::from_bits(0x3c90000000000000) {
108 let z4 = z2 * z2;
109 let z8 = z4 * z4;
110 let mut cn0 = fma(z2, f64::from_bits(CN[1]), r);
111 let cn2 = fma(z2, f64::from_bits(CN[3]), f64::from_bits(CN[2]));
112 let mut cn4 = fma(z2, f64::from_bits(CN[5]), f64::from_bits(CN[4]));
113 let cn6 = f64::from_bits(CN[6]);
114 cn0 += z4 * cn2;
115 cn4 += z4 * cn6;
116 cn0 += z8 * cn4;
117
118 const CD: [u64; 7] = [
119 0x3ff0000000000000,
120 0x4006b8b143a3f6da,
121 0x4008421201d18ed5,
122 0x3ff8221d086914eb,
123 0x3fd670657e3a07ba,
124 0x3fa0f4951fd1e72d,
125 0x3f4b3874b8798286,
126 ];
127
128 let mut cd0 = fma(z2, f64::from_bits(CD[1]), f64::from_bits(CD[0]));
129 let cd2 = fma(z2, f64::from_bits(CD[3]), f64::from_bits(CD[2]));
130 let mut cd4 = fma(z2, f64::from_bits(CD[5]), f64::from_bits(CD[4]));
131 let cd6 = f64::from_bits(CD[6]);
132 cd0 = fma(z4, cd2, cd0);
133 cd4 = fma(z4, cd6, cd4);
134 cd0 = fma(z8, cd4, cd0);
135
136 r = cn0 / cd0;
137 }
138 fma(z, r, OFF[i as usize] as f64) as f32
139}
140
141#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
142#[target_feature(enable = "avx", enable = "fma")]
143unsafe fn atan2pif_fma_impl(y: f32, x: f32) -> f32 {
144 atan2pif_gen_impl(y, x, f64::mul_add)
145}
146
147#[inline]
151pub fn f_atan2pif(y: f32, x: f32) -> f32 {
152 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
153 {
154 atan2pif_gen_impl(y, x, f_fmla)
155 }
156 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
157 {
158 use std::sync::OnceLock;
159 static EXECUTOR: OnceLock<unsafe fn(f32, f32) -> f32> = OnceLock::new();
160 let q = EXECUTOR.get_or_init(|| {
161 if std::arch::is_x86_feature_detected!("avx")
162 && std::arch::is_x86_feature_detected!("fma")
163 {
164 atan2pif_fma_impl
165 } else {
166 fn def_atan2pif(y: f32, x: f32) -> f32 {
167 atan2pif_gen_impl(y, x, f_fmla)
168 }
169 def_atan2pif
170 }
171 });
172 unsafe { q(y, x) }
173 }
174}
175
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks `f_atan2pif` against fixed `(y, x, expected)` triples.
    #[test]
    fn test_atan2pif() {
        let cases: [(f32, f32, f32); 3] = [
            (0.32131, 0.987565, 0.10012555),
            (532.32131, 12.987565, 0.49223542),
            (-754.32131, 12.987565, -0.494520042),
        ];
        for (y, x, expected) in cases {
            assert_eq!(f_atan2pif(y, x), expected);
        }
    }
}