1use crate::common::f_fmla;
30
/// Shared kernel evaluating `atan(x) / pi` for an `f32` argument.
///
/// `fma(a, b, c)` must return `a * b + c`; it is injected so callers can pick
/// a hardware fused multiply-add or a portable fallback.
///
/// Evaluation strategy, in order:
/// 1. `|x| >= 2^25`, infinities and NaN: the result is `±0.5` minus a tiny
///    correction (NaN is propagated).
/// 2. `|x| < 2^-13`: short series `x/pi - x^3/(3*pi)` (only the first term
///    below `2^-25`).
/// 3. Three inputs matched by exact bit pattern return prepared results.
/// 4. Otherwise a rational approximation `z * P(z^2) / Q(z^2)` is evaluated in
///    `f64`, with `z = 1/x` and a reflection around `±0.5` when `|x| >= 1`.
#[inline(always)]
fn atanpif_gen_impl<Q: Fn(f64, f64, f64) -> f64>(x: f32, fma: Q) -> f32 {
    const EXP_BIAS: i32 = 127;
    // Numerator coefficients (f64 bit patterns), lowest order first.
    const NUM: [u64; 6] = [
        0x3fd45f306dc9c882,
        0x3fe733b561bc23d5,
        0x3fe28d9805bdfbf2,
        0x3fc8c3ba966ae287,
        0x3f994a7f81ee634b,
        0x3f4a6bbf6127a6df,
    ];
    // Denominator coefficients (f64 bit patterns), lowest order first.
    const DEN: [u64; 7] = [
        0x3ff0000000000000,
        0x4004e3b3ecc2518f,
        0x4003ef4a360ff063,
        0x3ff0f1dc55bad551,
        0x3fc8da0fecc018a4,
        0x3f88fa87803776bf,
        0x3f1dadf2ca0acb43,
    ];

    let bits = x.to_bits();
    let biased_exp = ((bits >> 23) & 0xff) as i32;
    // True when |x| >= 1; such inputs are handled through 1/x.
    let invert = biased_exp >= EXP_BIAS;

    // Huge magnitudes: |x| >= 2^25, including infinities and NaN.
    if biased_exp > EXP_BIAS + 24 {
        let half = f32::copysign(0.5, x);
        if biased_exp == 0xff {
            // A non-zero mantissa means NaN (propagated via x + x); otherwise infinity.
            return if bits.wrapping_shl(9) != 0 { x + x } else { half };
        }
        if x.abs() >= f32::from_bits(0x7da2f983) {
            // For the largest finite inputs a fixed 2^-26 offset replaces the
            // quotient below (presumably because that quotient would underflow).
            return half - f32::copysign(f32::from_bits(0x32800000), x);
        }
        // 0x3ea2f983 is 1/pi rounded to f32.
        return half - f32::from_bits(0x3ea2f983) / x;
    }

    let xd = x as f64;

    // Tiny magnitudes: |x| < 2^-13.
    if biased_exp < EXP_BIAS - 13 {
        // x / pi, using 1/pi rounded to f64.
        let scaled = xd * f64::from_bits(0x3fd45f306dc9c883);
        if biased_exp < EXP_BIAS - 25 {
            // Below 2^-25 the linear term alone is returned.
            return scaled as f32;
        }
        // Cubic correction: x/pi - (1/3) * (x/pi) * x^2.
        let third_of_scaled = f64::from_bits(0x3fd5555555555555) * scaled;
        return (scaled - third_of_scaled * (xd * xd)) as f32;
    }

    // Inputs matched by exact magnitude bits get prepared results; the tiny
    // second term only matters under non-default rounding modes.
    match bits & 0x7fff_ffff {
        0x3fa267dd => {
            return f32::copysign(f32::from_bits(0x3e933802), x)
                - f32::copysign(f32::from_bits(0x24000000), x);
        }
        0x3f693531 => {
            return f32::copysign(f32::from_bits(0x3e70d331), x)
                + f32::copysign(f32::from_bits(0x31800000), x);
        }
        // |x| == 1 maps to exactly ±0.25.
        0x3f800000 => return f32::copysign(f32::from_bits(0x3e800000), x),
        _ => {}
    }

    // Reduce to |z| <= 1.
    let z = if invert { 1. / xd } else { xd };
    let z2 = z * z;
    let z4 = z2 * z2;
    let z8 = z4 * z4;

    // Numerator: pairwise Horner steps combined with z^4 and z^8, then times z.
    let n01 = fma(z2, f64::from_bits(NUM[1]), f64::from_bits(NUM[0]));
    let n23 = fma(z2, f64::from_bits(NUM[3]), f64::from_bits(NUM[2]));
    let n45 = fma(z2, f64::from_bits(NUM[5]), f64::from_bits(NUM[4]));
    let numerator = ((n01 + z4 * n23) + z8 * n45) * z;

    // Denominator: same pairing scheme with one extra top coefficient.
    let d01 = fma(z2, f64::from_bits(DEN[1]), f64::from_bits(DEN[0]));
    let d23 = fma(z2, f64::from_bits(DEN[3]), f64::from_bits(DEN[2]));
    let d45 = fma(z2, f64::from_bits(DEN[5]), f64::from_bits(DEN[4]));
    let d6 = f64::from_bits(DEN[6]);
    let den_low = d01 + z4 * d23;
    let den_high = d45 + z4 * d6;
    let denominator = fma(z8, den_high, den_low);

    let ratio = numerator / denominator;
    // For |x| >= 1: atan(x)/pi = sign(x)/2 - atan(1/x)/pi.
    let result = if invert {
        f64::copysign(0.5, z) - ratio
    } else {
        ratio
    };
    result as f32
}
119
120#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
121#[target_feature(enable = "avx", enable = "fma")]
122unsafe fn atanpif_fma_impl(x: f32) -> f32 {
123 atanpif_gen_impl(x, f64::mul_add)
124}
125
126#[inline]
130pub fn f_atanpif(x: f32) -> f32 {
131 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
132 {
133 atanpif_gen_impl(x, f_fmla)
134 }
135 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
136 {
137 use std::sync::OnceLock;
138 static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
139 let q = EXECUTOR.get_or_init(|| {
140 if std::arch::is_x86_feature_detected!("avx")
141 && std::arch::is_x86_feature_detected!("fma")
142 {
143 atanpif_fma_impl
144 } else {
145 fn def_atanpif(x: f32) -> f32 {
146 atanpif_gen_impl(x, f_fmla)
147 }
148 def_atanpif
149 }
150 });
151 unsafe { q(x) }
152 }
153}
154
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `f_atanpif` against exact expected results for zero, ±1 and ±1.5.
    #[test]
    fn test_atanpif() {
        // (input, expected atan(input) / pi)
        let cases: [(f32, f32); 5] = [
            (0.0, 0.0),
            (1.0, 0.25),
            (1.5, 0.31283295),
            (-1.0, -0.25),
            (-1.5, -0.31283295),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(f_atanpif(input), expected);
        }
    }
}