1use crate::common::f_fmla;
30
/// Polynomial coefficients used by `asinpif_gen_impl`, stored as raw `f64` bit patterns
/// (each entry is decoded with `f64::from_bits`).
///
/// There are 16 rows of 8 coefficients `c[0]..c[7]`, in ascending order of degree.
/// The row index is `floor(16 * |x|)` for `2^-12 <= |x| < 1`, and `0` for smaller `|x|`.
///
/// * Row 0 is evaluated as a polynomial in `x^2` and then multiplied by `x`
///   (its leading coefficient is within one ulp of `1/π`).
/// * Rows 1..=15 are evaluated as a polynomial `P` in `|x|`; the result is formed as
///   `0.5 - P(|x|) * sqrt(1 - |x|)` with the sign of `x` applied to both terms
///   (their leading coefficients are all close to `0.5`).
///
/// NOTE(review): the name suggests the table is shared with an `acos(x)/π` routine
/// elsewhere in the crate; that is not visible from this file — confirm before changing values.
pub(crate) static ASINCOSF_PI_TABLE: [[u64; 8]; 16] = [
    [
        0x3fd45f306dc9c882,
        0x3fab2995e7b7dc2f,
        0x3f98723a1cf50c7e,
        0x3f8d1a4591d16a29,
        0x3f83ce3aa68ddaee,
        0x3f7d3182ab0cc1bf,
        0x3f762b379a8b88e3,
        0x3f76811411fcfec2,
    ],
    [
        0x3fdffffffffd3cda,
        0xbfb17cc1b3355fdd,
        0x3f9d067a1e8d5a99,
        0xbf908e16fb09314a,
        0x3f85eed43d42dcb2,
        0xbf7f58baca7acc71,
        0x3f75dab64e2dcf15,
        0xbf659270e30797ac,
    ],
    [
        0x3fdfffffff7c4617,
        0xbfb17cc149ded3a2,
        0x3f9d0654d4cb2c1a,
        0xbf908c3ba713d33a,
        0x3f85d2053481079c,
        0xbf7e485ebc545e7e,
        0x3f7303baca167ddd,
        0xbf5dee8d16d06b38,
    ],
    [
        0x3fdffffffa749848,
        0xbfb17cbe71559350,
        0x3f9d05a312269adf,
        0xbf90862b3ee617d7,
        0x3f85920708db2a73,
        0xbf7cb0463b3862c3,
        0x3f702b82478f95d7,
        0xbf552a7b8579e729,
    ],
    [
        0x3fdfffffe1f92bb5,
        0xbfb17cb3e74c64e3,
        0x3f9d03af67311cbf,
        0xbf9079441cbfc7a0,
        0x3f852b4287805a61,
        0xbf7ac3286d604a98,
        0x3f6b2f1210d9701b,
        0xbf4e740ddc25afd6,
    ],
    [
        0x3fdfffff92beb6e2,
        0xbfb17c986fe9518b,
        0x3f9cff98167c9a5e,
        0xbf90638b591eae52,
        0x3f84a0803828959e,
        0xbf78adeca229f11d,
        0x3f66b9a7ba05dfce,
        0xbf4640521a43b2d0,
    ],
    [
        0x3fdffffeccee5bfc,
        0xbfb17c5f1753f5ea,
        0x3f9cf874e4fe258f,
        0xbf9043e6cf77b256,
        0x3f83f7db42227d92,
        0xbf7691a6fa2a2882,
        0x3f62f6543162bc61,
        0xbf407d5da05822b6,
    ],
    [
        0x3fdffffd2f64431d,
        0xbfb17bf8208c10c1,
        0x3f9ced7487cdb124,
        0xbf901a0d30932905,
        0x3f83388f99b254da,
        0xbf74844e245c65bd,
        0x3f5fa777150197c6,
        0xbf38c1ecf16a05c8,
    ],
    [
        0x3fdffffa36d1712e,
        0xbfb17b523971bd4e,
        0x3f9cddee26de2dee,
        0xbf8fccb00abaaabc,
        0x3f8269afc3622342,
        0xbf72933152686752,
        0x3f5a76d4956cc9a3,
        0xbf32ce7d6dc651ce,
    ],
    [
        0x3fdffff5402ab3a1,
        0xbfb17a5ba85da77a,
        0x3f9cc96894e05c02,
        0xbf8f532143cb832e,
        0x3f819180b660ff09,
        0xbf70c57417a78b3c,
        0x3f562e26cbd7bb1e,
        0xbf2ce28d33fe1df3,
    ],
    [
        0x3fdfffed8d639751,
        0xbfb1790349f3ae76,
        0x3f9caf9a4fd1b398,
        0xbf8ec986b111342e,
        0x3f80b53c3ad4baa4,
        0xbf6e3c2282eeace4,
        0x3f52a55369f55bbe,
        0xbf2667fe48c396e8,
    ],
    [
        0x3fdfffe24b714161,
        0xbfb177394fbcb719,
        0x3f9c90652d920ebd,
        0xbf8e3239197bddf1,
        0x3f7fb2188525b025,
        0xbf6b3aadd451afc7,
        0x3f4f74020f31fdab,
        0xbf218b0cb246768d,
    ],
    [
        0x3fdfffd298bec9e2,
        0xbfb174efbfd34648,
        0x3f9c6bcfe48ea92b,
        0xbf8d8f9f2a16157c,
        0x3f7e0044f56c8864,
        0xbf6883e2347fe76c,
        0x3f4a9f0e3c1b7af5,
        0xbf1bb5acc0e60825,
    ],
    [
        0x3fdfffbd8b784c4d,
        0xbfb1721abdd3722e,
        0x3f9c41fee756d4b0,
        0xbf8ce40bccf8065f,
        0x3f7c59b684b70ef9,
        0xbf66133d027996b3,
        0x3f469cad01106397,
        0xbf160f8e45494156,
    ],
    [
        0x3fdfffa23749cf88,
        0xbfb16eb0a8285c06,
        0x3f9c132d762e1b0d,
        0xbf8c31a959398f4e,
        0x3f7ac1c5b46bc8a0,
        0xbf63e34f1abe51dc,
        0x3f4346738737c0b9,
        0xbf11b227a3f5c750,
    ],
    [
        0x3fdfff7fb25bb407,
        0xbfb16aaa14d75640,
        0x3f9bdfa75fca5ff2,
        0xbf8b7a6e260d079c,
        0x3f793ab06911033c,
        0xbf61ee5560967fd5,
        0x3f407d31060838bf,
        0xbf0c96f33a283115,
    ],
];
193
194#[inline(always)]
195fn asinpif_gen_impl<Q: Fn(f64, f64, f64) -> f64>(x: f32, fma: Q) -> f32 {
196 let ax = x.abs();
197 let az = ax as f64;
198 let z = x as f64;
199 let t = x.to_bits();
200 let e: i32 = ((t >> 23) & 0xff) as i32;
201 if e >= 127 {
202 if ax == 1.0 {
204 return f32::copysign(0.5, x);
205 } if e == 0xff && (t.wrapping_shl(9)) != 0 {
207 return x + x;
208 } return f32::NAN; }
211 let s: i32 = 146i32.wrapping_sub(e);
212 let mut i = 0i32;
213 if s < 32 {
215 i = (((t & 0x007fffff) | 1 << 23) >> s) as i32;
216 }
217 let z2 = z * z;
218 let z4 = z2 * z2;
219 let c = ASINCOSF_PI_TABLE[i as usize & 15];
220 if i == 0 {
221 let mut c0 = fma(z2, f64::from_bits(c[1]), f64::from_bits(c[0]));
223 let c2 = fma(z2, f64::from_bits(c[3]), f64::from_bits(c[2]));
224 let mut c4 = fma(z2, f64::from_bits(c[5]), f64::from_bits(c[4]));
225 let c6 = fma(z2, f64::from_bits(c[7]), f64::from_bits(c[6]));
226 c0 = fma(c2, z4, c0);
227 c4 = fma(c6, z4, c4);
228 c0 += c4 * (z4 * z4);
229 (z * c0) as f32
230 } else {
231 let f = (1. - az).sqrt();
233 let mut c0 = fma(az, f64::from_bits(c[1]), f64::from_bits(c[0]));
234 let c2 = fma(az, f64::from_bits(c[3]), f64::from_bits(c[2]));
235 let mut c4 = fma(az, f64::from_bits(c[5]), f64::from_bits(c[4]));
236 let c6 = fma(az, f64::from_bits(c[7]), f64::from_bits(c[6]));
237 c0 = fma(c2, z2, c0);
238 c4 = fma(c6, z2, c4);
239 c0 += c4 * z4;
240 let r = fma(
241 -c0,
242 f64::copysign(f, x as f64),
243 f64::copysign(0.5, x as f64),
244 );
245 r as f32
246 }
247}
248
249#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
250#[target_feature(enable = "avx", enable = "fma")]
251unsafe fn asinpif_fma_impl(x: f32) -> f32 {
252 asinpif_gen_impl(x, f64::mul_add)
253}
254
255#[inline]
259pub fn f_asinpif(x: f32) -> f32 {
260 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
261 {
262 asinpif_gen_impl(x, f_fmla)
263 }
264 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
265 {
266 use std::sync::OnceLock;
267 static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
268 let q = EXECUTOR.get_or_init(|| {
269 if std::arch::is_x86_feature_detected!("avx")
270 && std::arch::is_x86_feature_detected!("fma")
271 {
272 asinpif_fma_impl
273 } else {
274 fn def_asinpif(x: f32) -> f32 {
275 asinpif_gen_impl(x, f_fmla)
276 }
277 def_asinpif
278 }
279 });
280 unsafe { q(x) }
281 }
282}
283
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks on ordinary arguments and on an out-of-domain input.
    #[test]
    fn test_asinpif() {
        assert_eq!(f_asinpif(0.0), 0.);
        assert_eq!(f_asinpif(0.5), 0.16666667);
        assert!(f_asinpif(1.5).is_nan());
    }

    /// Special-case branches: domain endpoints, odd symmetry, NaN and infinities.
    #[test]
    fn test_asinpif_special_cases() {
        assert_eq!(f_asinpif(1.0), 0.5);
        assert_eq!(f_asinpif(-1.0), -0.5);
        assert_eq!(f_asinpif(-0.5), -0.16666667);
        assert!(f_asinpif(f32::NAN).is_nan());
        assert!(f_asinpif(f32::INFINITY).is_nan());
        assert!(f_asinpif(f32::NEG_INFINITY).is_nan());
    }
}