1use crate::common::{f_fmla, f_fmlaf, pow2if};
30use crate::exponents::expf::{ExpfBackend, GenericExpfBackend};
31use std::hint::black_box;
32
33const TBLSIZE: usize = 64;
34
35#[repr(align(64))]
36struct Exp2Table([(u32, u32); TBLSIZE]);
37
38#[rustfmt::skip]
39static EXP2FT: Exp2Table = Exp2Table([(0x3F3504F3, 0xB2D4175E),(0x3F36FD92, 0x3268D5EF),(0x3F38FBAF, 0xB30E8719),(0x3F3AFF5B, 0x3319E7DA),(0x3F3D08A4, 0x333CD82F),(0x3F3F179A, 0x330E1902),(0x3F412C4D, 0x32CCF4D7),(0x3F4346CD, 0x328F330E),(0x3F45672A, 0xB201B5B7),(0x3F478D75, 0x32CCCE34),(0x3F49B9BE, 0x335E937C),(0x3F4BEC15, 0x2FF41909),(0x3F4E248C, 0xB21760EA),(0x3F506334, 0x3283628B),(0x3F52A81E, 0x3340F500),(0x3F54F35B, 0x331202BD),(0x3F5744FD, 0x32B66A3E),(0x3F599D16, 0x32D0D9B1),(0x3F5BFBB8, 0x332ED93F),(0x3F5E60F5, 0x3350A709),(0x3F60CCDF, 0x32025744),(0x3F633F89, 0xB33A7C4D),(0x3F65B907, 0x321DA4E9),(0x3F68396A, 0xB2FF36A7),(0x3F6AC0C7, 0x3217E40E),(0x3F6D4F30, 0xB2400CBB),(0x3F6FE4BA, 0x331A2ACC),(0x3F728177, 0xB2B7D3E5),(0x3F75257D, 0xB1FED2BE),(0x3F77D0DF, 0xB32B73BA),(0x3F7A83B3, 0x32579081),(0x3F7D3E0C, 0xB19726B5),(0x3F800000, 0x00000000),(0x3F8164D2, 0x320C09FB),(0x3F82CD87, 0x3391E031),(0x3F843A29, 0x33287EEF),(0x3F85AAC3, 0xB38F6665),(0x3F871F62, 0x339004AB),(0x3F88980F, 0x33AC4561),(0x3F8A14D5, 0xB39CDAEA),(0x3F8B95C2, 0x32949D5C),(0x3F8D1ADF, 0xB36F79FA),(0x3F8EA43A, 0x33971DC2),(0x3F9031DC, 0xB32BD022),(0x3F91C3D3, 0xB3928952),(0x3F935A2B, 0xB2EBFECF),(0x3F94F4F0, 0x3357B8BB),(0x3F96942D, 0xB307353B),(0x3F9837F0, 0xB345DFE9),(0x3F99E046, 0x3382A804),(0x3F9B8D3A, 0x3326993E),(0x3F9D3EDA, 0x3350A029),(0x3F9EF532, 0xB3605F62),(0x3FA0B051, 0xB210909B),(0x3FA27043, 0xB0DDC369),(0x3FA43516, 0x33385844),(0x3FA5FED7, 0x33400757),(0x3FA7CD94, 0x3325446E),(0x3FA9A15B, 0x33237A50),(0x3FAB7A3A, 0x33201CA4),(0x3FAD583F, 0x32394687),(0x3FAF3B79, 0x332E1225),(0x3FB123F6, 0x33838969),(0x3FB311C4, 0xB219F2BA)]);
40
41pub(crate) static EXP2F_TABLE: [u64; 64] = [
52 0x3ff0000000000000,
53 0x3ff02c9a3e778061,
54 0x3ff059b0d3158574,
55 0x3ff0874518759bc8,
56 0x3ff0b5586cf9890f,
57 0x3ff0e3ec32d3d1a2,
58 0x3ff11301d0125b51,
59 0x3ff1429aaea92de0,
60 0x3ff172b83c7d517b,
61 0x3ff1a35beb6fcb75,
62 0x3ff1d4873168b9aa,
63 0x3ff2063b88628cd6,
64 0x3ff2387a6e756238,
65 0x3ff26b4565e27cdd,
66 0x3ff29e9df51fdee1,
67 0x3ff2d285a6e4030b,
68 0x3ff306fe0a31b715,
69 0x3ff33c08b26416ff,
70 0x3ff371a7373aa9cb,
71 0x3ff3a7db34e59ff7,
72 0x3ff3dea64c123422,
73 0x3ff4160a21f72e2a,
74 0x3ff44e086061892d,
75 0x3ff486a2b5c13cd0,
76 0x3ff4bfdad5362a27,
77 0x3ff4f9b2769d2ca7,
78 0x3ff5342b569d4f82,
79 0x3ff56f4736b527da,
80 0x3ff5ab07dd485429,
81 0x3ff5e76f15ad2148,
82 0x3ff6247eb03a5585,
83 0x3ff6623882552225,
84 0x3ff6a09e667f3bcd,
85 0x3ff6dfb23c651a2f,
86 0x3ff71f75e8ec5f74,
87 0x3ff75feb564267c9,
88 0x3ff7a11473eb0187,
89 0x3ff7e2f336cf4e62,
90 0x3ff82589994cce13,
91 0x3ff868d99b4492ed,
92 0x3ff8ace5422aa0db,
93 0x3ff8f1ae99157736,
94 0x3ff93737b0cdc5e5,
95 0x3ff97d829fde4e50,
96 0x3ff9c49182a3f090,
97 0x3ffa0c667b5de565,
98 0x3ffa5503b23e255d,
99 0x3ffa9e6b5579fdbf,
100 0x3ffae89f995ad3ad,
101 0x3ffb33a2b84f15fb,
102 0x3ffb7f76f2fb5e47,
103 0x3ffbcc1e904bc1d2,
104 0x3ffc199bdd85529c,
105 0x3ffc67f12e57d14b,
106 0x3ffcb720dcef9069,
107 0x3ffd072d4a07897c,
108 0x3ffd5818dcfba487,
109 0x3ffda9e603db3285,
110 0x3ffdfc97337b9b5f,
111 0x3ffe502ee78b3ff6,
112 0x3ffea4afa2a490da,
113 0x3ffefa1bee615a27,
114 0x3fff50765b6e4540,
115 0x3fffa7c1819e90d8,
116];
117
118#[inline(always)]
145fn exp2f_gen<B: ExpfBackend>(x: f32, backend: B) -> f32 {
146 let mut t = x.to_bits();
147
148 if (t & 0xffff) == 0 {
149 let k: i32 = (((t >> 23) & 0xff) as i32).wrapping_sub(127); if k >= 0 && k < 9 && (t << (9i32.wrapping_add(k))) == 0 {
152 let msk = (t as i32) >> 31;
154 let mut m: i32 = (((t & 0x7fffff) | (1 << 23)) >> (23 - k)) as i32;
155 m = (m ^ msk).wrapping_sub(msk).wrapping_add(127);
156 if m > 0 && m < 255 {
157 t = (m as u32).wrapping_shl(23);
158 return f32::from_bits(t);
159 } else if m <= 0 && m > -23 {
160 t = 1i32.wrapping_shl(22i32.wrapping_add(m) as u32) as u32;
161 return f32::from_bits(t);
162 }
163 }
164 }
165 let ux = t.wrapping_shl(1);
166
167 if ux >= 0x86000000u32 || ux < 0x65000000u32 {
168 if ux < 0x65000000u32 {
170 return 1.0 + x;
171 } if !(t >= 0xc3000000u32 && t < 0xc3150000u32) {
174 if ux >= 0xffu32 << 24 {
175 if ux > (0xffu32 << 24) {
177 return x + x;
178 } static IR: [f32; 2] = [f32::INFINITY, 0.];
180 return IR[(t >> 31) as usize]; }
182 if t >= 0xc3150000u32 {
183 let z = x;
185 let mut y = f_fmla(
186 z as f64 + 149.,
187 f64::from_bits(0x3690000000000000),
188 f64::from_bits(0x36a0000000000000),
189 );
190 y = y.max(f64::from_bits(0x3680000000000000));
191 return y as f32;
192 }
193 let r = black_box(f64::from_bits(0x47e0000000000000))
195 * black_box(f64::from_bits(0x47e0000000000000));
196 return r as f32;
197 }
198 }
199
200 if ux <= 0x7a000000u32 {
201 const C: [u64; 6] = [
210 0x3fe62e42fefa39f3,
211 0x3fcebfbdff82c57b,
212 0x3fac6b08d6f2d7aa,
213 0x3f83b2ab6fc92f5d,
214 0x3f55d897cfe27125,
215 0x3f243090e61e6af1,
216 ];
217 let xd = x as f64;
218 let p = backend.polyeval6(
219 xd,
220 f64::from_bits(C[0]),
221 f64::from_bits(C[1]),
222 f64::from_bits(C[2]),
223 f64::from_bits(C[3]),
224 f64::from_bits(C[4]),
225 f64::from_bits(C[5]),
226 );
227 return backend.fma(p, xd, 1.) as f32;
228 }
229
230 let x_d = x as f64;
231 let kf = backend.round(x_d * 64.);
232 let k = unsafe { kf.to_int_unchecked::<i32>() }; let dx = backend.fma(f64::from_bits(0xbf90000000000000), kf, x_d);
235
236 const TABLE_BITS: u32 = 6;
237 const TABLE_MASK: u64 = (1u64 << TABLE_BITS) - 1;
238
239 let exp_hi: i64 = ((k >> TABLE_BITS) as i64).wrapping_shl(52);
242
243 let mh_bits = (EXP2F_TABLE[((k as u64) & TABLE_MASK) as usize] as i64).wrapping_add(exp_hi);
246 let mh = f64::from_bits(mh_bits as u64);
247
248 const C: [u64; 5] = [
252 0x3fe62e42fefa39ef,
253 0x3fcebfbdff8131c4,
254 0x3fac6b08d7061695,
255 0x3f83b2b1bee74b2a,
256 0x3f55d88091198529,
257 ];
258 let dx_sq = dx * dx;
259 let c1 = backend.fma(dx, f64::from_bits(C[0]), 1.0);
260 let c2 = backend.fma(dx, f64::from_bits(C[2]), f64::from_bits(C[1]));
261 let c3 = backend.fma(dx, f64::from_bits(C[4]), f64::from_bits(C[3]));
262 let p = backend.fma(dx_sq, c3, c2);
263 backend.fma(p, dx_sq * mh, c1 * mh) as f32
268}
269
270#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
271#[target_feature(enable = "avx", enable = "fma")]
272unsafe fn exp2f_fma_impl(x: f32) -> f32 {
273 use crate::exponents::expf::FmaBackend;
274 exp2f_gen(x, FmaBackend {})
275}
276
277#[inline]
281pub fn f_exp2f(x: f32) -> f32 {
282 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
283 {
284 exp2f_gen(x, GenericExpfBackend {})
285 }
286 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
287 {
288 use std::sync::OnceLock;
289 static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
290 let q = EXECUTOR.get_or_init(|| {
291 if std::arch::is_x86_feature_detected!("avx")
292 && std::arch::is_x86_feature_detected!("fma")
293 {
294 exp2f_fma_impl
295 } else {
296 fn def_exp2f(x: f32) -> f32 {
297 exp2f_gen(x, GenericExpfBackend {})
298 }
299 def_exp2f
300 }
301 });
302 unsafe { q(x) }
303 }
304}
305
306#[inline]
307pub(crate) fn dirty_exp2f(d: f32) -> f32 {
308 let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32;
309
310 let ui = f32::to_bits(d + redux);
311 let mut i0 = ui;
312 i0 = i0.wrapping_add(TBLSIZE as u32 / 2);
313 let k = i0 / TBLSIZE as u32;
314 i0 &= TBLSIZE as u32 - 1;
315 let mut uf = f32::from_bits(ui);
316 uf -= redux;
317
318 let item = EXP2FT.0[i0 as usize];
319 let z0: f32 = f32::from_bits(item.0);
320
321 let f: f32 = d - uf;
322
323 let mut u = 0.24022650695908768;
324 u = f_fmlaf(u, f, 0.69314718055994973);
325 u *= f;
326
327 let i2 = pow2if(k as i32);
328 f_fmlaf(u, z0, z0) * i2
329}
330
331#[cfg(test)]
332mod tests {
333 use super::*;
334
335 #[test]
336 fn test_exp2f() {
337 assert_eq!(f_exp2f(1. / 64.), 1.0108893);
338 assert_eq!(f_exp2f(2.0), 4.0);
339 assert_eq!(f_exp2f(3.0), 8.0);
340 assert_eq!(f_exp2f(4.0), 16.0);
341 assert_eq!(f_exp2f(10.0), 1024.0);
342 assert_eq!(f_exp2f(-10.0), 0.0009765625);
343 assert!(f_exp2f(f32::NAN).is_nan());
344 assert_eq!(f_exp2f(-0.35), 0.7845841);
345 assert_eq!(f_exp2f(0.35), 1.2745606);
346 assert!(f_exp2f(f32::INFINITY).is_infinite());
347 assert_eq!(f_exp2f(f32::NEG_INFINITY), 0.0);
348 }
349
350 #[test]
351 fn test_dirty_exp2f() {
352 assert!((dirty_exp2f(0.35f32) - 0.35f32.exp2()).abs() < 1e-5);
353 assert!((dirty_exp2f(-0.6f32) - (-0.6f32).exp2()).abs() < 1e-5);
354 }
355}