1use crate::common::dd_fmla;
30use crate::double_double::DoubleDouble;
31use crate::exponents::auxiliary::fast_ldexp;
32use crate::exponents::exp::{EXP_REDUCE_T0, EXP_REDUCE_T1, to_denormal};
33use crate::exponents::expf::{ExpfBackend, GenericExpfBackend};
34use crate::rounding::CpuRoundTiesEven;
35
36#[inline]
37fn exp2_poly_dd(z: f64) -> DoubleDouble {
38 const C: [(u64, u64); 6] = [
39 (0x3bbabc9e3b39873e, 0x3f262e42fefa39ef),
40 (0xbae5e43a53e44950, 0x3e4ebfbdff82c58f),
41 (0xba0d3a15710d3d83, 0x3d6c6b08d704a0c0),
42 (0x3914dd5d2a5e025a, 0x3c83b2ab6fba4e77),
43 (0xb83dc47e47beb9dd, 0x3b95d87fe7a66459),
44 (0xb744fcd51fcb7640, 0x3aa430912f9fb79d),
45 ];
46
47 let mut r = DoubleDouble::quick_mul_f64_add(
48 DoubleDouble::from_bit_pair(C[5]),
49 z,
50 DoubleDouble::from_bit_pair(C[4]),
51 );
52 r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[3]));
53 r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[2]));
54 r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[1]));
55 DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(C[0]))
56}
57
58#[cold]
59fn exp2_accurate(x: f64) -> f64 {
60 let mut ix = x.to_bits();
61 let sx = 4096.0 * x;
62 let fx = sx.cpu_round_ties_even();
63 let z = sx - fx;
64 let k: i64 = unsafe {
65 fx.to_int_unchecked::<i64>() };
67 let i1 = k & 0x3f;
68 let i0 = (k >> 6) & 0x3f;
69 let ie = k >> 12;
70
71 let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i0 as usize]);
72 let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
73 let dt = DoubleDouble::quick_mult(t0, t1);
74
75 let mut f = exp2_poly_dd(z);
76 f = DoubleDouble::quick_mult_f64(f, z);
77 if ix <= 0xc08ff00000000000u64 {
78 if f64::from_bits(0xbc971547652b82fe) <= x && x <= f64::from_bits(0x3ca71547652b82fd) {
82 return dd_fmla(x, 0.5, 1.0);
83 } else if (k & 0xfff) == 0 {
84 let zf = DoubleDouble::from_exact_add(dt.hi, f.hi);
86 let zfl = DoubleDouble::from_exact_add(zf.lo, f.lo);
87 f.hi = zf.hi;
88 f.lo = zfl.hi;
89 ix = zfl.hi.to_bits();
90 if ix & 0x000fffffffffffff == 0 {
91 if ((ix >> 52) & 0x7ff) != 0 {
93 let v = zfl.lo.to_bits();
95 let d: i64 = ((((ix as i64) >> 63) ^ ((v as i64) >> 63)) as u64)
96 .wrapping_shl(1)
97 .wrapping_add(1) as i64;
98 ix = ix.wrapping_add(d as u64);
99 f.lo = f64::from_bits(ix);
100 }
101 }
102 } else {
103 f = DoubleDouble::quick_mult(f, dt);
104 f = DoubleDouble::add(dt, f);
105 }
106 let hf = DoubleDouble::from_exact_add(f.hi, f.lo);
107
108 fast_ldexp(hf.hi, ie as i32)
109 } else {
110 ix = 1u64.wrapping_sub(ie as u64).wrapping_shl(52);
111 f = DoubleDouble::quick_mult(f, dt);
112 f = DoubleDouble::add(dt, f);
113 let zve = DoubleDouble::from_exact_add(f64::from_bits(ix), f.hi);
114 f.hi = zve.hi;
115 f.lo += zve.lo;
116
117 to_denormal(f.to_f64())
118 }
119}
120
121#[inline(always)]
122fn exp2_gen<B: ExpfBackend>(x: f64, backend: B) -> f64 {
123 let mut ix = x.to_bits();
124 let ax = ix.wrapping_shl(1);
125 if ax == 0 {
126 return 1.0;
127 }
128 if ax >= 0x8120000000000000u64 {
129 if ax > 0xffe0000000000000u64 {
131 return x + x; }
133 if ax == 0xffe0000000000000u64 {
134 return if (ix >> 63) != 0 { 0.0 } else { x };
135 }
136 if (ix >> 63) != 0 {
138 if ix >= 0xc090cc0000000000u64 {
140 const Z: f64 = f64::from_bits(0x0010000000000000);
142 return Z * Z;
143 }
144 } else {
145 return f64::from_bits(0x7fe0000000000000) * x;
147 }
148 }
149
150 if ax <= 0x792e2a8eca5705fcu64 {
153 return 1.0 + f64::copysign(f64::from_bits(0x3c90000000000000), x);
154 }
155
156 let m = ix.wrapping_shl(12);
157 let ex = (ax >> 53).wrapping_sub(0x3ff);
158 let frac = ex >> 63 | m << (ex & 63);
159 let sx = 4096.0 * x;
160 let fx = backend.round_ties_even(sx);
161 let z = sx - fx;
162 let z2 = z * z;
163 let k = unsafe {
164 fx.to_int_unchecked::<i64>() };
166 let i1 = k & 0x3f;
167 let i0 = (k >> 6) & 0x3f;
168 let ie = k >> 12;
169 let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i0 as usize]);
170 let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
171 let ti0 = backend.quick_mult(t0, t1);
172 const C: [u64; 4] = [
173 0x3f262e42fefa39ef,
174 0x3e4ebfbdff82c58f,
175 0x3d6c6b08d73b3e01,
176 0x3c83b2ab6fdda001,
177 ];
178 let tz = ti0.hi * z;
179 let mut fh = ti0.hi;
180
181 let p0 = backend.fma(z, f64::from_bits(C[1]), f64::from_bits(C[0]));
182 let p1 = backend.fma(z, f64::from_bits(C[3]), f64::from_bits(C[2]));
183 let p2 = backend.fma(z2, p1, p0);
184
185 let mut fl = backend.fma(tz, p2, ti0.lo);
186
187 const EPS: f64 = f64::from_bits(0x3c0833beace2b6fe);
188
189 if ix <= 0xc08ff00000000000u64 {
190 if frac != 0 {
192 let ub = fh + (fl + EPS);
193 fh += fl - EPS;
194 if ub != fh {
195 return exp2_accurate(x);
196 }
197 }
198 fh = fast_ldexp(fh, ie as i32);
199 } else {
200 ix = 1u64.wrapping_sub(ie as u64).wrapping_shl(52);
202 let rs = DoubleDouble::from_exact_add(f64::from_bits(ix), fh);
203 fl += rs.lo;
204 fh = rs.hi;
205 if frac != 0 {
206 let ub = fh + (fl + EPS);
207 fh += fl - EPS;
208 if ub != fh {
209 return exp2_accurate(x);
210 }
211 }
212 fh = to_denormal(fh);
214 }
215 fh
216}
217
218#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
219#[target_feature(enable = "avx", enable = "fma")]
220unsafe fn exp2_fma_impl(x: f64) -> f64 {
221 use crate::exponents::expf::FmaBackend;
222 exp2_gen(x, FmaBackend {})
223}
224
225pub fn f_exp2(x: f64) -> f64 {
229 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
230 {
231 exp2_gen(x, GenericExpfBackend {})
232 }
233 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
234 {
235 use std::sync::OnceLock;
236 static EXECUTOR: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
237 let q = EXECUTOR.get_or_init(|| {
238 if std::arch::is_x86_feature_detected!("avx")
239 && std::arch::is_x86_feature_detected!("fma")
240 {
241 exp2_fma_impl
242 } else {
243 fn def_exp2(x: f64) -> f64 {
244 exp2_gen(x, GenericExpfBackend {})
245 }
246 def_exp2
247 }
248 });
249 unsafe { q(x) }
250 }
251}
252
253#[cfg(test)]
254mod tests {
255 use super::*;
256
257 #[test]
258 fn test_exp2d() {
259 assert_eq!(f_exp2(2.0), 4.0);
260 assert_eq!(f_exp2(3.0), 8.0);
261 assert_eq!(f_exp2(4.0), 16.0);
262 assert_eq!(f_exp2(0.35f64), 1.2745606273192622);
263 assert_eq!(f_exp2(-0.6f64), 0.6597539553864471);
264 assert_eq!(f_exp2(f64::INFINITY), f64::INFINITY);
265 assert_eq!(f_exp2(f64::NEG_INFINITY), 0.);
266 assert!(f_exp2(f64::NAN).is_nan());
267 }
268}