1use crate::common::{dd_fmla, f_fmla};
30use crate::double_double::DoubleDouble;
31use crate::exponents::expf::{ExpfBackend, GenericExpfBackend};
32use crate::exponents::fast_ldexp;
33use crate::rounding::CpuRoundTiesEven;
34use crate::shared_eval::poly_dekker_generic;
35use std::hint::black_box;
36
37static TZ: [(u64, u64); 65] = [
38 (0xbc6797d4686c5393, 0xbfcc5041854df7d4),
39 (0xbc8ea1cb9d163339, 0xbfcb881a23aebb48),
40 (0x3c8f483a3e8cd60f, 0xbfcabe60e1f21838),
41 (0x3c7dffd920f493db, 0xbfc9f3129931fab0),
42 (0xbc851bfdbb129094, 0xbfc9262c1c3430a0),
43 (0x3c8cd3e5225e2206, 0xbfc857aa375db4e4),
44 (0x3c5e3a6bdaece8f9, 0xbfc78789b0a5e0c0),
45 (0xbc8daf2ae0c2d3d4, 0xbfc6b5c7478983d8),
46 (0xbc7fd36226fadd44, 0xbfc5e25fb4fde210),
47 (0x3c7d887cd0341ab0, 0xbfc50d4fab639758),
48 (0xbc8676a52a1a618b, 0xbfc43693d679612c),
49 (0x3c79776b420ad283, 0xbfc35e28db4ecd9c),
50 (0x3c73d5fd7d70a5ed, 0xbfc2840b5836cf68),
51 (0x3c5a94ad2c8fa0bf, 0xbfc1a837e4ba3760),
52 (0x3c26ad4c353465b0, 0xbfc0caab118a1278),
53 (0xbc78bba170e59b65, 0xbfbfd6c2d0e3d910),
54 (0xbc8e1e0a76cb0685, 0xbfbe14aed893eef0),
55 (0x3c8fe131f55e75f8, 0xbfbc4f1331d22d40),
56 (0xbc8b5beee8bcee31, 0xbfba85e8c62d9c10),
57 (0xbc77fe9b02c25e9b, 0xbfb8b92870fa2b58),
58 (0xbc832ae7bdaf1116, 0xbfb6e8caff341fe8),
59 (0x3c7a6cfe58cbd73b, 0xbfb514c92f634788),
60 (0x3c68798de3138a56, 0xbfb33d1bb17df2e8),
61 (0xbc3589321a7ef10b, 0xbfb161bb26cbb590),
62 (0xbc78d0e700fcfb65, 0xbfaf0540438fd5c0),
63 (0x3c8473ef07d5dd3b, 0xbfab3f864c080000),
64 (0xbc838e62149c16e2, 0xbfa7723950130400),
65 (0xbc508bb6309bd394, 0xbfa39d4a1a77e050),
66 (0xbc8bad3fd501a227, 0xbf9f8152aee94500),
67 (0x3c63d27ac39ed253, 0xbf97b88f290230e0),
68 (0xbc8b60bbd08aac55, 0xbf8fc055004416c0),
69 (0xbc4a00d03b3359de, 0xbf7fe0154aaeed80),
70 (0x0000000000000000, 0x0000000000000000),
71 (0x3c8861931c15e39b, 0x3f80100ab00222c0),
72 (0x3c77ab864b3e9045, 0x3f90202ad5778e40),
73 (0x3c74e5659d75e95b, 0x3f984890d9043740),
74 (0x3c78e0bd083aba81, 0x3fa040ac0224fd90),
75 (0x3c345cc1cf959b1b, 0x3fa465509d383eb0),
76 (0xbc8eb6980ce14da7, 0x3fa89246d053d180),
77 (0x3c77324137d6c342, 0x3facc79f4f5613a0),
78 (0xbc45272ff30eed1b, 0x3fb082b577d34ed8),
79 (0xbc81280f19dace1c, 0x3fb2a5dd543ccc50),
80 (0xbc8d550af31c8ec3, 0x3fb4cd4fc989cd68),
81 (0x3c87923b72aa582d, 0x3fb6f91575870690),
82 (0xbc776c2e732457f1, 0x3fb92937074e0cd8),
83 (0x3c881f5c92a5200f, 0x3fbb5dbd3f681220),
84 (0x3c8e8ac7a4d3206c, 0x3fbd96b0eff0e790),
85 (0xbc712db6f4bbe33b, 0x3fbfd41afcba45e8),
86 (0xbc58c4a5df1ec7e5, 0x3fc10b022db7ae68),
87 (0xbc6bd4b1c37ea8a2, 0x3fc22e3b09dc54d8),
88 (0x3c85aeb9860044d0, 0x3fc353bc9fb00b20),
89 (0xbc64c26602c63fda, 0x3fc47b8b853aafec),
90 (0xbc87f644c1f9d314, 0x3fc5a5ac59b963cc),
91 (0x3c8f5aa8ec61fc2d, 0x3fc6d223c5b10638),
92 (0x3c27ab912c69ffeb, 0x3fc800f67b00d7b8),
93 (0xbc5b3564bc0ec9cd, 0x3fc9322934f54148),
94 (0x3c86a7062465be33, 0x3fca65c0b85ac1a8),
95 (0xbc885718d2ff1bf4, 0x3fcb9bc1d3910094),
96 (0xbc8045cb0c685e08, 0x3fccd4315e9e0834),
97 (0xbc16e7fb859d5055, 0x3fce0f143b41a554),
98 (0x3c851bbdee020603, 0x3fcf4c6f5508ee5c),
99 (0x3c6e17611afc42c5, 0x3fd04623d0b0f8c8),
100 (0xbc71c5b2e8735a43, 0x3fd0e7510fd7c564),
101 (0xbc825fe139c4cffd, 0x3fd189c1ecaeb084),
102 (0xbc789843c4964554, 0x3fd22d78f0fa061a),
103];
104
105pub(crate) static EXPM1_T0: [(u64, u64); 64] = [
106 (0x0000000000000000, 0x3ff0000000000000),
107 (0xbc719083535b085e, 0x3ff02c9a3e778061),
108 (0x3c8d73e2a475b466, 0x3ff059b0d3158574),
109 (0x3c6186be4bb28500, 0x3ff0874518759bc8),
110 (0x3c98a62e4adc610a, 0x3ff0b5586cf9890f),
111 (0x3c403a1727c57b52, 0x3ff0e3ec32d3d1a2),
112 (0xbc96c51039449b3a, 0x3ff11301d0125b51),
113 (0xbc932fbf9af1369e, 0x3ff1429aaea92de0),
114 (0xbc819041b9d78a76, 0x3ff172b83c7d517b),
115 (0x3c8e5b4c7b4968e4, 0x3ff1a35beb6fcb75),
116 (0x3c9e016e00a2643c, 0x3ff1d4873168b9aa),
117 (0x3c8dc775814a8494, 0x3ff2063b88628cd6),
118 (0x3c99b07eb6c70572, 0x3ff2387a6e756238),
119 (0x3c82bd339940e9da, 0x3ff26b4565e27cdd),
120 (0x3c8612e8afad1256, 0x3ff29e9df51fdee1),
121 (0x3c90024754db41d4, 0x3ff2d285a6e4030b),
122 (0x3c86f46ad23182e4, 0x3ff306fe0a31b715),
123 (0x3c932721843659a6, 0x3ff33c08b26416ff),
124 (0xbc963aeabf42eae2, 0x3ff371a7373aa9cb),
125 (0xbc75e436d661f5e2, 0x3ff3a7db34e59ff7),
126 (0x3c8ada0911f09ebc, 0x3ff3dea64c123422),
127 (0xbc5ef3691c309278, 0x3ff4160a21f72e2a),
128 (0x3c489b7a04ef80d0, 0x3ff44e086061892d),
129 (0x3c73c1a3b69062f0, 0x3ff486a2b5c13cd0),
130 (0x3c7d4397afec42e2, 0x3ff4bfdad5362a27),
131 (0xbc94b309d25957e4, 0x3ff4f9b2769d2ca7),
132 (0xbc807abe1db13cac, 0x3ff5342b569d4f82),
133 (0x3c99bb2c011d93ac, 0x3ff56f4736b527da),
134 (0x3c96324c054647ac, 0x3ff5ab07dd485429),
135 (0x3c9ba6f93080e65e, 0x3ff5e76f15ad2148),
136 (0xbc9383c17e40b496, 0x3ff6247eb03a5585),
137 (0xbc9bb60987591c34, 0x3ff6623882552225),
138 (0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd),
139 (0xbc6bbe3a683c88aa, 0x3ff6dfb23c651a2f),
140 (0xbc816e4786887a9a, 0x3ff71f75e8ec5f74),
141 (0xbc90245957316dd4, 0x3ff75feb564267c9),
142 (0xbc841577ee049930, 0x3ff7a11473eb0187),
143 (0x3c705d02ba15797e, 0x3ff7e2f336cf4e62),
144 (0xbc9d4c1dd41532d8, 0x3ff82589994cce13),
145 (0xbc9fc6f89bd4f6ba, 0x3ff868d99b4492ed),
146 (0x3c96e9f156864b26, 0x3ff8ace5422aa0db),
147 (0x3c85cc13a2e3976c, 0x3ff8f1ae99157736),
148 (0xbc675fc781b57ebc, 0x3ff93737b0cdc5e5),
149 (0xbc9d185b7c1b85d0, 0x3ff97d829fde4e50),
150 (0x3c7c7c46b071f2be, 0x3ff9c49182a3f090),
151 (0xbc9359495d1cd532, 0x3ffa0c667b5de565),
152 (0xbc9d2f6edb8d41e2, 0x3ffa5503b23e255d),
153 (0x3c90fac90ef7fd32, 0x3ffa9e6b5579fdbf),
154 (0x3c97a1cd345dcc82, 0x3ffae89f995ad3ad),
155 (0xbc62805e3084d708, 0x3ffb33a2b84f15fb),
156 (0xbc75584f7e54ac3a, 0x3ffb7f76f2fb5e47),
157 (0x3c823dd07a2d9e84, 0x3ffbcc1e904bc1d2),
158 (0x3c811065895048de, 0x3ffc199bdd85529c),
159 (0x3c92884dff483cac, 0x3ffc67f12e57d14b),
160 (0x3c7503cbd1e949dc, 0x3ffcb720dcef9069),
161 (0xbc9cbc3743797a9c, 0x3ffd072d4a07897c),
162 (0x3c82ed02d75b3706, 0x3ffd5818dcfba487),
163 (0x3c9c2300696db532, 0x3ffda9e603db3285),
164 (0xbc91a5cd4f184b5c, 0x3ffdfc97337b9b5f),
165 (0x3c839e8980a9cc90, 0x3ffe502ee78b3ff6),
166 (0xbc9e9c23179c2894, 0x3ffea4afa2a490da),
167 (0x3c9dc7f486a4b6b0, 0x3ffefa1bee615a27),
168 (0x3c99d3e12dd8a18a, 0x3fff50765b6e4540),
169 (0x3c874853f3a5931e, 0x3fffa7c1819e90d8),
170];
171
172pub(crate) static EXPM1_T1: [(u64, u64); 64] = [
173 (0x0000000000000000, 0x3ff0000000000000),
174 (0x3c9ae8e38c59c72a, 0x3ff000b175effdc7),
175 (0xbc57b5d0d58ea8f4, 0x3ff00162f3904052),
176 (0x3c94115cb6b16a8e, 0x3ff0021478e11ce6),
177 (0xbc8d7c96f201bb2e, 0x3ff002c605e2e8cf),
178 (0x3c984711d4c35ea0, 0x3ff003779a95f959),
179 (0xbc80484245243778, 0x3ff0042936faa3d8),
180 (0xbc94b237da2025fa, 0x3ff004dadb113da0),
181 (0xbc75e00e62d6b30e, 0x3ff0058c86da1c0a),
182 (0x3c9a1d6cedbb9480, 0x3ff0063e3a559473),
183 (0xbc94acf197a00142, 0x3ff006eff583fc3d),
184 (0xbc6eaf2ea42391a6, 0x3ff007a1b865a8ca),
185 (0x3c7da93f90835f76, 0x3ff0085382faef83),
186 (0xbc86a79084ab093c, 0x3ff00905554425d4),
187 (0x3c986364f8fbe8f8, 0x3ff009b72f41a12b),
188 (0xbc882e8e14e3110e, 0x3ff00a6910f3b6fd),
189 (0xbc84f6b2a7609f72, 0x3ff00b1afa5abcbf),
190 (0xbc7e1a258ea8f71a, 0x3ff00bcceb7707ec),
191 (0x3c74362ca5bc26f2, 0x3ff00c7ee448ee02),
192 (0x3c9095a56c919d02, 0x3ff00d30e4d0c483),
193 (0xbc6406ac4e81a646, 0x3ff00de2ed0ee0f5),
194 (0x3c9b5a6902767e08, 0x3ff00e94fd0398e0),
195 (0xbc991b2060859320, 0x3ff00f4714af41d3),
196 (0x3c8427068ab22306, 0x3ff00ff93412315c),
197 (0x3c9c1d0660524e08, 0x3ff010ab5b2cbd11),
198 (0xbc9e7bdfb3204be8, 0x3ff0115d89ff3a8b),
199 (0x3c8843aa8b9cbbc6, 0x3ff0120fc089ff63),
200 (0xbc734104ee7edae8, 0x3ff012c1fecd613b),
201 (0xbc72b6aeb6176892, 0x3ff0137444c9b5b5),
202 (0x3c7a8cd33b8a1bb2, 0x3ff01426927f5278),
203 (0x3c72edc08e5da99a, 0x3ff014d8e7ee8d2f),
204 (0x3c857ba2dc7e0c72, 0x3ff0158b4517bb88),
205 (0x3c9b61299ab8cdb8, 0x3ff0163da9fb3335),
206 (0xbc990565902c5f44, 0x3ff016f0169949ed),
207 (0x3c870fc41c5c2d54, 0x3ff017a28af25567),
208 (0x3c94b9a6e145d76c, 0x3ff018550706ab62),
209 (0xbc7008eff5142bfa, 0x3ff019078ad6a19f),
210 (0xbc977669f033c7de, 0x3ff019ba16628de2),
211 (0xbc909bb78eeead0a, 0x3ff01a6ca9aac5f3),
212 (0x3c9371231477ece6, 0x3ff01b1f44af9f9e),
213 (0x3c75e7626621eb5a, 0x3ff01bd1e77170b4),
214 (0xbc9bc72b100828a4, 0x3ff01c8491f08f08),
215 (0xbc6ce39cbbab8bbe, 0x3ff01d37442d5070),
216 (0x3c816996709da2e2, 0x3ff01de9fe280ac8),
217 (0xbc8c11f5239bf536, 0x3ff01e9cbfe113ef),
218 (0x3c8e1d4eb5edc6b4, 0x3ff01f4f8958c1c6),
219 (0xbc9afb99946ee3f0, 0x3ff020025a8f6a35),
220 (0xbc98f06d8a148a32, 0x3ff020b533856324),
221 (0xbc82bf310fc54eb6, 0x3ff02168143b0281),
222 (0xbc9c95a035eb4176, 0x3ff0221afcb09e3e),
223 (0xbc9491793e46834c, 0x3ff022cdece68c4f),
224 (0xbc73e8d0d9c49090, 0x3ff02380e4dd22ad),
225 (0xbc9314aa16278aa4, 0x3ff02433e494b755),
226 (0x3c848daf888e9650, 0x3ff024e6ec0da046),
227 (0x3c856dc8046821f4, 0x3ff02599fb483385),
228 (0x3c945b42356b9d46, 0x3ff0264d1244c719),
229 (0xbc7082ef51b61d7e, 0x3ff027003103b10e),
230 (0x3c72106ed0920a34, 0x3ff027b357854772),
231 (0xbc9fd4cf26ea5d0e, 0x3ff0286685c9e059),
232 (0xbc909f8775e78084, 0x3ff02919bbd1d1d8),
233 (0x3c564cbba902ca28, 0x3ff029ccf99d720a),
234 (0x3c94383ef231d206, 0x3ff02a803f2d170d),
235 (0x3c94a47a505b3a46, 0x3ff02b338c811703),
236 (0x3c9e471202234680, 0x3ff02be6e199c811),
237];
238
239static EXPM1_DD1: [(u64, u64); 11] = [
240 (0x3c65555555555554, 0x3fc5555555555555),
241 (0x3c45555555555123, 0x3fa5555555555555),
242 (0x3c01111111118167, 0x3f81111111111111),
243 (0xbbef49f49e220cea, 0x3f56c16c16c16c17),
244 (0x3b6a019eff6f919c, 0x3f2a01a01a01a01a),
245 (0x3b39fcff48a75b41, 0x3efa01a01a01a01a),
246 (0xbb6c14f73758cd7f, 0x3ec71de3a556c734),
247 (0x3b3dfce97931018f, 0x3e927e4fb7789f5c),
248 (0x3afc513da9e4c9c5, 0x3e5ae64567f544e3),
249 (0x3acca00af84f2b60, 0x3e21eed8eff8d831),
250 (0x3a8f27ac6000898f, 0x3de6124613a86e8f),
251];
252
253static EXPM1_DD2: [(u64, u64); 7] = [
254 (0x3ff0000000000000, 0x0000000000000000),
255 (0x3fe0000000000000, 0x39c712f72ecec2cf),
256 (0x3fc5555555555555, 0x3c65555555554d07),
257 (0x3fa5555555555555, 0x3c455194d28275da),
258 (0x3f81111111111111, 0x3c012faa0e1c0f7b),
259 (0x3f56c16c16da6973, 0xbbf4ba45ab25d2a3),
260 (0x3f2a01a019eb7f31, 0xbbc9091d845ecd36),
261];
262
263#[inline]
264pub(crate) fn opoly_dd_generic<const N: usize>(
265 x: DoubleDouble,
266 poly: [(u64, u64); N],
267) -> DoubleDouble {
268 let zch = poly.last().unwrap();
269 let p0 = DoubleDouble::from_exact_add(f64::from_bits(zch.0), x.lo);
270 let ach = p0.hi;
271 let acl = f64::from_bits(zch.1) + p0.lo;
272 let mut ch = DoubleDouble::new(acl, ach);
273
274 for zch in poly.iter().rev().skip(1) {
275 ch = DoubleDouble::quick_mult_f64(ch, x.hi);
276 let z0 = DoubleDouble::from_bit_pair(*zch);
277 ch = DoubleDouble::add(z0, ch);
278 }
279
280 ch
281}
282
283#[cold]
284fn as_expm1_accurate(x: f64) -> f64 {
285 let mut ix;
286 if x.abs() < 0.25 {
287 const CL: [u64; 6] = [
288 0x3da93974a8ca5354,
289 0x3d6ae7f3e71e4908,
290 0x3d2ae7f357341648,
291 0x3ce952c7f96664cb,
292 0x3ca686f8ce633aae,
293 0x3c62f49b2fbfb5b6,
294 ];
295
296 let fl0 = f_fmla(x, f64::from_bits(CL[5]), f64::from_bits(CL[4]));
297 let fl1 = f_fmla(x, fl0, f64::from_bits(CL[3]));
298 let fl2 = f_fmla(x, fl1, f64::from_bits(CL[2]));
299 let fl3 = f_fmla(x, fl2, f64::from_bits(CL[1]));
300
301 let fl = x * f_fmla(x, fl3, f64::from_bits(CL[0]));
302 let mut f = opoly_dd_generic(DoubleDouble::new(fl, x), EXPM1_DD1);
303 f = DoubleDouble::quick_mult_f64(f, x);
304 f = DoubleDouble::quick_mult_f64(f, x);
305 f = DoubleDouble::quick_mult_f64(f, x);
306 let hx = 0.5 * x;
307 let dx2dd = DoubleDouble::from_exact_mult(x, hx);
308 f = DoubleDouble::add(dx2dd, f);
309 let v0 = DoubleDouble::from_exact_add(x, f.hi);
310 let v1 = DoubleDouble::from_exact_add(v0.lo, f.lo);
311 let v2 = DoubleDouble::from_exact_add(v0.hi, v1.hi);
312 let mut v3 = DoubleDouble::from_exact_add(v2.lo, v1.lo);
313 ix = v3.hi.to_bits();
314 if (ix & 0x000fffffffffffff) == 0 {
315 let v = v3.lo.to_bits();
316 let d: i64 = ((((ix as i64) >> 63) ^ ((v as i64) >> 63)) as u64)
317 .wrapping_shl(1)
318 .wrapping_add(1) as i64;
319 ix = ix.wrapping_add(d as u64);
320 v3.hi = f64::from_bits(ix);
321 }
322 v3.hi + v2.hi
323 } else {
324 const S: f64 = f64::from_bits(0x40b71547652b82fe);
325 let t = (x * S).cpu_round_ties_even();
326 let jt: i64 = t as i64;
327 let i0 = (jt >> 6) & 0x3f;
328 let i1 = jt & 0x3f;
329 let ie = jt >> 12;
330 let t0 = DoubleDouble::from_bit_pair(EXPM1_T0[i0 as usize]);
331 let t1 = DoubleDouble::from_bit_pair(EXPM1_T1[i1 as usize]);
332
333 let bt = DoubleDouble::quick_mult(t0, t1);
334
335 const L2H: f64 = f64::from_bits(0x3f262e42ff000000);
336 const L2L: f64 = f64::from_bits(0x3d0718432a1b0e26);
337 const L2LL: f64 = f64::from_bits(0x3999ff0342542fc3);
338
339 let dx = f_fmla(-L2H, t, x);
340 let dxl = L2L * t;
341 let dxll = f_fmla(L2LL, t, dd_fmla(L2L, t, -dxl));
342 let dxh = dx + dxl;
343 let dxl = (dx - dxh) + dxl + dxll;
344 let mut f = poly_dekker_generic(DoubleDouble::new(dxl, dxh), EXPM1_DD2);
345 f = DoubleDouble::quick_mult(DoubleDouble::new(dxl, dxh), f);
346 f = DoubleDouble::quick_mult(f, bt);
347 f = DoubleDouble::add(bt, f);
348 let off: u64 = (2048i64 + 1023i64).wrapping_sub(ie).wrapping_shl(52) as u64;
349 let e: f64;
350 if ie < 53 {
351 let fhz = DoubleDouble::from_exact_add(f64::from_bits(off), f.hi);
352 f.hi = fhz.hi;
353 e = fhz.lo;
354 } else if ie < 104 {
355 let fhz = DoubleDouble::from_exact_add(f.hi, f64::from_bits(off));
356 f.hi = fhz.hi;
357 e = fhz.lo;
358 } else {
359 e = 0.;
360 }
361 f.lo += e;
362 let dst = DoubleDouble::from_exact_add(f.hi, f.lo);
363 fast_ldexp(dst.hi, ie as i32)
364 }
365}
366
367#[inline(always)]
368fn expm1_gen<B: ExpfBackend>(x: f64, backend: B) -> f64 {
369 let ix = x.to_bits();
370 let aix: u64 = ix & 0x7fff_ffff_ffff_ffff;
371 if aix < 0x3fd0000000000000u64 {
372 if aix < 0x3ca0000000000000u64 {
373 if aix == 0 {
374 return x;
375 }
376 return backend.dyad_fma(f64::from_bits(0x3c90000000000000), x.abs(), x);
377 }
378 let sx = f64::from_bits(0x4060000000000000) * x;
379 let fx = backend.round_ties_even(sx);
380 let z = sx - fx;
381 let z2 = z * z;
382 let i: i64 = unsafe {
383 fx.to_int_unchecked::<i64>() };
385 let t = DoubleDouble::from_bit_pair(TZ[i.wrapping_add(32) as usize]);
386 const C: [u64; 6] = [
387 0x3f80000000000000,
388 0x3f00000000000000,
389 0x3e755555555551ad,
390 0x3de555555555599c,
391 0x3d511111ad1ad69d,
392 0x3cb6c16c168b1fb5,
393 ];
394 let fh = z * f64::from_bits(C[0]);
395
396 let fl0 = backend.fma(z, f64::from_bits(C[5]), f64::from_bits(C[4]));
397 let fl1 = backend.fma(z, f64::from_bits(C[2]), f64::from_bits(C[1]));
398
399 let fw0 = backend.fma(z, fl0, f64::from_bits(C[3]));
400
401 let fl = z2 * backend.fma(z2, fw0, fl1);
402 let mut f = DoubleDouble::new(fl, fh);
403 let e0 = f64::from_bits(0x3bea000000000000);
404 let eps = z2 * e0 + f64::from_bits(0x3970000000000000);
405 let mut r = DoubleDouble::from_exact_add(t.hi, f.hi);
406 r.lo += t.lo + f.lo;
407 f = backend.quick_mult(t, f);
408 f = DoubleDouble::add(r, f);
409 let ub = f.hi + (f.lo + eps);
410 let lb = f.hi + (f.lo - eps);
411 if ub != lb {
412 return as_expm1_accurate(x);
413 }
414 lb
415 } else {
416 if aix >= 0x40862e42fefa39f0u64 {
417 if aix > 0x7ff0000000000000u64 {
418 return x + x;
419 } if aix == 0x7ff0000000000000u64 {
421 return if (ix >> 63) != 0 { -1.0 } else { x };
422 }
423 if (ix >> 63) == 0 {
424 const Z: f64 = f64::from_bits(0x7fe0000000000000);
425 return black_box(Z) * black_box(Z);
426 }
427 }
428 if ix >= 0xc0425e4f7b2737fau64 {
429 if ix >= 0xc042b708872320e2u64 {
430 return black_box(-1.0) + black_box(f64::from_bits(0x3c80000000000000));
431 }
432 return (f64::from_bits(0x40425e4f7b2737fa) + x + f64::from_bits(0x3cc8486612173c69))
433 * f64::from_bits(0x3c971547652b82fe)
434 - f64::from_bits(0x3fefffffffffffff);
435 }
436
437 const S: f64 = f64::from_bits(0x40b71547652b82fe);
438 let t = backend.round_ties_even(x * S);
439 let jt: i64 = unsafe {
440 t.to_int_unchecked::<i64>() };
442 let i0 = (jt >> 6) & 0x3f;
443 let i1 = jt & 0x3f;
444 let ie = jt >> 12;
445 let t0 = DoubleDouble::from_bit_pair(EXPM1_T0[i0 as usize]);
446 let t1 = DoubleDouble::from_bit_pair(EXPM1_T1[i1 as usize]);
447
448 let bt = backend.quick_mult(t0, t1);
449
450 const L2H: f64 = f64::from_bits(0x3f262e42ff000000);
451 const L2L: f64 = f64::from_bits(0x3d0718432a1b0e26);
452 let dx = backend.dd_fma(L2L, t, backend.fma(-L2H, t, x));
453 let dx2 = dx * dx;
454
455 const CH: [u64; 4] = [
456 0x3ff0000000000000,
457 0x3fe0000000000000,
458 0x3fc55555557e54ff,
459 0x3fa55555553a12f4,
460 ];
461
462 let p0 = backend.fma(dx, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
463 let p1 = backend.fma(dx, f64::from_bits(CH[1]), f64::from_bits(CH[0]));
464
465 let p = backend.fma(dx2, p0, p1);
466 let mut fh = bt.hi;
467 let tx = bt.hi * dx;
468 let mut fl = backend.fma(tx, p, bt.lo);
469 let eps = f64::from_bits(0x3c0833beace2b6fe) * bt.hi;
470
471 let off: u64 = ((2048i64 + 1023i64).wrapping_sub(ie) as u64).wrapping_shl(52);
472 let e: f64;
473 if ie < 53 {
474 let flz = DoubleDouble::from_exact_add(f64::from_bits(off), fh);
475 e = flz.lo;
476 fh = flz.hi;
477 } else if ie < 75 {
478 let flz = DoubleDouble::from_exact_add(fh, f64::from_bits(off));
479 e = flz.lo;
480 fh = flz.hi;
481 } else {
482 e = 0.;
483 }
484 fl += e;
485 let ub = fh + (fl + eps);
486 let lb = fh + (fl - eps);
487 if ub != lb {
488 return as_expm1_accurate(x);
489 }
490 fast_ldexp(lb, ie as i32)
491 }
492}
493
494#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
495#[target_feature(enable = "avx", enable = "fma")]
496unsafe fn expm1_fma_impl(x: f64) -> f64 {
497 use crate::exponents::expf::FmaBackend;
498 expm1_gen(x, FmaBackend {})
499}
500
501pub fn f_expm1(x: f64) -> f64 {
505 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
506 {
507 expm1_gen(x, GenericExpfBackend {})
508 }
509 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
510 {
511 use std::sync::OnceLock;
512 static EXECUTOR: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
513 let q = EXECUTOR.get_or_init(|| {
514 if std::arch::is_x86_feature_detected!("avx")
515 && std::arch::is_x86_feature_detected!("fma")
516 {
517 expm1_fma_impl
518 } else {
519 fn def_expm1(x: f64) -> f64 {
520 expm1_gen(x, GenericExpfBackend {})
521 }
522 def_expm1
523 }
524 });
525 unsafe { q(x) }
526 }
527}
528
529#[cfg(test)]
530mod tests {
531 use super::*;
532
533 #[test]
534 fn test_expm1() {
535 assert_eq!(f_expm1(0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000028321017343872864),
536 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000028321017343872864 );
537 assert_eq!(f_expm1(36.52188110363568), 7265264535836525.);
538 assert_eq!(f_expm1(2.), 6.38905609893065);
539 assert_eq!(f_expm1(0.4321321), 0.5405386068701713);
540 assert_eq!(f_expm1(-0.0000004321321), -4.321320066309375e-7);
541 }
542}