pxfm/exponents/
expm1.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{dd_fmla, f_fmla};
30use crate::double_double::DoubleDouble;
31use crate::exponents::expf::{ExpfBackend, GenericExpfBackend};
32use crate::exponents::fast_ldexp;
33use crate::rounding::CpuRoundTiesEven;
34use crate::shared_eval::poly_dekker_generic;
35use std::hint::black_box;
36
37static TZ: [(u64, u64); 65] = [
38    (0xbc6797d4686c5393, 0xbfcc5041854df7d4),
39    (0xbc8ea1cb9d163339, 0xbfcb881a23aebb48),
40    (0x3c8f483a3e8cd60f, 0xbfcabe60e1f21838),
41    (0x3c7dffd920f493db, 0xbfc9f3129931fab0),
42    (0xbc851bfdbb129094, 0xbfc9262c1c3430a0),
43    (0x3c8cd3e5225e2206, 0xbfc857aa375db4e4),
44    (0x3c5e3a6bdaece8f9, 0xbfc78789b0a5e0c0),
45    (0xbc8daf2ae0c2d3d4, 0xbfc6b5c7478983d8),
46    (0xbc7fd36226fadd44, 0xbfc5e25fb4fde210),
47    (0x3c7d887cd0341ab0, 0xbfc50d4fab639758),
48    (0xbc8676a52a1a618b, 0xbfc43693d679612c),
49    (0x3c79776b420ad283, 0xbfc35e28db4ecd9c),
50    (0x3c73d5fd7d70a5ed, 0xbfc2840b5836cf68),
51    (0x3c5a94ad2c8fa0bf, 0xbfc1a837e4ba3760),
52    (0x3c26ad4c353465b0, 0xbfc0caab118a1278),
53    (0xbc78bba170e59b65, 0xbfbfd6c2d0e3d910),
54    (0xbc8e1e0a76cb0685, 0xbfbe14aed893eef0),
55    (0x3c8fe131f55e75f8, 0xbfbc4f1331d22d40),
56    (0xbc8b5beee8bcee31, 0xbfba85e8c62d9c10),
57    (0xbc77fe9b02c25e9b, 0xbfb8b92870fa2b58),
58    (0xbc832ae7bdaf1116, 0xbfb6e8caff341fe8),
59    (0x3c7a6cfe58cbd73b, 0xbfb514c92f634788),
60    (0x3c68798de3138a56, 0xbfb33d1bb17df2e8),
61    (0xbc3589321a7ef10b, 0xbfb161bb26cbb590),
62    (0xbc78d0e700fcfb65, 0xbfaf0540438fd5c0),
63    (0x3c8473ef07d5dd3b, 0xbfab3f864c080000),
64    (0xbc838e62149c16e2, 0xbfa7723950130400),
65    (0xbc508bb6309bd394, 0xbfa39d4a1a77e050),
66    (0xbc8bad3fd501a227, 0xbf9f8152aee94500),
67    (0x3c63d27ac39ed253, 0xbf97b88f290230e0),
68    (0xbc8b60bbd08aac55, 0xbf8fc055004416c0),
69    (0xbc4a00d03b3359de, 0xbf7fe0154aaeed80),
70    (0x0000000000000000, 0x0000000000000000),
71    (0x3c8861931c15e39b, 0x3f80100ab00222c0),
72    (0x3c77ab864b3e9045, 0x3f90202ad5778e40),
73    (0x3c74e5659d75e95b, 0x3f984890d9043740),
74    (0x3c78e0bd083aba81, 0x3fa040ac0224fd90),
75    (0x3c345cc1cf959b1b, 0x3fa465509d383eb0),
76    (0xbc8eb6980ce14da7, 0x3fa89246d053d180),
77    (0x3c77324137d6c342, 0x3facc79f4f5613a0),
78    (0xbc45272ff30eed1b, 0x3fb082b577d34ed8),
79    (0xbc81280f19dace1c, 0x3fb2a5dd543ccc50),
80    (0xbc8d550af31c8ec3, 0x3fb4cd4fc989cd68),
81    (0x3c87923b72aa582d, 0x3fb6f91575870690),
82    (0xbc776c2e732457f1, 0x3fb92937074e0cd8),
83    (0x3c881f5c92a5200f, 0x3fbb5dbd3f681220),
84    (0x3c8e8ac7a4d3206c, 0x3fbd96b0eff0e790),
85    (0xbc712db6f4bbe33b, 0x3fbfd41afcba45e8),
86    (0xbc58c4a5df1ec7e5, 0x3fc10b022db7ae68),
87    (0xbc6bd4b1c37ea8a2, 0x3fc22e3b09dc54d8),
88    (0x3c85aeb9860044d0, 0x3fc353bc9fb00b20),
89    (0xbc64c26602c63fda, 0x3fc47b8b853aafec),
90    (0xbc87f644c1f9d314, 0x3fc5a5ac59b963cc),
91    (0x3c8f5aa8ec61fc2d, 0x3fc6d223c5b10638),
92    (0x3c27ab912c69ffeb, 0x3fc800f67b00d7b8),
93    (0xbc5b3564bc0ec9cd, 0x3fc9322934f54148),
94    (0x3c86a7062465be33, 0x3fca65c0b85ac1a8),
95    (0xbc885718d2ff1bf4, 0x3fcb9bc1d3910094),
96    (0xbc8045cb0c685e08, 0x3fccd4315e9e0834),
97    (0xbc16e7fb859d5055, 0x3fce0f143b41a554),
98    (0x3c851bbdee020603, 0x3fcf4c6f5508ee5c),
99    (0x3c6e17611afc42c5, 0x3fd04623d0b0f8c8),
100    (0xbc71c5b2e8735a43, 0x3fd0e7510fd7c564),
101    (0xbc825fe139c4cffd, 0x3fd189c1ecaeb084),
102    (0xbc789843c4964554, 0x3fd22d78f0fa061a),
103];
104
105pub(crate) static EXPM1_T0: [(u64, u64); 64] = [
106    (0x0000000000000000, 0x3ff0000000000000),
107    (0xbc719083535b085e, 0x3ff02c9a3e778061),
108    (0x3c8d73e2a475b466, 0x3ff059b0d3158574),
109    (0x3c6186be4bb28500, 0x3ff0874518759bc8),
110    (0x3c98a62e4adc610a, 0x3ff0b5586cf9890f),
111    (0x3c403a1727c57b52, 0x3ff0e3ec32d3d1a2),
112    (0xbc96c51039449b3a, 0x3ff11301d0125b51),
113    (0xbc932fbf9af1369e, 0x3ff1429aaea92de0),
114    (0xbc819041b9d78a76, 0x3ff172b83c7d517b),
115    (0x3c8e5b4c7b4968e4, 0x3ff1a35beb6fcb75),
116    (0x3c9e016e00a2643c, 0x3ff1d4873168b9aa),
117    (0x3c8dc775814a8494, 0x3ff2063b88628cd6),
118    (0x3c99b07eb6c70572, 0x3ff2387a6e756238),
119    (0x3c82bd339940e9da, 0x3ff26b4565e27cdd),
120    (0x3c8612e8afad1256, 0x3ff29e9df51fdee1),
121    (0x3c90024754db41d4, 0x3ff2d285a6e4030b),
122    (0x3c86f46ad23182e4, 0x3ff306fe0a31b715),
123    (0x3c932721843659a6, 0x3ff33c08b26416ff),
124    (0xbc963aeabf42eae2, 0x3ff371a7373aa9cb),
125    (0xbc75e436d661f5e2, 0x3ff3a7db34e59ff7),
126    (0x3c8ada0911f09ebc, 0x3ff3dea64c123422),
127    (0xbc5ef3691c309278, 0x3ff4160a21f72e2a),
128    (0x3c489b7a04ef80d0, 0x3ff44e086061892d),
129    (0x3c73c1a3b69062f0, 0x3ff486a2b5c13cd0),
130    (0x3c7d4397afec42e2, 0x3ff4bfdad5362a27),
131    (0xbc94b309d25957e4, 0x3ff4f9b2769d2ca7),
132    (0xbc807abe1db13cac, 0x3ff5342b569d4f82),
133    (0x3c99bb2c011d93ac, 0x3ff56f4736b527da),
134    (0x3c96324c054647ac, 0x3ff5ab07dd485429),
135    (0x3c9ba6f93080e65e, 0x3ff5e76f15ad2148),
136    (0xbc9383c17e40b496, 0x3ff6247eb03a5585),
137    (0xbc9bb60987591c34, 0x3ff6623882552225),
138    (0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd),
139    (0xbc6bbe3a683c88aa, 0x3ff6dfb23c651a2f),
140    (0xbc816e4786887a9a, 0x3ff71f75e8ec5f74),
141    (0xbc90245957316dd4, 0x3ff75feb564267c9),
142    (0xbc841577ee049930, 0x3ff7a11473eb0187),
143    (0x3c705d02ba15797e, 0x3ff7e2f336cf4e62),
144    (0xbc9d4c1dd41532d8, 0x3ff82589994cce13),
145    (0xbc9fc6f89bd4f6ba, 0x3ff868d99b4492ed),
146    (0x3c96e9f156864b26, 0x3ff8ace5422aa0db),
147    (0x3c85cc13a2e3976c, 0x3ff8f1ae99157736),
148    (0xbc675fc781b57ebc, 0x3ff93737b0cdc5e5),
149    (0xbc9d185b7c1b85d0, 0x3ff97d829fde4e50),
150    (0x3c7c7c46b071f2be, 0x3ff9c49182a3f090),
151    (0xbc9359495d1cd532, 0x3ffa0c667b5de565),
152    (0xbc9d2f6edb8d41e2, 0x3ffa5503b23e255d),
153    (0x3c90fac90ef7fd32, 0x3ffa9e6b5579fdbf),
154    (0x3c97a1cd345dcc82, 0x3ffae89f995ad3ad),
155    (0xbc62805e3084d708, 0x3ffb33a2b84f15fb),
156    (0xbc75584f7e54ac3a, 0x3ffb7f76f2fb5e47),
157    (0x3c823dd07a2d9e84, 0x3ffbcc1e904bc1d2),
158    (0x3c811065895048de, 0x3ffc199bdd85529c),
159    (0x3c92884dff483cac, 0x3ffc67f12e57d14b),
160    (0x3c7503cbd1e949dc, 0x3ffcb720dcef9069),
161    (0xbc9cbc3743797a9c, 0x3ffd072d4a07897c),
162    (0x3c82ed02d75b3706, 0x3ffd5818dcfba487),
163    (0x3c9c2300696db532, 0x3ffda9e603db3285),
164    (0xbc91a5cd4f184b5c, 0x3ffdfc97337b9b5f),
165    (0x3c839e8980a9cc90, 0x3ffe502ee78b3ff6),
166    (0xbc9e9c23179c2894, 0x3ffea4afa2a490da),
167    (0x3c9dc7f486a4b6b0, 0x3ffefa1bee615a27),
168    (0x3c99d3e12dd8a18a, 0x3fff50765b6e4540),
169    (0x3c874853f3a5931e, 0x3fffa7c1819e90d8),
170];
171
172pub(crate) static EXPM1_T1: [(u64, u64); 64] = [
173    (0x0000000000000000, 0x3ff0000000000000),
174    (0x3c9ae8e38c59c72a, 0x3ff000b175effdc7),
175    (0xbc57b5d0d58ea8f4, 0x3ff00162f3904052),
176    (0x3c94115cb6b16a8e, 0x3ff0021478e11ce6),
177    (0xbc8d7c96f201bb2e, 0x3ff002c605e2e8cf),
178    (0x3c984711d4c35ea0, 0x3ff003779a95f959),
179    (0xbc80484245243778, 0x3ff0042936faa3d8),
180    (0xbc94b237da2025fa, 0x3ff004dadb113da0),
181    (0xbc75e00e62d6b30e, 0x3ff0058c86da1c0a),
182    (0x3c9a1d6cedbb9480, 0x3ff0063e3a559473),
183    (0xbc94acf197a00142, 0x3ff006eff583fc3d),
184    (0xbc6eaf2ea42391a6, 0x3ff007a1b865a8ca),
185    (0x3c7da93f90835f76, 0x3ff0085382faef83),
186    (0xbc86a79084ab093c, 0x3ff00905554425d4),
187    (0x3c986364f8fbe8f8, 0x3ff009b72f41a12b),
188    (0xbc882e8e14e3110e, 0x3ff00a6910f3b6fd),
189    (0xbc84f6b2a7609f72, 0x3ff00b1afa5abcbf),
190    (0xbc7e1a258ea8f71a, 0x3ff00bcceb7707ec),
191    (0x3c74362ca5bc26f2, 0x3ff00c7ee448ee02),
192    (0x3c9095a56c919d02, 0x3ff00d30e4d0c483),
193    (0xbc6406ac4e81a646, 0x3ff00de2ed0ee0f5),
194    (0x3c9b5a6902767e08, 0x3ff00e94fd0398e0),
195    (0xbc991b2060859320, 0x3ff00f4714af41d3),
196    (0x3c8427068ab22306, 0x3ff00ff93412315c),
197    (0x3c9c1d0660524e08, 0x3ff010ab5b2cbd11),
198    (0xbc9e7bdfb3204be8, 0x3ff0115d89ff3a8b),
199    (0x3c8843aa8b9cbbc6, 0x3ff0120fc089ff63),
200    (0xbc734104ee7edae8, 0x3ff012c1fecd613b),
201    (0xbc72b6aeb6176892, 0x3ff0137444c9b5b5),
202    (0x3c7a8cd33b8a1bb2, 0x3ff01426927f5278),
203    (0x3c72edc08e5da99a, 0x3ff014d8e7ee8d2f),
204    (0x3c857ba2dc7e0c72, 0x3ff0158b4517bb88),
205    (0x3c9b61299ab8cdb8, 0x3ff0163da9fb3335),
206    (0xbc990565902c5f44, 0x3ff016f0169949ed),
207    (0x3c870fc41c5c2d54, 0x3ff017a28af25567),
208    (0x3c94b9a6e145d76c, 0x3ff018550706ab62),
209    (0xbc7008eff5142bfa, 0x3ff019078ad6a19f),
210    (0xbc977669f033c7de, 0x3ff019ba16628de2),
211    (0xbc909bb78eeead0a, 0x3ff01a6ca9aac5f3),
212    (0x3c9371231477ece6, 0x3ff01b1f44af9f9e),
213    (0x3c75e7626621eb5a, 0x3ff01bd1e77170b4),
214    (0xbc9bc72b100828a4, 0x3ff01c8491f08f08),
215    (0xbc6ce39cbbab8bbe, 0x3ff01d37442d5070),
216    (0x3c816996709da2e2, 0x3ff01de9fe280ac8),
217    (0xbc8c11f5239bf536, 0x3ff01e9cbfe113ef),
218    (0x3c8e1d4eb5edc6b4, 0x3ff01f4f8958c1c6),
219    (0xbc9afb99946ee3f0, 0x3ff020025a8f6a35),
220    (0xbc98f06d8a148a32, 0x3ff020b533856324),
221    (0xbc82bf310fc54eb6, 0x3ff02168143b0281),
222    (0xbc9c95a035eb4176, 0x3ff0221afcb09e3e),
223    (0xbc9491793e46834c, 0x3ff022cdece68c4f),
224    (0xbc73e8d0d9c49090, 0x3ff02380e4dd22ad),
225    (0xbc9314aa16278aa4, 0x3ff02433e494b755),
226    (0x3c848daf888e9650, 0x3ff024e6ec0da046),
227    (0x3c856dc8046821f4, 0x3ff02599fb483385),
228    (0x3c945b42356b9d46, 0x3ff0264d1244c719),
229    (0xbc7082ef51b61d7e, 0x3ff027003103b10e),
230    (0x3c72106ed0920a34, 0x3ff027b357854772),
231    (0xbc9fd4cf26ea5d0e, 0x3ff0286685c9e059),
232    (0xbc909f8775e78084, 0x3ff02919bbd1d1d8),
233    (0x3c564cbba902ca28, 0x3ff029ccf99d720a),
234    (0x3c94383ef231d206, 0x3ff02a803f2d170d),
235    (0x3c94a47a505b3a46, 0x3ff02b338c811703),
236    (0x3c9e471202234680, 0x3ff02be6e199c811),
237];
238
239static EXPM1_DD1: [(u64, u64); 11] = [
240    (0x3c65555555555554, 0x3fc5555555555555),
241    (0x3c45555555555123, 0x3fa5555555555555),
242    (0x3c01111111118167, 0x3f81111111111111),
243    (0xbbef49f49e220cea, 0x3f56c16c16c16c17),
244    (0x3b6a019eff6f919c, 0x3f2a01a01a01a01a),
245    (0x3b39fcff48a75b41, 0x3efa01a01a01a01a),
246    (0xbb6c14f73758cd7f, 0x3ec71de3a556c734),
247    (0x3b3dfce97931018f, 0x3e927e4fb7789f5c),
248    (0x3afc513da9e4c9c5, 0x3e5ae64567f544e3),
249    (0x3acca00af84f2b60, 0x3e21eed8eff8d831),
250    (0x3a8f27ac6000898f, 0x3de6124613a86e8f),
251];
252
253static EXPM1_DD2: [(u64, u64); 7] = [
254    (0x3ff0000000000000, 0x0000000000000000),
255    (0x3fe0000000000000, 0x39c712f72ecec2cf),
256    (0x3fc5555555555555, 0x3c65555555554d07),
257    (0x3fa5555555555555, 0x3c455194d28275da),
258    (0x3f81111111111111, 0x3c012faa0e1c0f7b),
259    (0x3f56c16c16da6973, 0xbbf4ba45ab25d2a3),
260    (0x3f2a01a019eb7f31, 0xbbc9091d845ecd36),
261];
262
263#[inline]
264pub(crate) fn opoly_dd_generic<const N: usize>(
265    x: DoubleDouble,
266    poly: [(u64, u64); N],
267) -> DoubleDouble {
268    let zch = poly.last().unwrap();
269    let p0 = DoubleDouble::from_exact_add(f64::from_bits(zch.0), x.lo);
270    let ach = p0.hi;
271    let acl = f64::from_bits(zch.1) + p0.lo;
272    let mut ch = DoubleDouble::new(acl, ach);
273
274    for zch in poly.iter().rev().skip(1) {
275        ch = DoubleDouble::quick_mult_f64(ch, x.hi);
276        let z0 = DoubleDouble::from_bit_pair(*zch);
277        ch = DoubleDouble::add(z0, ch);
278    }
279
280    ch
281}
282
283#[cold]
284fn as_expm1_accurate(x: f64) -> f64 {
285    let mut ix;
286    if x.abs() < 0.25 {
287        const CL: [u64; 6] = [
288            0x3da93974a8ca5354,
289            0x3d6ae7f3e71e4908,
290            0x3d2ae7f357341648,
291            0x3ce952c7f96664cb,
292            0x3ca686f8ce633aae,
293            0x3c62f49b2fbfb5b6,
294        ];
295
296        let fl0 = f_fmla(x, f64::from_bits(CL[5]), f64::from_bits(CL[4]));
297        let fl1 = f_fmla(x, fl0, f64::from_bits(CL[3]));
298        let fl2 = f_fmla(x, fl1, f64::from_bits(CL[2]));
299        let fl3 = f_fmla(x, fl2, f64::from_bits(CL[1]));
300
301        let fl = x * f_fmla(x, fl3, f64::from_bits(CL[0]));
302        let mut f = opoly_dd_generic(DoubleDouble::new(fl, x), EXPM1_DD1);
303        f = DoubleDouble::quick_mult_f64(f, x);
304        f = DoubleDouble::quick_mult_f64(f, x);
305        f = DoubleDouble::quick_mult_f64(f, x);
306        let hx = 0.5 * x;
307        let dx2dd = DoubleDouble::from_exact_mult(x, hx);
308        f = DoubleDouble::add(dx2dd, f);
309        let v0 = DoubleDouble::from_exact_add(x, f.hi);
310        let v1 = DoubleDouble::from_exact_add(v0.lo, f.lo);
311        let v2 = DoubleDouble::from_exact_add(v0.hi, v1.hi);
312        let mut v3 = DoubleDouble::from_exact_add(v2.lo, v1.lo);
313        ix = v3.hi.to_bits();
314        if (ix & 0x000fffffffffffff) == 0 {
315            let v = v3.lo.to_bits();
316            let d: i64 = ((((ix as i64) >> 63) ^ ((v as i64) >> 63)) as u64)
317                .wrapping_shl(1)
318                .wrapping_add(1) as i64;
319            ix = ix.wrapping_add(d as u64);
320            v3.hi = f64::from_bits(ix);
321        }
322        v3.hi + v2.hi
323    } else {
324        const S: f64 = f64::from_bits(0x40b71547652b82fe);
325        let t = (x * S).cpu_round_ties_even();
326        let jt: i64 = t as i64;
327        let i0 = (jt >> 6) & 0x3f;
328        let i1 = jt & 0x3f;
329        let ie = jt >> 12;
330        let t0 = DoubleDouble::from_bit_pair(EXPM1_T0[i0 as usize]);
331        let t1 = DoubleDouble::from_bit_pair(EXPM1_T1[i1 as usize]);
332
333        let bt = DoubleDouble::quick_mult(t0, t1);
334
335        const L2H: f64 = f64::from_bits(0x3f262e42ff000000);
336        const L2L: f64 = f64::from_bits(0x3d0718432a1b0e26);
337        const L2LL: f64 = f64::from_bits(0x3999ff0342542fc3);
338
339        let dx = f_fmla(-L2H, t, x);
340        let dxl = L2L * t;
341        let dxll = f_fmla(L2LL, t, dd_fmla(L2L, t, -dxl));
342        let dxh = dx + dxl;
343        let dxl = (dx - dxh) + dxl + dxll;
344        let mut f = poly_dekker_generic(DoubleDouble::new(dxl, dxh), EXPM1_DD2);
345        f = DoubleDouble::quick_mult(DoubleDouble::new(dxl, dxh), f);
346        f = DoubleDouble::quick_mult(f, bt);
347        f = DoubleDouble::add(bt, f);
348        let off: u64 = (2048i64 + 1023i64).wrapping_sub(ie).wrapping_shl(52) as u64;
349        let e: f64;
350        if ie < 53 {
351            let fhz = DoubleDouble::from_exact_add(f64::from_bits(off), f.hi);
352            f.hi = fhz.hi;
353            e = fhz.lo;
354        } else if ie < 104 {
355            let fhz = DoubleDouble::from_exact_add(f.hi, f64::from_bits(off));
356            f.hi = fhz.hi;
357            e = fhz.lo;
358        } else {
359            e = 0.;
360        }
361        f.lo += e;
362        let dst = DoubleDouble::from_exact_add(f.hi, f.lo);
363        fast_ldexp(dst.hi, ie as i32)
364    }
365}
366
367#[inline(always)]
368fn expm1_gen<B: ExpfBackend>(x: f64, backend: B) -> f64 {
369    let ix = x.to_bits();
370    let aix: u64 = ix & 0x7fff_ffff_ffff_ffff;
371    if aix < 0x3fd0000000000000u64 {
372        if aix < 0x3ca0000000000000u64 {
373            if aix == 0 {
374                return x;
375            }
376            return backend.dyad_fma(f64::from_bits(0x3c90000000000000), x.abs(), x);
377        }
378        let sx = f64::from_bits(0x4060000000000000) * x;
379        let fx = backend.round_ties_even(sx);
380        let z = sx - fx;
381        let z2 = z * z;
382        let i: i64 = unsafe {
383            fx.to_int_unchecked::<i64>() // fx is already integer, this is just a conversion
384        };
385        let t = DoubleDouble::from_bit_pair(TZ[i.wrapping_add(32) as usize]);
386        const C: [u64; 6] = [
387            0x3f80000000000000,
388            0x3f00000000000000,
389            0x3e755555555551ad,
390            0x3de555555555599c,
391            0x3d511111ad1ad69d,
392            0x3cb6c16c168b1fb5,
393        ];
394        let fh = z * f64::from_bits(C[0]);
395
396        let fl0 = backend.fma(z, f64::from_bits(C[5]), f64::from_bits(C[4]));
397        let fl1 = backend.fma(z, f64::from_bits(C[2]), f64::from_bits(C[1]));
398
399        let fw0 = backend.fma(z, fl0, f64::from_bits(C[3]));
400
401        let fl = z2 * backend.fma(z2, fw0, fl1);
402        let mut f = DoubleDouble::new(fl, fh);
403        let e0 = f64::from_bits(0x3bea000000000000);
404        let eps = z2 * e0 + f64::from_bits(0x3970000000000000);
405        let mut r = DoubleDouble::from_exact_add(t.hi, f.hi);
406        r.lo += t.lo + f.lo;
407        f = backend.quick_mult(t, f);
408        f = DoubleDouble::add(r, f);
409        let ub = f.hi + (f.lo + eps);
410        let lb = f.hi + (f.lo - eps);
411        if ub != lb {
412            return as_expm1_accurate(x);
413        }
414        lb
415    } else {
416        if aix >= 0x40862e42fefa39f0u64 {
417            if aix > 0x7ff0000000000000u64 {
418                return x + x;
419            } // nan
420            if aix == 0x7ff0000000000000u64 {
421                return if (ix >> 63) != 0 { -1.0 } else { x };
422            }
423            if (ix >> 63) == 0 {
424                const Z: f64 = f64::from_bits(0x7fe0000000000000);
425                return black_box(Z) * black_box(Z);
426            }
427        }
428        if ix >= 0xc0425e4f7b2737fau64 {
429            if ix >= 0xc042b708872320e2u64 {
430                return black_box(-1.0) + black_box(f64::from_bits(0x3c80000000000000));
431            }
432            return (f64::from_bits(0x40425e4f7b2737fa) + x + f64::from_bits(0x3cc8486612173c69))
433                * f64::from_bits(0x3c971547652b82fe)
434                - f64::from_bits(0x3fefffffffffffff);
435        }
436
437        const S: f64 = f64::from_bits(0x40b71547652b82fe);
438        let t = backend.round_ties_even(x * S);
439        let jt: i64 = unsafe {
440            t.to_int_unchecked::<i64>() // t is already integer, this is just a conversion
441        };
442        let i0 = (jt >> 6) & 0x3f;
443        let i1 = jt & 0x3f;
444        let ie = jt >> 12;
445        let t0 = DoubleDouble::from_bit_pair(EXPM1_T0[i0 as usize]);
446        let t1 = DoubleDouble::from_bit_pair(EXPM1_T1[i1 as usize]);
447
448        let bt = backend.quick_mult(t0, t1);
449
450        const L2H: f64 = f64::from_bits(0x3f262e42ff000000);
451        const L2L: f64 = f64::from_bits(0x3d0718432a1b0e26);
452        let dx = backend.dd_fma(L2L, t, backend.fma(-L2H, t, x));
453        let dx2 = dx * dx;
454
455        const CH: [u64; 4] = [
456            0x3ff0000000000000,
457            0x3fe0000000000000,
458            0x3fc55555557e54ff,
459            0x3fa55555553a12f4,
460        ];
461
462        let p0 = backend.fma(dx, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
463        let p1 = backend.fma(dx, f64::from_bits(CH[1]), f64::from_bits(CH[0]));
464
465        let p = backend.fma(dx2, p0, p1);
466        let mut fh = bt.hi;
467        let tx = bt.hi * dx;
468        let mut fl = backend.fma(tx, p, bt.lo);
469        let eps = f64::from_bits(0x3c0833beace2b6fe) * bt.hi;
470
471        let off: u64 = ((2048i64 + 1023i64).wrapping_sub(ie) as u64).wrapping_shl(52);
472        let e: f64;
473        if ie < 53 {
474            let flz = DoubleDouble::from_exact_add(f64::from_bits(off), fh);
475            e = flz.lo;
476            fh = flz.hi;
477        } else if ie < 75 {
478            let flz = DoubleDouble::from_exact_add(fh, f64::from_bits(off));
479            e = flz.lo;
480            fh = flz.hi;
481        } else {
482            e = 0.;
483        }
484        fl += e;
485        let ub = fh + (fl + eps);
486        let lb = fh + (fl - eps);
487        if ub != lb {
488            return as_expm1_accurate(x);
489        }
490        fast_ldexp(lb, ie as i32)
491    }
492}
493
494#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
495#[target_feature(enable = "avx", enable = "fma")]
496unsafe fn expm1_fma_impl(x: f64) -> f64 {
497    use crate::exponents::expf::FmaBackend;
498    expm1_gen(x, FmaBackend {})
499}
500
501/// Computes e^x - 1
502///
503/// Max found ULP 0.5
504pub fn f_expm1(x: f64) -> f64 {
505    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
506    {
507        expm1_gen(x, GenericExpfBackend {})
508    }
509    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
510    {
511        use std::sync::OnceLock;
512        static EXECUTOR: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
513        let q = EXECUTOR.get_or_init(|| {
514            if std::arch::is_x86_feature_detected!("avx")
515                && std::arch::is_x86_feature_detected!("fma")
516            {
517                expm1_fma_impl
518            } else {
519                fn def_expm1(x: f64) -> f64 {
520                    expm1_gen(x, GenericExpfBackend {})
521                }
522                def_expm1
523            }
524        });
525        unsafe { q(x) }
526    }
527}
528
529#[cfg(test)]
530mod tests {
531    use super::*;
532
533    #[test]
534    fn test_expm1() {
535        assert_eq!(f_expm1(0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000028321017343872864),
536                   0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000028321017343872864 );
537        assert_eq!(f_expm1(36.52188110363568), 7265264535836525.);
538        assert_eq!(f_expm1(2.), 6.38905609893065);
539        assert_eq!(f_expm1(0.4321321), 0.5405386068701713);
540        assert_eq!(f_expm1(-0.0000004321321), -4.321320066309375e-7);
541    }
542}