pxfm/exponents/expm1f.rs
1/*
2 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1. Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2. Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3. Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::exponents::expf::{ExpfBackend, GenericExpfBackend};
30
31#[inline(always)]
32fn expm1f_gen<B: ExpfBackend>(x: f32, backend: B) -> f32 {
33 let x_u: u32 = x.to_bits();
34 let x_abs = x_u & 0x7fff_ffffu32;
35
36 // When |x| > 25*log(2), or nan
37 if x_abs >= 0x418a_a123u32 {
38 // x < log(2^-25)
39 if x.is_sign_negative() {
40 // exp(-Inf) = 0
41 if x.is_infinite() {
42 return -1.0;
43 }
44 // exp(nan) = nan
45 if x.is_nan() {
46 return x;
47 }
48 return -1.0;
49 } else {
50 // x >= 89 or nan
51 if x_u >= 0x42b2_0000 {
52 return x + f32::INFINITY;
53 }
54 }
55 }
56
57 // |x| < 2^-4
58 if x_abs < 0x3d80_0000u32 {
59 // |x| < 2^-25
60 if x_abs < 0x3300_0000u32 {
61 // x = -0.0f
62 if x_u == 0x8000_0000u32 {
63 return x;
64 }
65 // When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x
66 // is:
67 // |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x|
68 // = |x|
69 // < 2^-25
70 // < epsilon(1)/2.
71 // To simplify the rounding decision and make it more efficient, we use
72 // fma(x, x, x) ~ x + x^2 instead.
73 // Note: to use the formula x + x^2 to decide the correct rounding, we
74 // do need fma(x, x, x) to prevent underflow caused by x*x when |x| <
75 // 2^-76. For targets without FMA instructions, we simply use double for
76 // intermediate results as it is more efficient than using an emulated
77 // version of FMA.
78 #[cfg(any(
79 all(
80 any(target_arch = "x86", target_arch = "x86_64"),
81 target_feature = "fma"
82 ),
83 target_arch = "aarch64"
84 ))]
85 {
86 return backend.fmaf(x, x, x);
87 }
88 #[cfg(not(any(
89 all(
90 any(target_arch = "x86", target_arch = "x86_64"),
91 target_feature = "fma"
92 ),
93 target_arch = "aarch64"
94 )))]
95 {
96 let xd = x as f64;
97 return backend.fma(xd, xd, xd) as f32;
98 }
99 }
100
101 const C: [u64; 7] = [
102 0x3fe0000000000000,
103 0x3fc55555555557dd,
104 0x3fa55555555552fa,
105 0x3f8111110fcd58b7,
106 0x3f56c16c1717660b,
107 0x3f2a0241f0006d62,
108 0x3efa01e3f8d3c060,
109 ];
110
111 // 2^-25 <= |x| < 2^-4
112 let xd = x as f64;
113 let xsq = xd * xd;
114 // Degree-8 minimax polynomial generated by Sollya with:
115 // > display = hexadecimal;
116 // > P = fpminimax((expm1(x) - x)/x^2, 6, [|D...|], [-2^-4, 2^-4]);
117
118 return backend.fma(
119 backend.polyeval7(
120 xd,
121 f64::from_bits(C[0]),
122 f64::from_bits(C[1]),
123 f64::from_bits(C[2]),
124 f64::from_bits(C[3]),
125 f64::from_bits(C[4]),
126 f64::from_bits(C[5]),
127 f64::from_bits(C[6]),
128 ),
129 xsq,
130 xd,
131 ) as f32;
132 }
133
134 // For -104 < x < 89, to compute expm1(x), we perform the following range
135 // reduction: find hi, mid, lo such that:
136 // x = hi + mid + lo, in which
137 // hi is an integer,
138 // mid * 2^7 is an integer
139 // -2^(-8) <= lo < 2^-8.
140 // In particular,
141 // hi + mid = round(x * 2^7) * 2^(-7).
142 // Then,
143 // expm1(x) = expm1(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo) - 1.
144 // We store exp(hi) and exp(mid) in the lookup tables EXP_M1 and EXP_M2
145 // respectively. exp(lo) is computed using a degree-4 minimax polynomial
146 // generated by Sollya.
147
148 // x_hi = (hi + mid) * 2^7 = round(x * 2^7).
149 let kf = backend.roundf(x * 128.);
150 // Subtract (hi + mid) from x to get lo.
151 let xd = backend.fmaf(kf, -0.0078125 /* - 1/128 */, x) as f64;
152 let mut x_hi = unsafe { kf.to_int_unchecked::<i32>() }; // it's already not indeterminate.
153 x_hi += 104 << 7;
154 // hi = x_hi >> 7
155 let exp_hi = f64::from_bits(crate::exponents::expf::EXP_M1[(x_hi >> 7) as usize]);
156 // mid * 2^7 = x_hi & 0x0000'007fU;
157 let exp_mid = f64::from_bits(crate::exponents::expf::EXP_M2[(x_hi & 0x7f) as usize]);
158 // Degree-4 minimax polynomial generated by Sollya with the following
159 // commands:
160 // d = [-2^-8, 2^-8];
161 // f_exp = expm1(x)/x;
162 // Q = fpminimax(f_exp, 3, [|D...|], [-2^-8, 2^-8]);
163 let p = backend.polyeval5(
164 xd,
165 1.,
166 f64::from_bits(0x3feffffffffff777),
167 f64::from_bits(0x3fe000000000071c),
168 f64::from_bits(0x3fc555566668e5e7),
169 f64::from_bits(0x3fa55555555ef243),
170 );
171 backend.fma(p * exp_hi, exp_mid, -1.) as f32
172}
173
174#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
175#[target_feature(enable = "avx", enable = "fma")]
176unsafe fn expm1f_fma_impl(x: f32) -> f32 {
177 use crate::exponents::expf::FmaBackend;
178 expm1f_gen(x, FmaBackend {})
179}
180
181/// Computes e^x - 1
182///
183/// Max ULP 0.5
184#[inline]
185pub fn f_expm1f(x: f32) -> f32 {
186 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
187 {
188 expm1f_gen(x, GenericExpfBackend {})
189 }
190 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
191 {
192 use std::sync::OnceLock;
193 static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
194 let q = EXECUTOR.get_or_init(|| {
195 if std::arch::is_x86_feature_detected!("avx")
196 && std::arch::is_x86_feature_detected!("fma")
197 {
198 expm1f_fma_impl
199 } else {
200 fn def_expm1f(x: f32) -> f32 {
201 expm1f_gen(x, GenericExpfBackend {})
202 }
203 def_expm1f
204 }
205 });
206 unsafe { q(x) }
207 }
208}
209
210#[cfg(test)]
211mod tests {
212 use crate::f_expm1f;
213
214 #[test]
215 fn test_expm1f() {
216 assert_eq!(f_expm1f(-3.0923562e-5), -3.0923085e-5);
217 assert_eq!(f_expm1f(2.213121), 8.144211);
218 assert_eq!(f_expm1f(-3.213121), -0.9597691);
219 assert_eq!(f_expm1f(-2.35099e-38), -2.35099e-38);
220 assert_eq!(
221 f_expm1f(0.00000000000000000000000000000000000004355616),
222 0.00000000000000000000000000000000000004355616
223 );
224 assert_eq!(f_expm1f(25.12315), 81441420000.0);
225 assert_eq!(f_expm1f(12.986543), 436498.6);
226 assert_eq!(f_expm1f(-12.986543), -0.99999774);
227 assert_eq!(f_expm1f(-25.12315), -1.0);
228 assert_eq!(f_expm1f(f32::INFINITY), f32::INFINITY);
229 assert_eq!(f_expm1f(f32::NEG_INFINITY), -1.);
230 assert!(f_expm1f(f32::NAN).is_nan());
231 }
232}