1use crate::common::{f_fmla, f_fmlaf, set_exponent_f32};
30use crate::logs::logf::{GenLogfBackend, LogfBackend};
31use crate::polyeval::f_polyeval3;
32
33pub(crate) static LOG2_R: [u64; 128] = [
34 0x0000000000000000,
35 0x3f872c7ba20f7327,
36 0x3f9743ee861f3556,
37 0x3fa184b8e4c56af8,
38 0x3fa77394c9d958d5,
39 0x3fad6ebd1f1febfe,
40 0x3fb1bb32a600549d,
41 0x3fb4c560fe68af88,
42 0x3fb7d60496cfbb4c,
43 0x3fb960caf9abb7ca,
44 0x3fbc7b528b70f1c5,
45 0x3fbf9c95dc1d1165,
46 0x3fc097e38ce60649,
47 0x3fc22dadc2ab3497,
48 0x3fc3c6fb650cde51,
49 0x3fc494f863b8df35,
50 0x3fc633a8bf437ce1,
51 0x3fc7046031c79f85,
52 0x3fc8a8980abfbd32,
53 0x3fc97c1cb13c7ec1,
54 0x3fcb2602497d5346,
55 0x3fcbfc67a7fff4cc,
56 0x3fcdac22d3e441d3,
57 0x3fce857d3d361368,
58 0x3fd01d9bbcfa61d4,
59 0x3fd08bce0d95fa38,
60 0x3fd169c05363f158,
61 0x3fd1d982c9d52708,
62 0x3fd249cd2b13cd6c,
63 0x3fd32bfee370ee68,
64 0x3fd39de8e1559f6f,
65 0x3fd4106017c3eca3,
66 0x3fd4f6fbb2cec598,
67 0x3fd56b22e6b578e5,
68 0x3fd5dfdcf1eeae0e,
69 0x3fd6552b49986277,
70 0x3fd6cb0f6865c8ea,
71 0x3fd7b89f02cf2aad,
72 0x3fd8304d90c11fd3,
73 0x3fd8a8980abfbd32,
74 0x3fd921800924dd3b,
75 0x3fd99b072a96c6b2,
76 0x3fda8ff971810a5e,
77 0x3fdb0b67f4f46810,
78 0x3fdb877c57b1b070,
79 0x3fdc043859e2fdb3,
80 0x3fdc819dc2d45fe4,
81 0x3fdcffae611ad12b,
82 0x3fdd7e6c0abc3579,
83 0x3fddfdd89d586e2b,
84 0x3fde7df5fe538ab3,
85 0x3fdefec61b011f85,
86 0x3fdf804ae8d0cd02,
87 0x3fe0014332be0033,
88 0x3fe042bd4b9a7c99,
89 0x3fe08494c66b8ef0,
90 0x3fe0c6caaf0c5597,
91 0x3fe1096015dee4da,
92 0x3fe14c560fe68af9,
93 0x3fe18fadb6e2d3c2,
94 0x3fe1d368296b5255,
95 0x3fe217868b0c37e8,
96 0x3fe25c0a0463beb0,
97 0x3fe2a0f3c340705c,
98 0x3fe2e644fac04fd8,
99 0x3fe2e644fac04fd8,
100 0x3fe32bfee370ee68,
101 0x3fe37222bb70747c,
102 0x3fe3b8b1c68fa6ed,
103 0x3fe3ffad4e74f1d6,
104 0x3fe44716a2c08262,
105 0x3fe44716a2c08262,
106 0x3fe48eef19317991,
107 0x3fe4d7380dcc422d,
108 0x3fe51ff2e30214bc,
109 0x3fe5692101d9b4a6,
110 0x3fe5b2c3da19723b,
111 0x3fe5b2c3da19723b,
112 0x3fe5fcdce2727ddb,
113 0x3fe6476d98ad990a,
114 0x3fe6927781d932a8,
115 0x3fe6927781d932a8,
116 0x3fe6ddfc2a78fc63,
117 0x3fe729fd26b707c8,
118 0x3fe7767c12967a45,
119 0x3fe7767c12967a45,
120 0x3fe7c37a9227e7fb,
121 0x3fe810fa51bf65fd,
122 0x3fe810fa51bf65fd,
123 0x3fe85efd062c656d,
124 0x3fe8ad846cf369a4,
125 0x3fe8ad846cf369a4,
126 0x3fe8fc924c89ac84,
127 0x3fe94c287492c4db,
128 0x3fe94c287492c4db,
129 0x3fe99c48be2063c8,
130 0x3fe9ecf50bf43f13,
131 0x3fe9ecf50bf43f13,
132 0x3fea3e2f4ac43f60,
133 0x3fea8ff971810a5e,
134 0x3fea8ff971810a5e,
135 0x3feae255819f022d,
136 0x3feb35458761d479,
137 0x3feb35458761d479,
138 0x3feb88cb9a2ab521,
139 0x3feb88cb9a2ab521,
140 0x3febdce9dcc96187,
141 0x3fec31a27dd00b4a,
142 0x3fec31a27dd00b4a,
143 0x3fec86f7b7ea4a89,
144 0x3fec86f7b7ea4a89,
145 0x3fecdcebd2373995,
146 0x3fed338120a6dd9d,
147 0x3fed338120a6dd9d,
148 0x3fed8aba045b01c8,
149 0x3fed8aba045b01c8,
150 0x3fede298ec0bac0d,
151 0x3fede298ec0bac0d,
152 0x3fee3b20546f554a,
153 0x3fee3b20546f554a,
154 0x3fee9452c8a71028,
155 0x3fee9452c8a71028,
156 0x3feeee32e2aeccbf,
157 0x3feeee32e2aeccbf,
158 0x3fef48c34bd1e96f,
159 0x3fef48c34bd1e96f,
160 0x3fefa406bd2443df,
161 0x3ff0000000000000,
162];
163
164#[inline(always)]
165fn log2f_gen<B: LogfBackend>(x: f32, backend: B) -> f32 {
166 let mut x_u = x.to_bits();
167
168 const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
169
170 let mut m = -(E_BIAS as i32);
171 if x_u < f32::MIN_POSITIVE.to_bits() || x_u > f32::MAX.to_bits() {
172 if x == 0.0 {
173 return f32::NEG_INFINITY;
174 }
175 if x_u == 0x80000000u32 {
176 return f32::NEG_INFINITY;
177 }
178 if x.is_sign_negative() && !x.is_nan() {
179 return f32::NAN + x;
180 }
181 if x.is_nan() || x.is_infinite() {
183 return x + x;
184 }
185 x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits();
187 m -= 23;
188 }
189
190 m = m.wrapping_add(x_u.wrapping_shr(23) as i32);
191 let mant = x_u & 0x007F_FFFF;
192 let index = mant.wrapping_shr(16);
193
194 x_u = set_exponent_f32(x_u, 0x7F);
195
196 let u = f32::from_bits(x_u);
197
198 let v = if B::HAS_FMA {
199 use crate::logs::logf::LOG_REDUCTION_F32;
200 backend.fmaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64 } else {
202 use crate::logs::LOG_RANGE_REDUCTION;
203 backend.fma(
204 u as f64,
205 f64::from_bits(LOG_RANGE_REDUCTION[index as usize]),
206 -1.0,
207 ) };
209 let extra_factor = m as f64 + f64::from_bits(LOG2_R[index as usize]);
213
214 const COEFFS: [u64; 5] = [
215 0x3ff71547652b8133,
216 0xbfe71547652d1e33,
217 0x3fdec70a098473de,
218 0xbfd7154c5ccdf121,
219 0x3fd2514fd90a130a,
220 ];
221 let v2 = v * v; let c0 = backend.fma(v, f64::from_bits(COEFFS[0]), extra_factor);
223 let c1 = backend.fma(v, f64::from_bits(COEFFS[2]), f64::from_bits(COEFFS[1]));
224 let c2 = backend.fma(v, f64::from_bits(COEFFS[4]), f64::from_bits(COEFFS[3]));
225
226 let r = backend.polyeval3(v2, c0, c1, c2);
227 r as f32
228}
229
230#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
231#[target_feature(enable = "avx", enable = "fma")]
232unsafe fn log2f_fma_impl(x: f32) -> f32 {
233 use crate::logs::logf::FmaLogfBackend;
234 log2f_gen(x, FmaLogfBackend {})
235}
236
237pub fn f_log2f(x: f32) -> f32 {
241 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
242 {
243 log2f_gen(x, GenLogfBackend {})
244 }
245 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
246 {
247 use std::sync::OnceLock;
248 static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
249 let q = EXECUTOR.get_or_init(|| {
250 if std::arch::is_x86_feature_detected!("avx")
251 && std::arch::is_x86_feature_detected!("fma")
252 {
253 log2f_fma_impl
254 } else {
255 fn def_log2f(x: f32) -> f32 {
256 log2f_gen(x, GenLogfBackend {})
257 }
258 def_log2f
259 }
260 });
261 unsafe { q(x) }
262 }
263}
264
265#[inline(always)]
269#[allow(dead_code)]
270pub(crate) fn f_log2fx(x: f32) -> f64 {
271 let mut x_u = x.to_bits();
272
273 const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
274
275 let mut m = -(E_BIAS as i32);
276 if x_u == 0x3f80_0000u32 {
277 return 0.0;
278 }
279
280 if x_u < f32::MIN_POSITIVE.to_bits() || x_u > f32::MAX.to_bits() {
281 if x == 0.0 {
282 return f64::NEG_INFINITY;
283 }
284 if x_u == 0x80000000u32 {
285 return f64::NEG_INFINITY;
286 }
287 if x.is_sign_negative() && !x.is_nan() {
288 return f64::NAN + x as f64;
289 }
290 if x.is_nan() || x.is_infinite() {
292 return (x + x) as f64;
293 }
294 x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits();
296 m -= 23;
297 }
298
299 m = m.wrapping_add(x_u.wrapping_shr(23) as i32);
300 let mant = x_u & 0x007F_FFFF;
301 let index = mant.wrapping_shr(16);
302
303 x_u = set_exponent_f32(x_u, 0x7F);
304
305 let v;
306 let u = f32::from_bits(x_u);
307
308 #[cfg(any(
309 all(
310 any(target_arch = "x86", target_arch = "x86_64"),
311 target_feature = "fma"
312 ),
313 target_arch = "aarch64"
314 ))]
315 {
316 use crate::logs::logf::LOG_REDUCTION_F32;
317 v = f_fmlaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64; }
319 #[cfg(not(any(
320 all(
321 any(target_arch = "x86", target_arch = "x86_64"),
322 target_feature = "fma"
323 ),
324 target_arch = "aarch64"
325 )))]
326 {
327 use crate::logs::log2::LOG_RANGE_REDUCTION;
328 v = f_fmla(
329 u as f64,
330 f64::from_bits(LOG_RANGE_REDUCTION[index as usize]),
331 -1.0,
332 ); }
334 let extra_factor = m as f64 + f64::from_bits(LOG2_R[index as usize]);
338
339 const COEFFS: [u64; 5] = [
340 0x3ff71547652b8133,
341 0xbfe71547652d1e33,
342 0x3fdec70a098473de,
343 0xbfd7154c5ccdf121,
344 0x3fd2514fd90a130a,
345 ];
346 let v2 = v * v; let c0 = f_fmla(v, f64::from_bits(COEFFS[0]), extra_factor);
348 let c1 = f_fmla(v, f64::from_bits(COEFFS[2]), f64::from_bits(COEFFS[1]));
349 let c2 = f_fmla(v, f64::from_bits(COEFFS[4]), f64::from_bits(COEFFS[3]));
350
351 f_polyeval3(v2, c0, c1, c2)
352}
353
354#[inline(always)]
356#[allow(dead_code)]
357pub(crate) fn dirty_log2f(d: f32) -> f32 {
358 let mut ix = d.to_bits();
359 ix = ix.wrapping_add(0x3f800000 - 0x3f3504f3);
361 let n = (ix >> 23) as i32 - 0x7f;
362 ix = (ix & 0x007fffff).wrapping_add(0x3f3504f3);
363 let a = f32::from_bits(ix);
364
365 let x = (a - 1.) / (a + 1.);
366
367 let x2 = x * x;
368 let mut u = 0.4121985850084821691e+0;
369 u = f_fmlaf(u, x2, 0.5770780163490337802e+0);
370 u = f_fmlaf(u, x2, 0.9617966939259845749e+0);
371 f_fmlaf(x2 * x, u, f_fmlaf(x, 0.2885390081777926802e+1, n as f32))
372}
373
374#[cfg(test)]
375mod tests {
376 use super::*;
377
378 #[test]
379 fn test_log2f() {
380 assert!((f_log2f(0.35f32) - 0.35f32.log2()).abs() < 1e-5);
381 assert!((f_log2f(0.9f32) - 0.9f32.log2()).abs() < 1e-5);
382 assert_eq!(f_log2f(0.), f32::NEG_INFINITY);
383 assert_eq!(f_log2f(1.0), 0.0);
384 assert!(f_log2f(-1.).is_nan());
385 assert!(f_log2f(f32::NAN).is_nan());
386 assert!(f_log2f(f32::NEG_INFINITY).is_nan());
387 assert_eq!(f_log2f(f32::INFINITY), f32::INFINITY);
388 }
389
390 #[test]
391 fn test_dirty_log2f() {
392 assert!((dirty_log2f(0.35f32) - 0.35f32.log2()).abs() < 1e-5);
393 assert!((dirty_log2f(0.9f32) - 0.9f32.log2()).abs() < 1e-5);
394 }
395}