1#![allow(clippy::too_many_arguments)]
30use crate::common::{dd_fmla, dyad_fmla, f_fmla, f_fmlaf, fmlaf, pow2if, rintfk};
31use crate::double_double::DoubleDouble;
32use crate::rounding::{CpuFloor, CpuRound};
33
34const L2U_F: f32 = 0.693_145_751_953_125;
35const L2L_F: f32 = 1.428_606_765_330_187_045_e-6;
36const R_LN2_F: f32 = std::f32::consts::LOG2_E;
37
38#[inline]
41pub const fn expf(d: f32) -> f32 {
42 const EXP_POLY_1_S: f32 = 2f32;
43 const EXP_POLY_2_S: f32 = 0.16666707f32;
44 const EXP_POLY_3_S: f32 = -0.002775669f32;
45 let qf = rintfk(d * R_LN2_F);
46 let q = qf as i32;
47 let r = fmlaf(qf, -L2U_F, d);
48 let r = fmlaf(qf, -L2L_F, r);
49
50 let f = r * r;
51 let mut u = EXP_POLY_3_S;
53 u = fmlaf(u, f, EXP_POLY_2_S);
54 u = fmlaf(u, f, EXP_POLY_1_S);
55 let u = 1f32 + 2f32 * r / (u - r);
56 let i2 = pow2if(q);
57 u * i2
58 }
65
66pub(crate) static EXP_M1: [u64; 207] = [
73 0x368f1e6b68529e33,
74 0x36a525be4e4e601d,
75 0x36bcbe0a45f75eb1,
76 0x36d3884e838aea68,
77 0x36ea8c1f14e2af5d,
78 0x37020a717e64a9bd,
79 0x3718851d84118908,
80 0x3730a9bdfb02d240,
81 0x3746a5bea046b42e,
82 0x375ec7f3b269efa8,
83 0x3774eafb87eab0f2,
84 0x378c6e2d05bbc000,
85 0x37a35208867c2683,
86 0x37ba425b317eeacd,
87 0x37d1d8508fa8246a,
88 0x37e840fbc08fdc8a,
89 0x38007b7112bc1ffe,
90 0x381666d0dad2961d,
91 0x382e726c3f64d0fe,
92 0x3844b0dc07cabf98,
93 0x385c1f2daf3b6a46,
94 0x38731c5957a47de2,
95 0x3889f96445648b9f,
96 0x38a1a6baeadb4fd1,
97 0x38b7fd974d372e45,
98 0x38d04da4d1452919,
99 0x38e62891f06b3450,
100 0x38fe1dd273aa8a4a,
101 0x3914775e0840bfdd,
102 0x392bd109d9d94bda,
103 0x3942e73f53fba844,
104 0x3959b138170d6bfe,
105 0x397175af0cf60ec5,
106 0x3987baee1bffa80b,
107 0x39a02057d1245ceb,
108 0x39b5eafffb34ba31,
109 0x39cdca23bae16424,
110 0x39e43e7fc88b8056,
111 0x39fb83bf23a9a9eb,
112 0x3a12b2b8dd05b318,
113 0x3a2969d47321e4cc,
114 0x3a41452b7723aed2,
115 0x3a5778fe2497184c,
116 0x3a6fe7116182e9cc,
117 0x3a85ae191a99585a,
118 0x3a9d775d87da854d,
119 0x3ab4063f8cc8bb98,
120 0x3acb374b315f87c1,
121 0x3ae27ec458c65e3c,
122 0x3af923372c67a074,
123 0x3b11152eaeb73c08,
124 0x3b2737c5645114b5,
125 0x3b3f8e6c24b5592e,
126 0x3b5571db733a9d61,
127 0x3b6d257d547e083f,
128 0x3b83ce9b9de78f85,
129 0x3b9aebabae3a41b5,
130 0x3bb24b6031b49bda,
131 0x3bc8dd5e1bb09d7e,
132 0x3be0e5b73d1ff53d,
133 0x3bf6f741de1748ec,
134 0x3c0f36bd37f42f3e,
135 0x3c2536452ee2f75c,
136 0x3c3cd480a1b74820,
137 0x3c539792499b1a24,
138 0x3c6aa0de4bf35b38,
139 0x3c82188ad6ae3303,
140 0x3c9898471fca6055,
141 0x3cb0b6c3afdde064,
142 0x3cc6b7719a59f0e0,
143 0x3cdee001eed62aa0,
144 0x3cf4fb547c775da8,
145 0x3d0c8464f7616468,
146 0x3d236121e24d3bba,
147 0x3d3a56e0c2ac7f75,
148 0x3d51e642baeb84a0,
149 0x3d6853f01d6d53ba,
150 0x3d80885298767e9a,
151 0x3d967852a7007e42,
152 0x3dae8a37a45fc32e,
153 0x3dc4c1078fe9228a,
154 0x3ddc3527e433fab1,
155 0x3df32b48bf117da2,
156 0x3e0a0db0d0ddb3ec,
157 0x3e21b48655f37267,
158 0x3e381056ff2c5772,
159 0x3e505a628c699fa1,
160 0x3e6639e3175a689d,
161 0x3e7e355bbaee85cb,
162 0x3e94875ca227ec38,
163 0x3eabe6c6fdb01612,
164 0x3ec2f6053b981d98,
165 0x3ed9c54c3b43bc8b,
166 0x3ef18354238f6764,
167 0x3f07cd79b5647c9b,
168 0x3f202cf22526545a,
169 0x3f35fc21041027ad,
170 0x3f4de16b9c24a98f,
171 0x3f644e51f113d4d6,
172 0x3f7b993fe00d5376,
173 0x3f92c155b8213cf4,
174 0x3fa97db0ccceb0af,
175 0x3fc152aaa3bf81cc,
176 0x3fd78b56362cef38,
177 0x3ff0000000000000,
178 0x4005bf0a8b145769,
179 0x401d8e64b8d4ddae,
180 0x403415e5bf6fb106,
181 0x404b4c902e273a58,
182 0x40628d389970338f,
183 0x407936dc5690c08f,
184 0x409122885aaeddaa,
185 0x40a749ea7d470c6e,
186 0x40bfa7157c470f82,
187 0x40d5829dcf950560,
188 0x40ed3c4488ee4f7f,
189 0x4103de1654d37c9a,
190 0x411b00b5916ac955,
191 0x413259ac48bf05d7,
192 0x4148f0ccafad2a87,
193 0x4160f2ebd0a80020,
194 0x417709348c0ea4f9,
195 0x418f4f22091940bd,
196 0x41a546d8f9ed26e1,
197 0x41bceb088b68e804,
198 0x41d3a6e1fd9eecfd,
199 0x41eab5adb9c43600,
200 0x420226af33b1fdc1,
201 0x4218ab7fb5475fb7,
202 0x4230c3d3920962c9,
203 0x4246c932696a6b5d,
204 0x425ef822f7f6731d,
205 0x42750bba3796379a,
206 0x428c9aae4631c056,
207 0x42a370470aec28ed,
208 0x42ba6b765d8cdf6d,
209 0x42d1f43fcc4b662c,
210 0x42e866f34a725782,
211 0x4300953e2f3a1ef7,
212 0x431689e221bc8d5b,
213 0x432ea215a1d20d76,
214 0x4344d13fbb1a001a,
215 0x435c4b334617cc67,
216 0x43733a43d282a519,
217 0x438a220d397972eb,
218 0x43a1c25c88df6862,
219 0x43b8232558201159,
220 0x43d0672a3c9eb871,
221 0x43e64b41c6d37832,
222 0x43fe4cf766fe49be,
223 0x44149767bc0483e3,
224 0x442bfc951eb8bb76,
225 0x444304d6aeca254b,
226 0x4459d97010884251,
227 0x44719103e4080b45,
228 0x4487e013cd114461,
229 0x44a03996528e074c,
230 0x44b60d4f6fdac731,
231 0x44cdf8c5af17ba3b,
232 0x44e45e3076d61699,
233 0x44fbaed16a6e0da7,
234 0x4512cffdfebde1a1,
235 0x4529919cabefcb69,
236 0x454160345c9953e3,
237 0x45579dbc9dc53c66,
238 0x45700c810d464097,
239 0x4585d009394c5c27,
240 0x459da57de8f107a8,
241 0x45b425982cf597cd,
242 0x45cb61e5ca3a5e31,
243 0x45e29bb825dfcf87,
244 0x45f94a90db0d6fe2,
245 0x46112fec759586fd,
246 0x46275c1dc469e3af,
247 0x463fbfd219c43b04,
248 0x4655936d44e1a146,
249 0x466d531d8a7ee79c,
250 0x4683ed9d24a2d51b,
251 0x469b15cfe5b6e17b,
252 0x46b268038c2c0e00,
253 0x46c9044a73545d48,
254 0x46e1002ab6218b38,
255 0x46f71b3540cbf921,
256 0x470f6799ea9c414a,
257 0x47255779b984f3eb,
258 0x473d01a210c44aa4,
259 0x4753b63da8e91210,
260 0x476aca8d6b0116b8,
261 0x478234de9e0c74e9,
262 0x4798bec7503ca477,
263 0x47b0d0eda9796b90,
264 0x47c6db0118477245,
265 0x47df1056dc7bf22d,
266 0x47f51c2cc3433801,
267 0x480cb108ffbec164,
268 0x48237f780991b584,
269 0x483a801c0ea8ac4d,
270 0x48520247cc4c46c1,
271 0x48687a0553328015,
272 0x4880a233dee4f9bb,
273 0x48969b7f55b808ba,
274 0x48aeba064644060a,
275 0x48c4e184933d9364,
276 0x48dc614fe2531841,
277 0x48f3494a9b171bf5,
278 0x490a36798b9d969b,
279 0x4921d03d8c0c04af,
280];
281
282pub(crate) static EXP_M2: [u64; 128] = [
287 0x3ff0000000000000,
288 0x3ff0202015600446,
289 0x3ff04080ab55de39,
290 0x3ff06122436410dd,
291 0x3ff08205601127ed,
292 0x3ff0a32a84e9c1f6,
293 0x3ff0c49236829e8c,
294 0x3ff0e63cfa7ab09d,
295 0x3ff1082b577d34ed,
296 0x3ff12a5dd543ccc5,
297 0x3ff14cd4fc989cd6,
298 0x3ff16f9157587069,
299 0x3ff192937074e0cd,
300 0x3ff1b5dbd3f68122,
301 0x3ff1d96b0eff0e79,
302 0x3ff1fd41afcba45e,
303 0x3ff2216045b6f5cd,
304 0x3ff245c7613b8a9b,
305 0x3ff26a7793f60164,
306 0x3ff28f7170a755fd,
307 0x3ff2b4b58b372c79,
308 0x3ff2da4478b620c7,
309 0x3ff3001ecf601af7,
310 0x3ff32645269ea829,
311 0x3ff34cb8170b5835,
312 0x3ff373783a722012,
313 0x3ff39a862bd3c106,
314 0x3ff3c1e2876834aa,
315 0x3ff3e98deaa11dcc,
316 0x3ff41188f42c3e32,
317 0x3ff439d443f5f159,
318 0x3ff462707b2bac21,
319 0x3ff48b5e3c3e8186,
320 0x3ff4b49e2ae5ac67,
321 0x3ff4de30ec211e60,
322 0x3ff50817263c13cd,
323 0x3ff5325180cfacf7,
324 0x3ff55ce0a4c58c7c,
325 0x3ff587c53c5a7af0,
326 0x3ff5b2fff3210fd9,
327 0x3ff5de9176045ff5,
328 0x3ff60a7a734ab0e8,
329 0x3ff636bb9a983258,
330 0x3ff663559cf1bc7c,
331 0x3ff690492cbf9433,
332 0x3ff6bd96fdd034a2,
333 0x3ff6eb3fc55b1e76,
334 0x3ff719443a03acb9,
335 0x3ff747a513dbef6a,
336 0x3ff776630c678bc1,
337 0x3ff7a57ede9ea23e,
338 0x3ff7d4f946f0ba8d,
339 0x3ff804d30347b546,
340 0x3ff8350cd30ac390,
341 0x3ff865a7772164c5,
342 0x3ff896a3b1f66a0e,
343 0x3ff8c802477b0010,
344 0x3ff8f9c3fd29beaf,
345 0x3ff92be99a09bf00,
346 0x3ff95e73e6b1b75e,
347 0x3ff99163ad4b1dcc,
348 0x3ff9c4b9b995509b,
349 0x3ff9f876d8e8c566,
350 0x3ffa2c9bda3a3e78,
351 0x3ffa61298e1e069c,
352 0x3ffa9620c6cb3374,
353 0x3ffacb82581eee54,
354 0x3ffb014f179fc3b8,
355 0x3ffb3787dc80f95f,
356 0x3ffb6e2d7fa5eb18,
357 0x3ffba540dba56e56,
358 0x3ffbdcc2cccd3c85,
359 0x3ffc14b431256446,
360 0x3ffc4d15e873c193,
361 0x3ffc85e8d43f7cd0,
362 0x3ffcbf2dd7d490f2,
363 0x3ffcf8e5d84758a9,
364 0x3ffd3311bc7822b4,
365 0x3ffd6db26d16cd67,
366 0x3ffda8c8d4a66969,
367 0x3ffde455df80e3c0,
368 0x3ffe205a7bdab73e,
369 0x3ffe5cd799c6a54e,
370 0x3ffe99ce2b397649,
371 0x3ffed73f240dc142,
372 0x3fff152b7a07bb76,
373 0x3fff539424d90f5e,
374 0x3fff927a1e24bb76,
375 0x3fffd1de6182f8c9,
376 0x400008e0f64294ab,
377 0x40002912df5ce72a,
378 0x400049856cd84339,
379 0x40006a39207f0a09,
380 0x40008b2e7d2035cf,
381 0x4000ac6606916501,
382 0x4000cde041b0e9ae,
383 0x4000ef9db467dcf8,
384 0x4001119ee5ac36b6,
385 0x400133e45d82e952,
386 0x4001566ea50201d7,
387 0x4001793e4652cc50,
388 0x40019c53ccb3fc6b,
389 0x4001bfafc47bda73,
390 0x4001e352bb1a74ad,
391 0x4002073d3f1bd518,
392 0x40022b6fe02a3b9c,
393 0x40024feb2f105cb8,
394 0x400274afbdbba4a6,
395 0x400299be1f3e7f1c,
396 0x4002bf16e7d2a38c,
397 0x4002e4baacdb6614,
398 0x40030aaa04e80d05,
399 0x400330e587b62b28,
400 0x4003576dce33fead,
401 0x40037e437282d4ee,
402 0x4003a5670ff972ed,
403 0x4003ccd9432682b4,
404 0x4003f49aa9d30590,
405 0x40041cabe304cb34,
406 0x4004450d8f00edd4,
407 0x40046dc04f4e5338,
408 0x400496c4c6b832da,
409 0x4004c01b9950a111,
410 0x4004e9c56c731f5d,
411 0x400513c2e6c731d7,
412 0x40053e14b042f9ca,
413 0x400568bb722dd593,
414 0x400593b7d72305bb,
415];
416
417pub(crate) trait ExpfBackend {
418 fn fmaf(&self, x: f32, y: f32, z: f32) -> f32;
419 fn fma(&self, x: f64, y: f64, z: f64) -> f64;
420 fn dd_fma(&self, x: f64, y: f64, z: f64) -> f64;
421 fn dyad_fma(&self, x: f64, y: f64, z: f64) -> f64;
422 fn polyeval3(&self, x: f64, a0: f64, a1: f64, a2: f64) -> f64;
423 fn polyeval5(&self, x: f64, a0: f64, a1: f64, a2: f64, a3: f64, a4: f64) -> f64;
424 fn polyeval6(&self, x: f64, a0: f64, a1: f64, a2: f64, a3: f64, a4: f64, a5: f64) -> f64;
425 fn polyeval7(
426 &self,
427 x: f64,
428 a0: f64,
429 a1: f64,
430 a2: f64,
431 a3: f64,
432 a4: f64,
433 a5: f64,
434 a6: f64,
435 ) -> f64;
436 fn roundf(&self, x: f32) -> f32;
437 fn round(&self, x: f64) -> f64;
438 fn floor(&self, x: f64) -> f64;
439 fn round_ties_even(&self, x: f64) -> f64;
440 fn quick_mult(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble;
441 fn quick_mult_f64(&self, x: DoubleDouble, y: f64) -> DoubleDouble;
442 fn quick_f64_mult(&self, x: f64, y: DoubleDouble) -> DoubleDouble;
443 fn exact_mult(&self, x: f64, y: f64) -> DoubleDouble;
444}
445
446pub(crate) struct GenericExpfBackend {}
447
448impl ExpfBackend for GenericExpfBackend {
449 #[inline(always)]
450 fn fmaf(&self, x: f32, y: f32, z: f32) -> f32 {
451 f_fmlaf(x, y, z)
452 }
453
454 #[inline(always)]
455 fn fma(&self, x: f64, y: f64, z: f64) -> f64 {
456 use crate::common::f_fmla;
457 f_fmla(x, y, z)
458 }
459 #[inline(always)]
460 fn dd_fma(&self, x: f64, y: f64, z: f64) -> f64 {
461 dd_fmla(x, y, z)
462 }
463 #[inline(always)]
464 fn dyad_fma(&self, x: f64, y: f64, z: f64) -> f64 {
465 dyad_fmla(x, y, z)
466 }
467
468 #[inline(always)]
469 fn polyeval3(&self, x: f64, a0: f64, a1: f64, a2: f64) -> f64 {
470 use crate::polyeval::f_polyeval3;
471 f_polyeval3(x, a0, a1, a2)
472 }
473
474 #[inline(always)]
475 fn polyeval5(&self, x: f64, a0: f64, a1: f64, a2: f64, a3: f64, a4: f64) -> f64 {
476 use crate::polyeval::f_polyeval5;
477 f_polyeval5(x, a0, a1, a2, a3, a4)
478 }
479
480 #[inline(always)]
481 fn polyeval6(&self, x: f64, a0: f64, a1: f64, a2: f64, a3: f64, a4: f64, a5: f64) -> f64 {
482 use crate::polyeval::f_polyeval6;
483 f_polyeval6(x, a0, a1, a2, a3, a4, a5)
484 }
485
486 #[inline(always)]
487 fn polyeval7(
488 &self,
489 x: f64,
490 a0: f64,
491 a1: f64,
492 a2: f64,
493 a3: f64,
494 a4: f64,
495 a5: f64,
496 a6: f64,
497 ) -> f64 {
498 use crate::polyeval::f_polyeval7;
499 f_polyeval7(x, a0, a1, a2, a3, a4, a5, a6)
500 }
501
502 #[inline(always)]
503 fn roundf(&self, x: f32) -> f32 {
504 x.cpu_round()
505 }
506
507 #[inline(always)]
508 fn round(&self, x: f64) -> f64 {
509 x.cpu_round()
510 }
511
512 #[inline(always)]
513 fn floor(&self, x: f64) -> f64 {
514 x.cpu_floor()
515 }
516
517 #[inline(always)]
518 fn round_ties_even(&self, x: f64) -> f64 {
519 use crate::rounding::CpuRoundTiesEven;
520 x.cpu_round_ties_even()
521 }
522
523 #[inline(always)]
524 fn quick_mult(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble {
525 DoubleDouble::quick_mult(x, y)
526 }
527
528 #[inline(always)]
529 fn quick_mult_f64(&self, x: DoubleDouble, y: f64) -> DoubleDouble {
530 DoubleDouble::quick_mult_f64(x, y)
531 }
532
533 #[inline(always)]
534 fn quick_f64_mult(&self, x: f64, y: DoubleDouble) -> DoubleDouble {
535 DoubleDouble::quick_mult_f64(y, x)
536 }
537
538 #[inline(always)]
539 fn exact_mult(&self, x: f64, y: f64) -> DoubleDouble {
540 DoubleDouble::from_exact_mult(x, y)
541 }
542}
543
544#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
545pub(crate) struct FmaBackend {}
546
547#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
548impl ExpfBackend for FmaBackend {
549 #[inline(always)]
550 fn fmaf(&self, x: f32, y: f32, z: f32) -> f32 {
551 f32::mul_add(x, y, z)
552 }
553
554 #[inline(always)]
555 fn fma(&self, x: f64, y: f64, z: f64) -> f64 {
556 f64::mul_add(x, y, z)
557 }
558
559 #[inline(always)]
560 fn dd_fma(&self, x: f64, y: f64, z: f64) -> f64 {
561 f64::mul_add(x, y, z)
562 }
563 #[inline(always)]
564 fn dyad_fma(&self, x: f64, y: f64, z: f64) -> f64 {
565 f64::mul_add(x, y, z)
566 }
567
568 #[inline(always)]
569 fn polyeval3(&self, x: f64, a0: f64, a1: f64, a2: f64) -> f64 {
570 use crate::polyeval::d_polyeval3;
571 d_polyeval3(x, a0, a1, a2)
572 }
573
574 #[inline(always)]
575 fn polyeval5(&self, x: f64, a0: f64, a1: f64, a2: f64, a3: f64, a4: f64) -> f64 {
576 use crate::polyeval::d_polyeval5;
577 d_polyeval5(x, a0, a1, a2, a3, a4)
578 }
579
580 #[inline(always)]
581 fn polyeval6(&self, x: f64, a0: f64, a1: f64, a2: f64, a3: f64, a4: f64, a5: f64) -> f64 {
582 use crate::polyeval::d_polyeval6;
583 d_polyeval6(x, a0, a1, a2, a3, a4, a5)
584 }
585
586 #[inline(always)]
587 fn polyeval7(
588 &self,
589 x: f64,
590 a0: f64,
591 a1: f64,
592 a2: f64,
593 a3: f64,
594 a4: f64,
595 a5: f64,
596 a6: f64,
597 ) -> f64 {
598 use crate::polyeval::d_polyeval7;
599 d_polyeval7(x, a0, a1, a2, a3, a4, a5, a6)
600 }
601
602 #[inline(always)]
603 fn roundf(&self, x: f32) -> f32 {
604 x.round()
605 }
606
607 #[inline(always)]
608 fn round(&self, x: f64) -> f64 {
609 x.round()
610 }
611
612 #[inline(always)]
613 fn floor(&self, x: f64) -> f64 {
614 x.floor()
615 }
616
617 #[inline(always)]
618 fn round_ties_even(&self, x: f64) -> f64 {
619 x.round_ties_even()
620 }
621
622 #[inline(always)]
623 fn quick_mult(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble {
624 DoubleDouble::quick_mult_fma(x, y)
625 }
626
627 #[inline(always)]
628 fn quick_mult_f64(&self, x: DoubleDouble, y: f64) -> DoubleDouble {
629 DoubleDouble::quick_mult_f64_fma(x, y)
630 }
631
632 #[inline(always)]
633 fn quick_f64_mult(&self, x: f64, y: DoubleDouble) -> DoubleDouble {
634 DoubleDouble::quick_mult_f64_fma(y, x)
635 }
636
637 #[inline(always)]
638 fn exact_mult(&self, x: f64, y: f64) -> DoubleDouble {
639 DoubleDouble::from_exact_mult_fma(x, y)
640 }
641}
642
643#[inline(always)]
644fn expf_gen_impl<B: ExpfBackend>(x: f32, backend: B) -> f32 {
645 let x_u = x.to_bits();
646 let x_abs = x_u & 0x7fff_ffffu32;
647 if x_abs >= 0x42b2_0000u32 || x_abs <= 0x3280_0000u32 {
648 let exp = ((x_u >> 23) & 0xFF) as i32;
649 if exp <= 101i32 {
651 return 1.0 + x;
652 }
653
654 if x_u >= 0xc2cf_f1b5u32 {
656 if x.is_infinite() {
658 return 0.0;
659 }
660 if x.is_nan() {
662 return x;
663 }
664 return 0.0;
665 }
666 if x.is_sign_positive() && (x_u >= 0x42b2_0000) {
668 return x + f32::INFINITY;
670 }
671 }
672
673 let kf = backend.roundf(x * 128.);
689 let xd = backend.fmaf(kf, -0.0078125 , x) as f64;
691 let mut x_hi = unsafe { kf.to_int_unchecked::<i32>() }; x_hi += 104 << 7;
693 let exp_hi = f64::from_bits(EXP_M1[(x_hi >> 7) as usize]);
695 let exp_mid = f64::from_bits(EXP_M2[(x_hi & 0x7f) as usize]);
697 let p = backend.polyeval5(
703 xd,
704 1.,
705 f64::from_bits(0x3feffffffffff777),
706 f64::from_bits(0x3fe000000000071c),
707 f64::from_bits(0x3fc555566668e5e7),
708 f64::from_bits(0x3fa55555555ef243),
709 );
710 (p * exp_hi * exp_mid) as f32
711}
712
713#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
714#[target_feature(enable = "avx", enable = "fma")]
715unsafe fn expf_fma_impl(x: f32) -> f32 {
716 expf_gen_impl(x, FmaBackend {})
717}
718
719#[inline]
723pub fn f_expf(x: f32) -> f32 {
724 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
725 {
726 expf_gen_impl(x, GenericExpfBackend {})
727 }
728 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
729 {
730 use std::sync::OnceLock;
731 static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
732 let q = EXECUTOR.get_or_init(|| {
733 if std::arch::is_x86_feature_detected!("avx")
734 && std::arch::is_x86_feature_detected!("fma")
735 {
736 expf_fma_impl
737 } else {
738 fn def_expf(x: f32) -> f32 {
739 expf_gen_impl(x, GenericExpfBackend {})
740 }
741 def_expf
742 }
743 });
744 unsafe { q(x) }
745 }
746}
747
748#[inline]
749pub(crate) fn core_expf(x: f32) -> f64 {
750 let kf = (x * 128.).cpu_round();
752 let xd = f_fmlaf(kf, -0.0078125 , x) as f64;
754 let mut x_hi = unsafe { kf.to_int_unchecked::<i32>() }; x_hi += 104 << 7;
756 let exp_hi = f64::from_bits(EXP_M1[(x_hi >> 7) as usize]);
758 let exp_mid = f64::from_bits(EXP_M2[(x_hi & 0x7f) as usize]);
760 use crate::polyeval::f_polyeval5;
766 let p = f_polyeval5(
767 xd,
768 1.,
769 f64::from_bits(0x3feffffffffff777),
770 f64::from_bits(0x3fe000000000071c),
771 f64::from_bits(0x3fc555566668e5e7),
772 f64::from_bits(0x3fa55555555ef243),
773 );
774 p * exp_hi * exp_mid
775}
776
777#[inline]
778pub(crate) fn core_expdf(x: f64) -> f64 {
779 let kf = (x * 128.).cpu_round();
781 let xd = f_fmla(kf, -0.0078125 , x);
783 let mut x_hi = unsafe { kf.to_int_unchecked::<i32>() }; x_hi += 104 << 7;
785 let exp_hi = f64::from_bits(EXP_M1[(x_hi >> 7) as usize]);
787 let exp_mid = f64::from_bits(EXP_M2[(x_hi & 0x7f) as usize]);
789 use crate::polyeval::f_polyeval5;
795 let p = f_polyeval5(
796 xd,
797 1.,
798 f64::from_bits(0x3feffffffffff777),
799 f64::from_bits(0x3fe000000000071c),
800 f64::from_bits(0x3fc555566668e5e7),
801 f64::from_bits(0x3fa55555555ef243),
802 );
803 p * exp_hi * exp_mid
804}
805
806#[cfg(test)]
807mod tests {
808 use super::*;
809
810 #[test]
811 fn expf_test() {
812 assert!(
813 (expf(0f32) - 1f32).abs() < 1e-6,
814 "Invalid result {}",
815 expf(0f32)
816 );
817 assert!(
818 (expf(5f32) - 148.4131591025766f32).abs() < 1e-6,
819 "Invalid result {}",
820 expf(5f32)
821 );
822 }
823
824 #[test]
825 fn f_expf_test() {
826 assert_eq!(f_expf(-103.971596), 1e-45);
827 assert!(
828 (f_expf(0f32) - 1f32).abs() < 1e-6,
829 "Invalid result {}",
830 f_expf(0f32)
831 );
832 assert!(
833 (f_expf(5f32) - 148.4131591025766f32).abs() < 1e-6,
834 "Invalid result {}",
835 f_expf(5f32)
836 );
837 assert_eq!(f_expf(f32::INFINITY), f32::INFINITY);
838 assert_eq!(f_expf(f32::NEG_INFINITY), 0.);
839 assert!(f_expf(f32::NAN).is_nan());
840 }
841}