1use crate::common::f_fmla;
30use crate::cube_roots::cbrtf::halley_refine_d;
31use crate::double_double::DoubleDouble;
32use crate::exponents::fast_ldexp;
33
/// Arithmetic backend for the cube-root implementation.
///
/// Every primitive that exists in both a portable (software) flavor and an
/// FMA-accelerated flavor is routed through this trait, so `cbrt_gen_impl`
/// can be instantiated once and driven by either backend.
pub(crate) trait CbrtBackend {
    /// Fused multiply-add: `x * y + z` with a single rounding.
    fn fma(&self, x: f64, y: f64, z: f64) -> f64;
    /// Evaluates a degree-3 polynomial in `x` with coefficients `a0..a3`.
    // NOTE(review): presumably Horner form a0 + x*(a1 + x*(a2 + x*a3)) —
    // confirm against f_polyeval4 / d_polyeval4.
    fn polyeval4(&self, x: f64, a0: f64, a1: f64, a2: f64, a3: f64) -> f64;
    /// One Halley-style refinement of `x` as an approximation of cbrt(`a`).
    fn halley(&self, x: f64, a: f64) -> f64;
    /// Exact product `x * y` represented as a double-double (hi + lo).
    fn exact_mul(&self, x: f64, y: f64) -> DoubleDouble;
    /// Product of a double-double by a plain f64.
    fn quick_mult_f64(&self, x: DoubleDouble, y: f64) -> DoubleDouble;
}
41
42pub(crate) struct GenericCbrtBackend {}
43
44impl CbrtBackend for GenericCbrtBackend {
45 #[inline(always)]
46 fn fma(&self, x: f64, y: f64, z: f64) -> f64 {
47 f_fmla(x, y, z)
48 }
49 #[inline(always)]
50 fn polyeval4(&self, x: f64, a0: f64, a1: f64, a2: f64, a3: f64) -> f64 {
51 use crate::polyeval::f_polyeval4;
52 f_polyeval4(x, a0, a1, a2, a3)
53 }
54 #[inline(always)]
55 fn halley(&self, x: f64, a: f64) -> f64 {
56 halley_refine_d(x, a)
57 }
58 #[inline(always)]
59 fn exact_mul(&self, x: f64, y: f64) -> DoubleDouble {
60 DoubleDouble::from_exact_mult(x, y)
61 }
62 #[inline(always)]
63 fn quick_mult_f64(&self, x: DoubleDouble, y: f64) -> DoubleDouble {
64 DoubleDouble::quick_mult_f64(x, y)
65 }
66}
67
68#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
69pub(crate) struct FmaCbrtBackend {}
70
71#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
72impl CbrtBackend for FmaCbrtBackend {
73 #[inline(always)]
74 fn fma(&self, x: f64, y: f64, z: f64) -> f64 {
75 f64::mul_add(x, y, z)
76 }
77 #[inline(always)]
78 fn polyeval4(&self, x: f64, a0: f64, a1: f64, a2: f64, a3: f64) -> f64 {
79 use crate::polyeval::d_polyeval4;
80 d_polyeval4(x, a0, a1, a2, a3)
81 }
82 #[inline(always)]
83 fn halley(&self, x: f64, a: f64) -> f64 {
84 use crate::cube_roots::cbrtf::halley_refine_d_fma;
85 halley_refine_d_fma(x, a)
86 }
87 #[inline(always)]
88 fn exact_mul(&self, x: f64, y: f64) -> DoubleDouble {
89 DoubleDouble::from_exact_mult_fma(x, y)
90 }
91 #[inline(always)]
92 fn quick_mult_f64(&self, x: DoubleDouble, y: f64) -> DoubleDouble {
93 DoubleDouble::quick_mult_f64_fma(x, y)
94 }
95}
96
/// Core cube-root kernel, generic over the arithmetic backend.
///
/// Strategy: decompose |x| = m * 2^exp with m in [1, 2), approximate
/// cbrt with a cubic polynomial, fold in 2^(exp mod 3 / 3), refine with
/// a Halley step plus one double-double correction, then reapply the
/// exponent quotient and the sign of `x`.
#[inline(always)]
fn cbrt_gen_impl<B: CbrtBackend>(x: f64, backend: B) -> f64 {
    // 2^(k/3) for k = 0, 1, 2:
    // ESCALE[1] = 2^(1/3) ~= 1.2599, ESCALE[2] = 2^(2/3) ~= 1.5874.
    static ESCALE: [f64; 3] = [
        1.0,
        f64::from_bits(0x3ff428a2f98d728b),
        f64::from_bits(0x3ff965fea53d6e3d),
    ];

    let bits = x.to_bits();
    let mut exp = ((bits >> 52) & 0x7ff) as i32; // biased exponent field
    let mut mant = bits & ((1u64 << 52) - 1); // 52-bit mantissa field

    // NaN / +-inf (exp == 0x7ff) and +-0: `x + x` returns x itself,
    // preserving the sign of zero and infinity and quieting sNaNs.
    if exp == 0x7ff || x == 0.0 {
        return x + x;
    }

    // Subnormal input: normalize by scaling with 2^54, then compensate
    // the exponent. (`x != 0.0` is always true here — zero returned
    // above — so that extra test is redundant but harmless.)
    if exp == 0 && x != 0.0 {
        let norm = x * f64::from_bits(0x4350000000000000); // 2^54
        let norm_bits = norm.to_bits();
        mant = norm_bits & ((1u64 << 52) - 1);
        exp = ((norm_bits >> 52) & 0x7ff) as i32 - 54;
    }

    exp -= 1023; // remove IEEE-754 bias

    // Force the exponent field to biased 1023 so m lies in [1, 2).
    mant |= 0x3ff << 52;
    let m = f64::from_bits(mant);

    // Degree-3 polynomial initial estimate of cbrt(m) on [1, 2).
    let p = backend.polyeval4(
        m,
        f64::from_bits(0x3fe1b0babceeaafa),
        f64::from_bits(0x3fe2c9a3e8e06a3c),
        f64::from_bits(0xbfc4dc30afb71885),
        f64::from_bits(0x3f97a8d3e05458e4),
    );

    // Split exp = 3*q + rem_scale (rem_scale in 0..=2), so that
    // cbrt(x) = cbrt(m * 2^rem_scale) * 2^q. `rem_euclid` keeps the
    // remainder non-negative for negative exponents.
    let q = exp.div_euclid(3);
    let rem_scale = exp.rem_euclid(3);

    // Fold 2^(rem_scale/3) into the polynomial estimate.
    let z = p * ESCALE[rem_scale as usize];

    let mm = fast_ldexp(m, rem_scale); // reduced argument, m * 2^rem_scale
    let r = 1.0 / mm;

    // Halley refinement of z toward cbrt(mm), then one correction step
    // with the cube computed in double-double for the low bits:
    //   h = (y0^3 - mm) / mm,  y = y0 - (1/3) * y0 * h.
    let y0 = backend.halley(z, mm);
    let d2y = backend.exact_mul(y0, y0);
    let d3y = backend.quick_mult_f64(d2y, y0);
    let h = ((d3y.hi - mm) + d3y.lo) * r;
    // 0x3fd5555555555555 is 1/3 rounded to nearest double.
    let y = backend.fma(-f64::from_bits(0x3fd5555555555555), y0 * h, y0);

    // Reapply the exponent quotient and the original sign.
    f64::copysign(fast_ldexp(y, q), x)
}
167
/// FMA-accelerated entry point for x86/x86_64.
///
/// # Safety
///
/// The caller must ensure the running CPU supports the `avx` and `fma`
/// target features (see the runtime detection in `f_cbrt`); calling this
/// on a CPU without them is undefined behavior.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx", enable = "fma")]
unsafe fn cbrt_fma_impl(x: f64) -> f64 {
    cbrt_gen_impl(x, FmaCbrtBackend {})
}
173
174pub fn f_cbrt(x: f64) -> f64 {
178 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
179 {
180 cbrt_gen_impl(x, GenericCbrtBackend {})
181 }
182 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
183 {
184 use std::sync::OnceLock;
185 static EXECUTOR: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
186 let q = EXECUTOR.get_or_init(|| {
187 if std::arch::is_x86_feature_detected!("avx")
188 && std::arch::is_x86_feature_detected!("fma")
189 {
190 cbrt_fma_impl
191 } else {
192 fn def_cbrt(x: f64) -> f64 {
193 cbrt_gen_impl(x, GenericCbrtBackend {})
194 }
195 def_cbrt
196 }
197 });
198 unsafe { q(x) }
199 }
200}
201
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_cbrt() {
        // Tiny (subnormal-range) inputs exercise the 2^54 normalization path.
        assert_eq!(
            f_cbrt(0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005432309223745),
            0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000017579026781511548
        );
        assert_eq!(
            f_cbrt(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000139491540182158),
            1.1173329935611586e-103
        );

        // A non-trivial value in the normal range.
        assert_eq!(f_cbrt(1.225158611559834), 1.0700336588124544);

        // Perfect cubes k^3 for k = 3..=9, both signs, must be exact.
        for k in 3..=9 {
            let root = k as f64;
            let cube = root * root * root;
            assert_eq!(f_cbrt(cube), root);
            assert_eq!(f_cbrt(-cube), -root);
        }

        // Special values: signed zero, infinities, NaN.
        assert_eq!(f_cbrt(0.0), 0.0);
        assert_eq!(f_cbrt(f64::INFINITY), f64::INFINITY);
        assert_eq!(f_cbrt(f64::NEG_INFINITY), f64::NEG_INFINITY);
        assert!(f_cbrt(f64::NAN).is_nan());
    }
}
231}