pxfm/bessel/
jincpif.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29#![allow(clippy::too_many_arguments)]
30use crate::bessel::j0f::j1f_rsqrt;
31use crate::bessel::j1_coeffs::{J1_ZEROS, J1_ZEROS_VALUE};
32use crate::bessel::j1f::{j1f_asympt_alpha, j1f_asympt_beta};
33use crate::bessel::j1f_coeffs::J1F_COEFFS;
34use crate::bessel::trigo_bessel::sin_small;
35use crate::common::f_fmla;
36use crate::double_double::DoubleDouble;
37use crate::rounding::CpuCeil;
38use crate::rounding::CpuRound;
39
40pub(crate) trait JincpifBackend {
41    fn fma(&self, x: f64, y: f64, z: f64) -> f64;
42    fn round(&self, x: f64) -> f64;
43    fn ceil(&self, x: f64) -> f64;
44    fn polyeval6(&self, x: f64, a0: f64, a1: f64, a2: f64, a3: f64, a4: f64, a5: f64) -> f64;
45    fn polyeval14(
46        &self,
47        x: f64,
48        a0: f64,
49        a1: f64,
50        a2: f64,
51        a3: f64,
52        a4: f64,
53        a5: f64,
54        a6: f64,
55        a7: f64,
56        a8: f64,
57        a9: f64,
58        a10: f64,
59        a11: f64,
60        a12: f64,
61        a13: f64,
62    ) -> f64;
63}
64
65struct GenJincpifBackend {}
66
67impl JincpifBackend for GenJincpifBackend {
68    #[inline(always)]
69    fn fma(&self, x: f64, y: f64, z: f64) -> f64 {
70        f_fmla(x, y, z)
71    }
72
73    #[inline(always)]
74    fn round(&self, x: f64) -> f64 {
75        x.cpu_round()
76    }
77
78    #[inline(always)]
79    fn ceil(&self, x: f64) -> f64 {
80        x.cpu_ceil()
81    }
82
83    #[inline(always)]
84    fn polyeval6(&self, x: f64, a0: f64, a1: f64, a2: f64, a3: f64, a4: f64, a5: f64) -> f64 {
85        use crate::polyeval::f_polyeval6;
86        f_polyeval6(x, a0, a1, a2, a3, a4, a5)
87    }
88
89    #[inline(always)]
90    fn polyeval14(
91        &self,
92        x: f64,
93        a0: f64,
94        a1: f64,
95        a2: f64,
96        a3: f64,
97        a4: f64,
98        a5: f64,
99        a6: f64,
100        a7: f64,
101        a8: f64,
102        a9: f64,
103        a10: f64,
104        a11: f64,
105        a12: f64,
106        a13: f64,
107    ) -> f64 {
108        use crate::polyeval::f_polyeval14;
109        f_polyeval14(
110            x, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13,
111        )
112    }
113}
114
115#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
116struct FmaJincpifBackend {}
117
118#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
119impl JincpifBackend for FmaJincpifBackend {
120    #[inline(always)]
121    fn fma(&self, x: f64, y: f64, z: f64) -> f64 {
122        f64::mul_add(x, y, z)
123    }
124
125    #[inline(always)]
126    fn round(&self, x: f64) -> f64 {
127        x.round()
128    }
129
130    #[inline(always)]
131    fn ceil(&self, x: f64) -> f64 {
132        x.ceil()
133    }
134
135    #[inline(always)]
136    fn polyeval6(&self, x: f64, a0: f64, a1: f64, a2: f64, a3: f64, a4: f64, a5: f64) -> f64 {
137        use crate::polyeval::d_polyeval6;
138        d_polyeval6(x, a0, a1, a2, a3, a4, a5)
139    }
140
141    #[inline(always)]
142    fn polyeval14(
143        &self,
144        x: f64,
145        a0: f64,
146        a1: f64,
147        a2: f64,
148        a3: f64,
149        a4: f64,
150        a5: f64,
151        a6: f64,
152        a7: f64,
153        a8: f64,
154        a9: f64,
155        a10: f64,
156        a11: f64,
157        a12: f64,
158        a13: f64,
159    ) -> f64 {
160        use crate::polyeval::d_polyeval14;
161        d_polyeval14(
162            x, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13,
163        )
164    }
165}
166
167#[inline(always)]
168fn jincpif_gen<B: JincpifBackend>(x: f32, backend: B) -> f32 {
169    let ux = x.to_bits().wrapping_shl(1);
170    if ux >= 0xffu32 << 24 || ux <= 0x6800_0000u32 {
171        // |x| <= f32::EPSILON, |x| == inf, |x| == NaN
172        if ux <= 0x6800_0000u32 {
173            // |x| == 0
174            return 1.;
175        }
176        if x.is_infinite() {
177            return 0.;
178        }
179        return x + f32::NAN; // x == NaN
180    }
181
182    let ax = x.to_bits() & 0x7fff_ffff;
183
184    if ax < 0x429533c2u32 {
185        // |x| < 74.60109
186        if ax <= 0x3e800000u32 {
187            // |x| < 0.25
188            return jincf_near_zero(f32::from_bits(ax), &backend);
189        }
190        let scaled_pix = f32::from_bits(ax) * std::f32::consts::PI; // just test boundaries
191        if scaled_pix < 74.60109 {
192            return jincpif_small_argument(f32::from_bits(ax), &backend);
193        }
194    }
195
196    jincpif_asympt(f32::from_bits(ax), &backend) as f32
197}
198
199#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
200#[target_feature(enable = "avx", enable = "fma")]
201unsafe fn jincpif_fma_impl(x: f32) -> f32 {
202    jincpif_gen(x, FmaJincpifBackend {})
203}
204
205/// Normalized jinc 2*J1(PI\*x)/(pi\*x)
206///
207pub fn f_jincpif(x: f32) -> f32 {
208    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
209    {
210        jincpif_gen(x, GenJincpifBackend {})
211    }
212    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
213    {
214        use std::sync::OnceLock;
215        static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
216        let q = EXECUTOR.get_or_init(|| {
217            if std::arch::is_x86_feature_detected!("avx")
218                && std::arch::is_x86_feature_detected!("fma")
219            {
220                jincpif_fma_impl
221            } else {
222                fn def_jincpif(x: f32) -> f32 {
223                    jincpif_gen(x, GenJincpifBackend {})
224                }
225                def_jincpif
226            }
227        });
228        unsafe { q(x) }
229    }
230}
231
232#[inline(always)]
233fn jincf_near_zero<B: JincpifBackend>(x: f32, backend: &B) -> f32 {
234    let dx = x as f64;
235    // Generated in Wolfram Mathematica:
236    // <<FunctionApproximations`
237    // ClearAll["Global`*"]
238    // f[x_]:=2*BesselJ[1,x*Pi]/(x*Pi)
239    // {err,approx}=MiniMaxApproximation[f[z],{z,{2^-23,0.3},5,5},WorkingPrecision->60]
240    // poly=Numerator[approx][[1]];
241    // coeffs=CoefficientList[poly,z];
242    // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
243    // poly=Denominator[approx][[1]];
244    // coeffs=CoefficientList[poly,z];
245    // TableForm[Table[Row[{"'",NumberForm[coeffs[[i+1]],{50,50},ExponentFunction->(Null&)],"',"}],{i,0,Length[coeffs]-1}]]
246    let p_num = backend.polyeval6(
247        dx,
248        f64::from_bits(0x3ff0000000000002),
249        f64::from_bits(0xbfe46cd1822a5aa0),
250        f64::from_bits(0xbfee583c923dc6f4),
251        f64::from_bits(0x3fe3834f47496519),
252        f64::from_bits(0x3fc8118468756e6f),
253        f64::from_bits(0xbfbfaff09f13df88),
254    );
255    let p_den = backend.polyeval6(
256        dx,
257        f64::from_bits(0x3ff0000000000000),
258        f64::from_bits(0xbfe46cd1822a4cb0),
259        f64::from_bits(0x3fd2447a026f477a),
260        f64::from_bits(0xbfc6bdf2192404e5),
261        f64::from_bits(0x3fa0cf182218e448),
262        f64::from_bits(0xbf939ab46c3f7a7d),
263    );
264    (p_num / p_den) as f32
265}
266
267/// This method on small range searches for nearest zero or extremum.
268/// Then picks stored series expansion at the point end evaluates the poly at the point.
269#[inline(always)]
270fn jincpif_small_argument<B: JincpifBackend>(ox: f32, backend: &B) -> f32 {
271    const PI: f64 = f64::from_bits(0x400921fb54442d18);
272    let x = ox as f64 * PI;
273    let x_abs = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);
274
275    // let avg_step = 74.60109 / 47.0;
276    // let inv_step = 1.0 / avg_step;
277
278    const INV_STEP: f64 = 0.6300176043004198;
279
280    let inv_scale = x;
281
282    let fx = x_abs * INV_STEP;
283    const J1_ZEROS_COUNT: f64 = (J1_ZEROS.len() - 1) as f64;
284    let idx0 = unsafe { fx.min(J1_ZEROS_COUNT).to_int_unchecked::<usize>() };
285    let idx1 = unsafe {
286        backend
287            .ceil(fx)
288            .min(J1_ZEROS_COUNT)
289            .to_int_unchecked::<usize>()
290    };
291
292    let found_zero0 = DoubleDouble::from_bit_pair(J1_ZEROS[idx0]);
293    let found_zero1 = DoubleDouble::from_bit_pair(J1_ZEROS[idx1]);
294
295    let dist0 = (found_zero0.hi - x_abs).abs();
296    let dist1 = (found_zero1.hi - x_abs).abs();
297
298    let (found_zero, idx, dist) = if dist0 < dist1 {
299        (found_zero0, idx0, dist0)
300    } else {
301        (found_zero1, idx1, dist1)
302    };
303
304    if idx == 0 {
305        return jincf_near_zero(ox, backend);
306    }
307
308    // We hit exact zero, value, better to return it directly
309    if dist == 0. {
310        return (f64::from_bits(J1_ZEROS_VALUE[idx]) / inv_scale * 2.) as f32;
311    }
312
313    let c = &J1F_COEFFS[idx - 1];
314
315    let r = (x_abs - found_zero.hi) - found_zero.lo;
316
317    let p = backend.polyeval14(
318        r,
319        f64::from_bits(c[0]),
320        f64::from_bits(c[1]),
321        f64::from_bits(c[2]),
322        f64::from_bits(c[3]),
323        f64::from_bits(c[4]),
324        f64::from_bits(c[5]),
325        f64::from_bits(c[6]),
326        f64::from_bits(c[7]),
327        f64::from_bits(c[8]),
328        f64::from_bits(c[9]),
329        f64::from_bits(c[10]),
330        f64::from_bits(c[11]),
331        f64::from_bits(c[12]),
332        f64::from_bits(c[13]),
333    );
334
335    (p / inv_scale * 2.) as f32
336}
337
338/*
339   Evaluates:
340   J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x))
341   discarding 1*PI/2 using identities gives:
342   J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x))
343
344   to avoid squashing small (-PI/4 - alpha(x)) into a large x actual expansion is:
345
346   J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x))
347*/
348#[inline(always)]
349pub(crate) fn jincpif_asympt<B: JincpifBackend>(x: f32, backend: &B) -> f64 {
350    const PI: f64 = f64::from_bits(0x400921fb54442d18);
351
352    let dox = x as f64;
353    let dx = dox * PI;
354
355    let alpha = j1f_asympt_alpha(dx);
356    let beta = j1f_asympt_beta(dx);
357
358    // argument reduction assuming x here value is already multiple of PI.
359    // k = round((x*Pi) / (pi*2))
360    let kd = backend.round(dox * 0.5);
361    //  y = (x * Pi) - k * 2
362    let angle = backend.fma(kd, -2., dox) * PI;
363
364    // 2^(3/2)/(Pi^2)
365    // reduce argument 2*sqrt(2/(PI*(x*PI))) = 2*sqrt(2)/PI
366    // adding additional pi from division then 2*sqrt(2)/PI^2
367    const Z2_3_2_OVER_PI_SQR: f64 = f64::from_bits(0x3fd25751e5614413);
368    const MPI_OVER_4: f64 = f64::from_bits(0xbfe921fb54442d18);
369
370    let x0pi34 = MPI_OVER_4 - alpha;
371    let r0 = angle + x0pi34;
372
373    let m_sin = sin_small(r0);
374
375    let z0 = beta * m_sin;
376    let scale = Z2_3_2_OVER_PI_SQR * j1f_rsqrt(dox);
377
378    let j1pix = scale * z0;
379    j1pix / dox
380}
381
382#[cfg(test)]
383mod tests {
384    use super::*;
385
386    #[test]
387    fn test_jincpif() {
388        assert_eq!(f_jincpif(-102.59484), 0.00024380769);
389        assert_eq!(f_jincpif(102.59484), 0.00024380769);
390        assert_eq!(f_jincpif(100.08199), -0.00014386141);
391        assert_eq!(f_jincpif(0.27715185), 0.9081822);
392        assert_eq!(f_jincpif(0.007638072), 0.99992806);
393        assert_eq!(f_jincpif(-f32::EPSILON), 1.0);
394        assert_eq!(f_jincpif(f32::EPSILON), 1.0);
395        assert_eq!(
396            f_jincpif(0.000000000000000000000000000000000000008827127),
397            1.0
398        );
399        assert_eq!(f_jincpif(5.4), -0.010821743);
400        assert_eq!(
401            f_jincpif(77.743162408196766932633181568235159),
402            -0.00041799102
403        );
404        assert_eq!(
405            f_jincpif(-77.743162408196766932633181568235159),
406            -0.00041799102
407        );
408        assert_eq!(
409            f_jincpif(84.027189586293545175976760219782591),
410            -0.00023927793
411        );
412        assert_eq!(f_jincpif(f32::INFINITY), 0.);
413        assert_eq!(f_jincpif(f32::NEG_INFINITY), 0.);
414        assert!(f_jincpif(f32::NAN).is_nan());
415        assert_eq!(f_jincpif(-1.7014118e38), -0.0);
416    }
417}