pxfm/
acospif.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::asinpif::ASINCOSF_PI_TABLE;
30use crate::common::{dd_fmla, f_fmla};
31
32#[inline(always)]
33/// fma - fma
34/// dd_fma - mandatory fma fallback
35fn acospif_gen_impl<Q: Fn(f64, f64, f64) -> f64, F: Fn(f64, f64, f64) -> f64>(
36    x: f32,
37    fma: Q,
38    dd_fma: F,
39) -> f32 {
40    let ax = x.abs();
41    let az = ax as f64;
42    let z = x as f64;
43    let t: u32 = x.to_bits();
44    let e: i32 = ((t >> 23) & 0xff) as i32;
45    if e >= 127 {
46        if x == 1.0 {
47            return 0.0;
48        }
49        if x == -1.0 {
50            return 1.0;
51        }
52        if e == 0xff && (t.wrapping_shl(9)) != 0 {
53            return x + x;
54        } // nan
55        return f32::NAN;
56    }
57    let s: i32 = 146i32.wrapping_sub(e);
58    let mut i = 0i32;
59    if s < 32 {
60        i = (((t & 0x007fffff) | 1 << 23) >> s) as i32;
61    }
62    let c = ASINCOSF_PI_TABLE[i as usize & 15];
63    let z2 = z * z;
64    let z4 = z2 * z2;
65    if i == 0 {
66        let mut c0 = fma(z2, f64::from_bits(c[1]), f64::from_bits(c[0]));
67        let c2 = fma(z2, f64::from_bits(c[3]), f64::from_bits(c[2]));
68        let mut c4 = fma(z2, f64::from_bits(c[5]), f64::from_bits(c[4]));
69        let c6 = fma(z2, f64::from_bits(c[7]), f64::from_bits(c[6]));
70        c0 += c2 * z4;
71        c4 += c6 * z4;
72        /* For |x| <= 0x1.0fd288p-127, c0 += c4*(z4*z4) would raise a spurious
73        underflow exception, we use an FMA instead, where c4 * z4 does not
74        underflow. */
75        c0 = dd_fma(c4 * z4, z4, c0);
76        fma(-z, c0, 0.5) as f32
77    } else {
78        let f = (1. - az).sqrt();
79        let mut c0 = fma(az, f64::from_bits(c[1]), f64::from_bits(c[0]));
80        let c2 = fma(az, f64::from_bits(c[3]), f64::from_bits(c[2]));
81        let mut c4 = fma(az, f64::from_bits(c[5]), f64::from_bits(c[4]));
82        let c6 = fma(az, f64::from_bits(c[7]), f64::from_bits(c[6]));
83        c0 += c2 * z2;
84        c4 += c6 * z2;
85        c0 += c4 * z4;
86        static SIGN: [f64; 2] = [0., 1.];
87        let r = SIGN[(t >> 31) as usize] + c0 * f64::copysign(f, x as f64);
88        r as f32
89    }
90}
91
92#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
93#[target_feature(enable = "avx", enable = "fma")]
94unsafe fn acospif_fma_impl(x: f32) -> f32 {
95    acospif_gen_impl(x, f64::mul_add, f64::mul_add)
96}
97
98/// Computes acos(x)/PI
99///
100/// Max ULP 0.5
101#[inline]
102pub fn f_acospif(x: f32) -> f32 {
103    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
104    {
105        acospif_gen_impl(x, f_fmla, dd_fmla)
106    }
107    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
108    {
109        use std::sync::OnceLock;
110        static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
111        let q = EXECUTOR.get_or_init(|| {
112            if std::arch::is_x86_feature_detected!("avx")
113                && std::arch::is_x86_feature_detected!("fma")
114            {
115                acospif_fma_impl
116            } else {
117                fn def_acospif(x: f32) -> f32 {
118                    acospif_gen_impl(x, f_fmla, dd_fmla)
119                }
120                def_acospif
121            }
122        });
123        unsafe { q(x) }
124    }
125}
126
#[cfg(test)]
mod tests {
    use super::*;

    /// Reference values inside the domain.
    #[test]
    fn test_acospif() {
        assert_eq!(f_acospif(0.0), 0.5);
        assert_eq!(f_acospif(0.5), 0.33333334);
        assert_eq!(f_acospif(1.0), 0.0);
    }

    /// The lower endpoint is handled by an explicit early return.
    #[test]
    fn test_acospif_negative_endpoint() {
        assert_eq!(f_acospif(-1.0), 1.0);
    }

    /// Inputs with |x| > 1, infinities and NaN all produce NaN.
    #[test]
    fn test_acospif_out_of_domain() {
        assert!(f_acospif(1.5).is_nan());
        assert!(f_acospif(-1.5).is_nan());
        assert!(f_acospif(1.0000001).is_nan());
        assert!(f_acospif(f32::INFINITY).is_nan());
        assert!(f_acospif(f32::NEG_INFINITY).is_nan());
        assert!(f_acospif(f32::NAN).is_nan());
    }
}