pxfm/tangent/
atan2pif.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::f_fmla;
30
/// Additive offset of the result (already divided by π), indexed by
/// `4 * sign_bit(y) + 2 * sign_bit(x) + (|y| > |x|)`.
static OFF: [f32; 8] = [0.0, 0.5, 1.0, 0.5, -0.0, -0.5, -1.0, -0.5];
/// `+1.0` / `-1.0` selected by a raw sign bit (single precision).
static SGNF: [f32; 2] = [1., -1.];
/// `+1.0` / `-1.0` in double precision; index `1` negates the reduced ratio
/// when the roles of `x` and `y` were swapped.
static SGN: [f64; 2] = [1., -1.];

/// Shared kernel computing `atan2(y, x) / π` for `f32` inputs.
///
/// `fma(a, b, c)` must evaluate `a * b + c`; it is a parameter so that callers
/// can plug in either a hardware fused multiply-add or a fallback.
///
/// NaN, infinite, zero and `|x| == |y|` inputs are answered directly from the
/// bit patterns. Every other input is reduced to a ratio with magnitude below
/// one (the smaller of `|x|`, `|y|` over the larger), evaluated in `f64`
/// through a rational approximation in the squared ratio, and finally shifted
/// by the matching entry of `OFF`.
#[inline(always)]
fn atan2pif_gen_impl<Q: Fn(f64, f64, f64) -> f64>(y: f32, x: f32, fma: Q) -> f32 {
    const MAGNITUDE_MASK: u32 = 0x7fff_ffff;
    const INF_BITS: u32 = 0xff << 23;

    let x_bits = x.to_bits();
    let y_bits = y.to_bits();
    let x_mag = x_bits & MAGNITUDE_MASK;
    let y_mag = y_bits & MAGNITUDE_MASK;
    // Raw sign bits, kept as table indices (0 = positive, 1 = negative).
    let x_neg = (x_bits >> 31) as usize;
    let y_neg = (y_bits >> 31) as usize;

    // At least one operand is NaN or infinite.
    if y_mag >= INF_BITS || x_mag >= INF_BITS {
        if y_mag > INF_BITS || x_mag > INF_BITS {
            // A magnitude above the infinity pattern is a NaN; the sum propagates it.
            return x + y;
        }
        let y_sign = SGNF[y_neg];
        let y_is_inf = y_mag == INF_BITS;
        let x_is_inf = x_mag == INF_BITS;
        return match (y_is_inf, x_is_inf, x_neg != 0) {
            (true, true, true) => 0.75 * y_sign,
            (true, true, false) => 0.25 * y_sign,
            (false, true, true) => y_sign,
            (false, true, false) => 0.0 * y_sign,
            // Only `y` is infinite.
            _ => 0.5 * y_sign,
        };
    }

    if y_mag == 0 {
        if x_mag == 0 {
            // Both operands are signed zeros: the answer is a pure table lookup.
            return OFF[4 * y_neg + 2 * x_neg];
        }
        if x_neg == 0 {
            // Zero over a positive `x` keeps the sign of `y`.
            return 0.0 * SGNF[y_neg];
        }
        // Zero over a negative `x` is handled by the general path below.
    }

    if x_mag == y_mag {
        // Exact diagonals.
        static DIAGONAL: [f32; 4] = [0.25, 0.75, -0.25, -0.75];
        return DIAGONAL[2 * y_neg + x_neg];
    }

    // `swapped == 1` means |y| > |x|, so the ratio is formed as x / y instead of y / x.
    let swapped = usize::from(y_mag > x_mag);
    let offset_index = 4 * y_neg + 2 * x_neg + swapped;

    let xd = x as f64;
    let yd = y as f64;
    // Branch-free operand selection; the exact expression shape is kept because
    // it determines the sign of a zero numerator.
    static SELECT: [f64; 2] = [0., 1.];
    let pick_first = SELECT[swapped];
    let pick_second = SELECT[1 - swapped];
    let ratio_num = fma(pick_first, xd, pick_second * yd);
    let ratio_den = fma(pick_first, yd, pick_second * xd);
    let t = ratio_num / ratio_den;

    // Numerator coefficients of the rational approximation; entry 0 is 1/π.
    const NUM: [u64; 7] = [
        0x3fd45f306dc9c883,
        0x3fe988d83a142ada,
        0x3fe747bebf492057,
        0x3fd2cc5645094ff3,
        0x3faa0521c711ab66,
        0x3f6881b8058b9a0d,
        0x3efb16ff514a0af0,
    ];
    // Denominator coefficients of the rational approximation.
    const DEN: [u64; 7] = [
        0x3ff0000000000000,
        0x4006b8b143a3f6da,
        0x4008421201d18ed5,
        0x3ff8221d086914eb,
        0x3fd670657e3a07ba,
        0x3fa0f4951fd1e72d,
        0x3f4b3874b8798286,
    ];
    let num_c = |k: usize| f64::from_bits(NUM[k]);
    let den_c = |k: usize| f64::from_bits(DEN[k]);

    let t2 = t * t;
    let t = t * SGN[swapped];

    // avoid spurious underflow in the polynomial evaluation excluding tiny arguments
    // (threshold is 2^-54; below it the leading coefficient alone is used)
    let scale = if t2 > f64::from_bits(0x3c90000000000000) {
        let t4 = t2 * t2;
        let t8 = t4 * t4;

        // Numerator: pairs are fused, the combining steps are plain multiply + add.
        let n01 = fma(t2, num_c(1), num_c(0));
        let n23 = fma(t2, num_c(3), num_c(2));
        let n45 = fma(t2, num_c(5), num_c(4));
        let n0123 = n01 + t4 * n23;
        let n456 = n45 + t4 * num_c(6);
        let numerator = n0123 + t8 * n456;

        // Denominator: every step goes through `fma`.
        let d01 = fma(t2, den_c(1), den_c(0));
        let d23 = fma(t2, den_c(3), den_c(2));
        let d45 = fma(t2, den_c(5), den_c(4));
        let d0123 = fma(t4, d23, d01);
        let d456 = fma(t4, den_c(6), d45);
        let denominator = fma(t8, d456, d0123);

        numerator / denominator
    } else {
        num_c(0)
    };

    fma(t, scale, OFF[offset_index] as f64) as f32
}
140
141#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
142#[target_feature(enable = "avx", enable = "fma")]
143unsafe fn atan2pif_fma_impl(y: f32, x: f32) -> f32 {
144    atan2pif_gen_impl(y, x, f64::mul_add)
145}
146
147/// Computes atan(x/y) / PI
148///
149/// Max found ULP 0.5
150#[inline]
151pub fn f_atan2pif(y: f32, x: f32) -> f32 {
152    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
153    {
154        atan2pif_gen_impl(y, x, f_fmla)
155    }
156    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
157    {
158        use std::sync::OnceLock;
159        static EXECUTOR: OnceLock<unsafe fn(f32, f32) -> f32> = OnceLock::new();
160        let q = EXECUTOR.get_or_init(|| {
161            if std::arch::is_x86_feature_detected!("avx")
162                && std::arch::is_x86_feature_detected!("fma")
163            {
164                atan2pif_fma_impl
165            } else {
166                fn def_atan2pif(y: f32, x: f32) -> f32 {
167                    atan2pif_gen_impl(y, x, f_fmla)
168                }
169                def_atan2pif
170            }
171        });
172        unsafe { q(y, x) }
173    }
174}
175
#[cfg(test)]
mod tests {
    use super::f_atan2pif;

    /// Spot-checks `f_atan2pif` against fixed `(y, x, expected)` triples.
    #[test]
    fn test_atan2pif() {
        let cases: [(f32, f32, f32); 3] = [
            (0.32131, 0.987565, 0.10012555),
            (532.32131, 12.987565, 0.49223542),
            (-754.32131, 12.987565, -0.494520042),
        ];
        for &(y, x, expected) in cases.iter() {
            assert_eq!(f_atan2pif(y, x), expected, "f_atan2pif({y}, {x})");
        }
    }
}