pxfm/sin_cosf/
cospif.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{is_integerf, is_odd_integerf};
30use crate::polyeval::f_polyeval5;
31use crate::sin_cosf::argument_reduction_pi::ArgumentReducerPi;
32use crate::sin_cosf::sincosf_eval::{cospif_eval, sinpif_eval};
33
34#[inline(always)]
35fn cospif_gen_impl(x: f32) -> f32 {
36    let x_abs = x.to_bits() & 0x7fff_ffffu32;
37    let x = f32::from_bits(x_abs);
38    let xd = x as f64;
39
40    // |x| <= 1/16
41    if x_abs <= 0x3d80_0000u32 {
42        // |x| < 0.00000009546391
43        if x_abs < 0x38a2_f984u32 {
44            #[cfg(any(
45                all(
46                    any(target_arch = "x86", target_arch = "x86_64"),
47                    target_feature = "fma"
48                ),
49                target_arch = "aarch64"
50            ))]
51            {
52                use crate::common::f_fmlaf;
53                return f_fmlaf(x, f32::from_bits(0xb3000000), 1.);
54            }
55            #[cfg(not(any(
56                all(
57                    any(target_arch = "x86", target_arch = "x86_64"),
58                    target_feature = "fma"
59                ),
60                target_arch = "aarch64"
61            )))]
62            {
63                use crate::common::f_fmla;
64                return f_fmla(xd, f64::from_bits(0xbe60000000000000), 1.) as f32;
65            }
66        }
67
68        // Cos(x*PI)
69        // Generated poly by Sollya:
70        // d = [0, 1/16];
71        // f_cos = cos(y*pi);
72        // Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|D...|], d, relative, floating);
73        //
74        // See ./notes/cospif.sollya
75
76        let x2 = xd * xd;
77        let p = f_polyeval5(
78            x2,
79            f64::from_bits(0x3ff0000000000000),
80            f64::from_bits(0xc013bd3cc9be43f7),
81            f64::from_bits(0x40103c1f08091fe0),
82            f64::from_bits(0xbff55d3ba3d94835),
83            f64::from_bits(0x3fce173c2a00e74e),
84        );
85        return p as f32;
86    }
87
88    // Numbers greater or equal to 2^23 are always integers or NaN
89    if x_abs >= 0x4b00_0000u32 || is_integerf(x) {
90        if x_abs >= 0x7f80_0000u32 {
91            return x + f32::NAN;
92        }
93        if x_abs < 0x4b80_0000u32 {
94            static CF: [f32; 2] = [1., -1.];
95            return CF[is_odd_integerf(x) as usize];
96        }
97        return 1.;
98    }
99
100    // We're computing cos(y) after argument reduction then return valid value
101    // based on quadrant
102    let reducer = ArgumentReducerPi { x: x as f64 };
103    let (y, k) = reducer.reduce_0p25();
104    // Decide based on quadrant what kernel function to use
105    (match k & 3 {
106        0 => cospif_eval(y),
107        1 => sinpif_eval(-y),
108        2 => -cospif_eval(y),
109        _ => sinpif_eval(y),
110    }) as f32
111}
112
113#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
114#[target_feature(enable = "avx", enable = "fma")]
115unsafe fn cospif_fma_impl(x: f32) -> f32 {
116    let x_abs = x.to_bits() & 0x7fff_ffffu32;
117    let x = f32::from_bits(x_abs);
118    let xd = x as f64;
119
120    // |x| <= 1/16
121    if x_abs <= 0x3d80_0000u32 {
122        // |x| < 0.00000009546391
123        if x_abs < 0x38a2_f984u32 {
124            return f32::mul_add(x, f32::from_bits(0xb3000000), 1.);
125        }
126
127        // Cos(x*PI)
128        // Generated poly by Sollya:
129        // d = [0, 1/16];
130        // f_cos = cos(y*pi);
131        // Q = fpminimax(f_cos, [|0, 2, 4, 6, 8|], [|D...|], d, relative, floating);
132        //
133        // See ./notes/cospif.sollya
134
135        let x2 = xd * xd;
136        use crate::polyeval::d_polyeval5;
137        let p = d_polyeval5(
138            x2,
139            f64::from_bits(0x3ff0000000000000),
140            f64::from_bits(0xc013bd3cc9be43f7),
141            f64::from_bits(0x40103c1f08091fe0),
142            f64::from_bits(0xbff55d3ba3d94835),
143            f64::from_bits(0x3fce173c2a00e74e),
144        );
145        return p as f32;
146    }
147
148    // Numbers greater or equal to 2^23 are always integers or NaN
149    if x_abs >= 0x4b00_0000u32 || x.round_ties_even() == x {
150        if x_abs >= 0x7f80_0000u32 {
151            return x + f32::NAN;
152        }
153        if x_abs < 0x4b80_0000u32 {
154            static CF: [f32; 2] = [1., -1.];
155            let is_odd_integer = unsafe { (x.to_int_unchecked::<i32>() & 1) != 0 };
156            return CF[is_odd_integer as usize];
157        }
158        return 1.;
159    }
160
161    // We're computing cos(y) after argument reduction then return valid value
162    // based on quadrant
163    let reducer = ArgumentReducerPi { x: x as f64 };
164    let (y, k) = reducer.reduce_0p25_fma();
165    // Decide based on quadrant what kernel function to use
166    use crate::sin_cosf::sincosf_eval::{cospif_eval_fma, sinpif_eval_fma};
167    (match k & 3 {
168        0 => cospif_eval_fma(y),
169        1 => sinpif_eval_fma(-y),
170        2 => -cospif_eval_fma(y),
171        _ => sinpif_eval_fma(y),
172    }) as f32
173}
174
175/// Computes cos(PI*x)
176///
177/// Max ULP 0.5
178#[inline]
179pub fn f_cospif(x: f32) -> f32 {
180    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
181    {
182        cospif_gen_impl(x)
183    }
184    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
185    {
186        use std::sync::OnceLock;
187        static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
188        let q = EXECUTOR.get_or_init(|| {
189            if std::arch::is_x86_feature_detected!("avx")
190                && std::arch::is_x86_feature_detected!("fma")
191            {
192                cospif_fma_impl
193            } else {
194                cospif_gen_impl
195            }
196        });
197        unsafe { q(x) }
198    }
199}
200
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `f_cospif` on integers, a half-integer, one generic value and
    /// the non-finite inputs.
    #[test]
    fn test_f_cospif() {
        // (input, expected value of cos(PI * input))
        let exact_cases: [(f32, f32); 7] = [
            (1., -1.),
            (-3.5, 0.0),
            (3., -1.),
            (-3., -1.),
            (2., 1.),
            (-2., 1.),
            (115.30706, -0.5696978),
        ];
        for (input, expected) in exact_cases {
            assert_eq!(f_cospif(input), expected, "f_cospif({input})");
        }

        // Infinities and NaN must all produce NaN.
        for non_finite in [f32::INFINITY, f32::NAN, f32::NEG_INFINITY] {
            assert!(
                f_cospif(non_finite).is_nan(),
                "f_cospif({non_finite}) should be NaN"
            );
        }
    }
}
pxfm/sin_cosf/cospif.rs

pxfm/sin_cosf/
cospif.rs