pxfm/sin_cosf/
sincosf.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::f_fmla;
30use crate::polyeval::f_estrin_polyeval5;
31use crate::sin_cosf::sincosf_eval::sincosf_eval;
32
33#[inline(always)]
34fn sincosf_gen_impl(x: f32) -> (f32, f32) {
35    let x_abs = x.to_bits() & 0x7fff_ffffu32;
36    let xd = x as f64;
37
38    // |x| <= pi/16
39    if x_abs <= 0x3e49_0fdbu32 {
40        // |x| < 0.000443633
41        if x_abs < 0x3980_0000u32 {
42            if x_abs == 0u32 {
43                // For signed zeros.
44                return (x, 1.0);
45            }
46            #[cfg(any(
47                all(
48                    any(target_arch = "x86", target_arch = "x86_64"),
49                    target_feature = "fma"
50                ),
51                target_arch = "aarch64"
52            ))]
53            {
54                use crate::common::f_fmlaf;
55                let sf = f_fmlaf(x, f32::from_bits(0xb3000000), x);
56                let cf = f_fmlaf(f32::from_bits(x_abs), f32::from_bits(0xb3000000), 1.);
57                return (sf, cf);
58            }
59            #[cfg(not(any(
60                all(
61                    any(target_arch = "x86", target_arch = "x86_64"),
62                    target_feature = "fma"
63                ),
64                target_arch = "aarch64"
65            )))]
66            {
67                let sf = f_fmla(xd, f64::from_bits(0xbe60000000000000), xd) as f32;
68                let cf = f_fmla(xd, f64::from_bits(0xbe60000000000000), 1.) as f32;
69                return (sf, cf);
70            }
71        }
72
73        let xsqr = xd * xd;
74
75        /*
76        Generated by Sollya:
77        f_sinpi_16 = sin(x)/x;
78        Q = fpminimax(f_sinpi_16, [|0, 2, 4, 6, 8|], [|1, D...|], [0, pi/16]);
79
80        See ./notes/sincosf.sollya
81         */
82        let p = f_estrin_polyeval5(
83            xsqr,
84            f64::from_bits(0x3ff0000000000000),
85            f64::from_bits(0xbfc55555555554c6),
86            f64::from_bits(0x3f81111111085e65),
87            f64::from_bits(0xbf2a019f70fb4d4f),
88            f64::from_bits(0x3ec718d179815e74),
89        );
90        let sf = (xd * p) as f32;
91
92        // Cosine
93        // Generated poly by Sollya:
94        // f_cos_16 = cos(x);
95        //
96        // Q = fpminimax(f_cos_16, [|0, 2, 4, 6, 8|], [|1, D...|], [0, pi/16]);
97        let cf = f_estrin_polyeval5(
98            xsqr,
99            f64::from_bits(0x3ff0000000000000),
100            f64::from_bits(0xbfdffffffffffcea),
101            f64::from_bits(0x3fa55555553d611a),
102            f64::from_bits(0xbf56c16b2e26561a),
103            f64::from_bits(0x3ef9faa67b9da80b),
104        );
105        return (sf, cf as f32);
106    }
107
108    if x_abs >= 0x7f80_0000u32 {
109        return (x + f32::NAN, x + f32::NAN);
110    }
111
112    // Formula:
113    //   sin(x) = sin((k + y)*pi/32)
114    //          = sin(y*pi/32) * cos(k*pi/32) + cos(y*pi/32) * sin(k*pi/32)
115    // The values of sin(k*pi/32) and cos(k*pi/32) for k = 0..31 are precomputed
116    // and stored using a vector of 32 doubles. Sin(y*pi/32) and cos(y*pi/32) are
117    // computed using degree-7 and degree-6 minimax polynomials generated by
118    // Sollya respectively.
119
120    let rs = sincosf_eval(xd, x_abs);
121    let sf = f_fmla(rs.sin_y, rs.cos_k, f_fmla(rs.cosm1_y, rs.sin_k, rs.sin_k)) as f32;
122    let cf = f_fmla(rs.sin_y, -rs.sin_k, f_fmla(rs.cosm1_y, rs.cos_k, rs.cos_k)) as f32;
123    (sf, cf)
124}
125
126#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
127#[target_feature(enable = "avx", enable = "fma")]
128unsafe fn sincosf_fma_impl(x: f32) -> (f32, f32) {
129    let x_abs = x.to_bits() & 0x7fff_ffffu32;
130    let xd = x as f64;
131
132    // |x| <= pi/16
133    if x_abs <= 0x3e49_0fdbu32 {
134        // |x| < 0.000443633
135        if x_abs < 0x3980_0000u32 {
136            if x_abs == 0u32 {
137                // For signed zeros.
138                return (x, 1.0);
139            }
140            let sf = f32::mul_add(x, f32::from_bits(0xb3000000), x);
141            let cf = f32::mul_add(f32::from_bits(x_abs), f32::from_bits(0xb3000000), 1.);
142            return (sf, cf);
143        }
144
145        let xsqr = xd * xd;
146
147        /*
148        Generated by Sollya:
149        f_sinpi_16 = sin(x)/x;
150        Q = fpminimax(f_sinpi_16, [|0, 2, 4, 6, 8|], [|1, D...|], [0, pi/16]);
151
152        See ./notes/sincosf.sollya
153         */
154        use crate::polyeval::d_estrin_polyeval5;
155        let p = d_estrin_polyeval5(
156            xsqr,
157            f64::from_bits(0x3ff0000000000000),
158            f64::from_bits(0xbfc55555555554c6),
159            f64::from_bits(0x3f81111111085e65),
160            f64::from_bits(0xbf2a019f70fb4d4f),
161            f64::from_bits(0x3ec718d179815e74),
162        );
163        let sf = (xd * p) as f32;
164
165        // Cosine
166        // Generated poly by Sollya:
167        // f_cos_16 = cos(x);
168        //
169        // Q = fpminimax(f_cos_16, [|0, 2, 4, 6, 8|], [|1, D...|], [0, pi/16]);
170        let cf = d_estrin_polyeval5(
171            xsqr,
172            f64::from_bits(0x3ff0000000000000),
173            f64::from_bits(0xbfdffffffffffcea),
174            f64::from_bits(0x3fa55555553d611a),
175            f64::from_bits(0xbf56c16b2e26561a),
176            f64::from_bits(0x3ef9faa67b9da80b),
177        );
178        return (sf, cf as f32);
179    }
180
181    if x_abs >= 0x7f80_0000u32 {
182        return (x + f32::NAN, x + f32::NAN);
183    }
184
185    // Formula:
186    //   sin(x) = sin((k + y)*pi/32)
187    //          = sin(y*pi/32) * cos(k*pi/32) + cos(y*pi/32) * sin(k*pi/32)
188    // The values of sin(k*pi/32) and cos(k*pi/32) for k = 0..31 are precomputed
189    // and stored using a vector of 32 doubles. Sin(y*pi/32) and cos(y*pi/32) are
190    // computed using degree-7 and degree-6 minimax polynomials generated by
191    // Sollya respectively.
192
193    use crate::sin_cosf::sincosf_eval::sincosf_eval_fma;
194    let rs = sincosf_eval_fma(xd, x_abs);
195    let sf = f64::mul_add(
196        rs.sin_y,
197        rs.cos_k,
198        f64::mul_add(rs.cosm1_y, rs.sin_k, rs.sin_k),
199    ) as f32;
200    let cf = f64::mul_add(
201        rs.sin_y,
202        -rs.sin_k,
203        f64::mul_add(rs.cosm1_y, rs.cos_k, rs.cos_k),
204    ) as f32;
205    (sf, cf)
206}
207
208/// Sine and cosine
209///
210/// Max found ULP on working range 0.49999967
211#[inline]
212pub fn f_sincosf(x: f32) -> (f32, f32) {
213    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
214    {
215        sincosf_gen_impl(x)
216    }
217    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
218    {
219        use std::sync::OnceLock;
220        static EXECUTOR: OnceLock<unsafe fn(f32) -> (f32, f32)> = OnceLock::new();
221        let q = EXECUTOR.get_or_init(|| {
222            if std::arch::is_x86_feature_detected!("avx")
223                && std::arch::is_x86_feature_detected!("fma")
224            {
225                sincosf_fma_impl
226            } else {
227                sincosf_gen_impl
228            }
229        });
230        unsafe { q(x) }
231    }
232}
233
234#[cfg(test)]
235mod tests {
236    use super::*;
237
238    #[test]
239    fn f_sincosf_test() {
240        let sincos0 = f_sincosf(0.0);
241        assert!(sincos0.0 < 1e-8);
242        assert_eq!(sincos0.1, 1.0);
243        let sincos_pi = f_sincosf(std::f32::consts::PI);
244        assert!(sincos_pi.0 < 1e-8);
245        let sincos_pi_0_5 = f_sincosf(0.5);
246        assert_eq!(sincos_pi_0_5.0, 0.47942555);
247        assert_eq!(sincos_pi_0_5.1, 0.87758255);
248        let sincos_pi_n0_5 = f_sincosf(-0.5);
249        assert_eq!(sincos_pi_n0_5.0, -0.47942555);
250        assert_eq!(sincos_pi_n0_5.1, 0.87758255);
251        let v_z = f_sincosf(0.0002480338);
252        assert_eq!(v_z.1, 0.9999999692396206);
253    }
254
255    #[test]
256    fn f_sincosf_edge_test() {
257        let s0 = f_sincosf(f32::INFINITY);
258        assert!(s0.0.is_nan());
259        assert!(s0.1.is_nan());
260        let s1 = f_sincosf(f32::NEG_INFINITY);
261        assert!(s1.0.is_nan());
262        assert!(s1.1.is_nan());
263        let s2 = f_sincosf(f32::NAN);
264        assert!(s2.0.is_nan());
265        assert!(s2.1.is_nan());
266    }
267}