pxfm/sin_cosf/
sinf.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::f_fmla;
30use crate::polyeval::f_polyeval5;
31use crate::sin_cosf::sincosf_eval::sincosf_eval;
32
33#[inline(always)]
34fn sinf_gen_impl(x: f32) -> f32 {
35    let x_abs = x.to_bits() & 0x7fff_ffffu32;
36    let xd = x as f64;
37
38    // |x| <= pi/16
39    if x_abs <= 0x3e49_0fdbu32 {
40        // |x| < 0.000443633
41        if x_abs < 0x39e8_9769u32 {
42            if x_abs == 0u32 {
43                // For signed zeros.
44                return x;
45            }
46            #[cfg(any(
47                all(
48                    any(target_arch = "x86", target_arch = "x86_64"),
49                    target_feature = "fma"
50                ),
51                target_arch = "aarch64"
52            ))]
53            {
54                use crate::common::f_fmlaf;
55                return f_fmlaf(x, f32::from_bits(0xb3000000), x);
56            }
57            #[cfg(not(any(
58                all(
59                    any(target_arch = "x86", target_arch = "x86_64"),
60                    target_feature = "fma"
61                ),
62                target_arch = "aarch64"
63            )))]
64            {
65                return f_fmla(xd, f64::from_bits(0xbe60000000000000), xd) as f32;
66            }
67        }
68
69        let xsqr = xd * xd;
70
71        /*
72           Generated by Sollya:
73           f_sinpi_16 = sin(x)/x;
74           Q = fpminimax(f_sinpi_16, [|0, 2, 4, 6, 8|], [|1, D...|], [0, pi/16]);
75
76           See ./notes/sinf.sollya
77        */
78        let p = f_polyeval5(
79            xsqr,
80            f64::from_bits(0x3ff0000000000000),
81            f64::from_bits(0xbfc55555555554c6),
82            f64::from_bits(0x3f81111111085e65),
83            f64::from_bits(0xbf2a019f70fb4d4f),
84            f64::from_bits(0x3ec718d179815e74),
85        );
86        return (xd * p) as f32;
87    }
88
89    if x_abs >= 0x7f80_0000u32 {
90        return x + f32::NAN;
91    }
92
93    // Formula:
94    //   sin(x) = sin((k + y)*pi/32)
95    //          = sin(y*pi/32) * cos(k*pi/32) + cos(y*pi/32) * sin(k*pi/32)
96    // The values of sin(k*pi/32) and cos(k*pi/32) for k = 0..31 are precomputed
97    // and stored using a vector of 32 doubles. Sin(y*pi/32) and cos(y*pi/32) are
98    // computed using degree-7 and degree-6 minimax polynomials generated by
99    // Sollya respectively.
100
101    let rs = sincosf_eval(xd, x_abs);
102    f_fmla(rs.sin_y, rs.cos_k, f_fmla(rs.cosm1_y, rs.sin_k, rs.sin_k)) as f32
103}
104
105#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
106#[target_feature(enable = "avx", enable = "fma")]
107unsafe fn sinf_fma_impl(x: f32) -> f32 {
108    let x_abs = x.to_bits() & 0x7fff_ffffu32;
109    let xd = x as f64;
110
111    // |x| <= pi/16
112    if x_abs <= 0x3e49_0fdbu32 {
113        // |x| < 0.000443633
114        if x_abs < 0x39e8_9769u32 {
115            if x_abs == 0u32 {
116                // For signed zeros.
117                return x;
118            }
119            return f32::mul_add(x, f32::from_bits(0xb3000000), x);
120        }
121
122        let xsqr = xd * xd;
123
124        /*
125           Generated by Sollya:
126           f_sinpi_16 = sin(x)/x;
127           Q = fpminimax(f_sinpi_16, [|0, 2, 4, 6, 8|], [|1, D...|], [0, pi/16]);
128
129           See ./notes/sinf.sollya
130        */
131        use crate::polyeval::d_polyeval5;
132        let p = d_polyeval5(
133            xsqr,
134            f64::from_bits(0x3ff0000000000000),
135            f64::from_bits(0xbfc55555555554c6),
136            f64::from_bits(0x3f81111111085e65),
137            f64::from_bits(0xbf2a019f70fb4d4f),
138            f64::from_bits(0x3ec718d179815e74),
139        );
140        return (xd * p) as f32;
141    }
142
143    if x_abs >= 0x7f80_0000u32 {
144        return x + f32::NAN;
145    }
146
147    // Formula:
148    //   sin(x) = sin((k + y)*pi/32)
149    //          = sin(y*pi/32) * cos(k*pi/32) + cos(y*pi/32) * sin(k*pi/32)
150    // The values of sin(k*pi/32) and cos(k*pi/32) for k = 0..31 are precomputed
151    // and stored using a vector of 32 doubles. Sin(y*pi/32) and cos(y*pi/32) are
152    // computed using degree-7 and degree-6 minimax polynomials generated by
153    // Sollya respectively.
154    use crate::sin_cosf::sincosf_eval::sincosf_eval_fma;
155    let rs = sincosf_eval_fma(xd, x_abs);
156    f64::mul_add(
157        rs.sin_y,
158        rs.cos_k,
159        f64::mul_add(rs.cosm1_y, rs.sin_k, rs.sin_k),
160    ) as f32
161}
162
163/// Sine function
164///
165/// Max found ULP 0.5
166#[inline]
167pub fn f_sinf(x: f32) -> f32 {
168    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
169    {
170        sinf_gen_impl(x)
171    }
172    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
173    {
174        use std::sync::OnceLock;
175        static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
176        let q = EXECUTOR.get_or_init(|| {
177            if std::arch::is_x86_feature_detected!("avx")
178                && std::arch::is_x86_feature_detected!("fma")
179            {
180                sinf_fma_impl
181            } else {
182                sinf_gen_impl
183            }
184        });
185        unsafe { q(x) }
186    }
187}
188
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks `f_sinf` on exact reference values, non-finite inputs and
    /// two inputs compared with an absolute tolerance.
    #[test]
    fn f_sinf_test() {
        use std::f32::consts::{FRAC_PI_2, PI};

        // Inputs whose result must match the reference bit-for-bit.
        let exact_cases: [(f32, f32); 6] = [
            (0.0, 0.0),
            (1.0, 0.84147096),
            (0.3, 0.29552022),
            (-1.0, -0.84147096),
            (-0.3, -0.29552022),
            (PI / 2., 1.),
        ];
        for (input, expected) in exact_cases {
            assert_eq!(f_sinf(input), expected);
        }

        // Infinities have no sine; the result must be NaN.
        for input in [f32::INFINITY, f32::NEG_INFINITY] {
            assert!(f_sinf(input).is_nan());
        }

        // Inputs checked against the ideal result within 1e-6.
        let approx_cases: [(f32, f32); 2] = [(PI, 0f32), (FRAC_PI_2, 1f32)];
        for (input, target) in approx_cases {
            assert!((f_sinf(input) - target).abs() < 1e-6);
        }
    }
}
206}