pxfm/sin_cosf/
secf.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::f_fmla;
30use crate::polyeval::f_polyeval6;
31use crate::sin_cosf::sincosf_eval::sincosf_eval;
32
33#[inline(always)]
34fn secf_gen_impl(x: f32) -> f32 {
35    let x_abs = x.to_bits() & 0x7fff_ffffu32;
36    let x = f32::from_bits(x_abs);
37    let xd = x as f64;
38
39    // |x| <= pi/16
40    if x_abs <= 0x3e49_0fdbu32 {
41        // |x| < 0.000244141
42        if x_abs < 0x3980_0000u32 {
43            // taylor series for sec(x) ~ 1 + x^2/2 + O(x^4)
44            // for such small interval just doing 2 first coefficients from taylor series
45            // FMA availability is mandatory to perform it in f32 without upcasting to f64.
46            #[cfg(any(
47                all(
48                    any(target_arch = "x86", target_arch = "x86_64"),
49                    target_feature = "fma"
50                ),
51                target_arch = "aarch64"
52            ))]
53            {
54                use crate::common::f_fmlaf;
55                return f_fmlaf(x, x * f32::from_bits(0x3f000000), 1.);
56            }
57            #[cfg(not(any(
58                all(
59                    any(target_arch = "x86", target_arch = "x86_64"),
60                    target_feature = "fma"
61                ),
62                target_arch = "aarch64"
63            )))]
64            {
65                let x2 = xd * xd;
66                return f_fmla(x2, f64::from_bits(0x3fe0000000000000), 1.) as f32;
67            }
68        }
69
70        // Secant
71        // Generated poly by Sollya:
72        // f = 1 / cos(x);
73        // d = [0.000244141; pi/16];
74        // pf = fpminimax(f, [|0, 2, 4, 6, 8, 10|], [|1, D...|], d, relative, floating);
75        //
76        // See ./notes/secf.sollya
77
78        let x2 = xd * xd;
79        let p = f_polyeval6(
80            x2,
81            f64::from_bits(0x3ff0000000000000),
82            f64::from_bits(0x3fe000000001c0fb),
83            f64::from_bits(0x3fcaaaaaa0b8a71b),
84            f64::from_bits(0x3fb5b06437bc5a13),
85            f64::from_bits(0x3fa192a33a9fca4f),
86            f64::from_bits(0x3f8dde280c29af37),
87        );
88        return p as f32;
89    }
90
91    if x_abs >= 0x7f80_0000u32 {
92        return x + f32::NAN;
93    }
94
95    // Formula:
96    //   cos(x) = cos((k + y)*pi/32)
97    //          = cos(y*pi/32) * cos(k*pi/32) - sin(y*pi/32) * sin(k*pi/32)
98    // The values of sin(k*pi/32) and cos(k*pi/32) for k = 0..63 are precomputed
99    // and stored using a vector of 32 doubles. Sin(y*pi/32) and cos(y*pi/32) are
100    // computed using degree-7 and degree-6 minimax polynomials generated by
101    // Sollya respectively.
102    // Combine the results with the sine of sum formula:
103    //   cos(x) = cos((k + y)*pi/32)
104    //          = cos(y*pi/32) * cos(k*pi/32) - sin(y*pi/32) * sin(k*pi/32)
105    //          = cosm1_y * cos_k + sin_y * sin_k
106    //          = (cosm1_y * cos_k + cos_k) + sin_y * sin_k
107    // then sec(x) = 1/cos(x)
108
109    let rs = sincosf_eval(xd, x_abs);
110    (1. / f_fmla(rs.sin_y, -rs.sin_k, f_fmla(rs.cosm1_y, rs.cos_k, rs.cos_k))) as f32
111}
112
113#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
114#[target_feature(enable = "avx", enable = "fma")]
115unsafe fn secf_fma_impl(x: f32) -> f32 {
116    let x_abs = x.to_bits() & 0x7fff_ffffu32;
117    let x = f32::from_bits(x_abs);
118    let xd = x as f64;
119
120    // |x| <= pi/16
121    if x_abs <= 0x3e49_0fdbu32 {
122        // |x| < 0.000244141
123        if x_abs < 0x3980_0000u32 {
124            // taylor series for sec(x) ~ 1 + x^2/2 + O(x^4)
125            // for such small interval just doing 2 first coefficients from taylor series
126            // FMA availability is mandatory to perform it in f32 without upcasting to f64.
127            return f32::mul_add(x, x * f32::from_bits(0x3f000000), 1.);
128        }
129
130        // Secant
131        // Generated poly by Sollya:
132        // f = 1 / cos(x);
133        // d = [0.000244141; pi/16];
134        // pf = fpminimax(f, [|0, 2, 4, 6, 8, 10|], [|1, D...|], d, relative, floating);
135        //
136        // See ./notes/secf.sollya
137
138        let x2 = xd * xd;
139        use crate::polyeval::d_polyeval6;
140        let p = d_polyeval6(
141            x2,
142            f64::from_bits(0x3ff0000000000000),
143            f64::from_bits(0x3fe000000001c0fb),
144            f64::from_bits(0x3fcaaaaaa0b8a71b),
145            f64::from_bits(0x3fb5b06437bc5a13),
146            f64::from_bits(0x3fa192a33a9fca4f),
147            f64::from_bits(0x3f8dde280c29af37),
148        );
149        return p as f32;
150    }
151
152    if x_abs >= 0x7f80_0000u32 {
153        return x + f32::NAN;
154    }
155
156    // Formula:
157    //   cos(x) = cos((k + y)*pi/32)
158    //          = cos(y*pi/32) * cos(k*pi/32) - sin(y*pi/32) * sin(k*pi/32)
159    // The values of sin(k*pi/32) and cos(k*pi/32) for k = 0..63 are precomputed
160    // and stored using a vector of 32 doubles. Sin(y*pi/32) and cos(y*pi/32) are
161    // computed using degree-7 and degree-6 minimax polynomials generated by
162    // Sollya respectively.
163    // Combine the results with the sine of sum formula:
164    //   cos(x) = cos((k + y)*pi/32)
165    //          = cos(y*pi/32) * cos(k*pi/32) - sin(y*pi/32) * sin(k*pi/32)
166    //          = cosm1_y * cos_k + sin_y * sin_k
167    //          = (cosm1_y * cos_k + cos_k) + sin_y * sin_k
168    // then sec(x) = 1/cos(x)
169    use crate::sin_cosf::sincosf_eval::sincosf_eval_fma;
170    let rs = sincosf_eval_fma(xd, x_abs);
171    (1. / f64::mul_add(
172        rs.sin_y,
173        -rs.sin_k,
174        f64::mul_add(rs.cosm1_y, rs.cos_k, rs.cos_k),
175    )) as f32
176}
177
178/// Computes secant ( 1 / cos(x) )
179///
180/// Max found ULP 0.5
181#[inline]
182pub fn f_secf(x: f32) -> f32 {
183    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
184    {
185        secf_gen_impl(x)
186    }
187    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
188    {
189        use std::sync::OnceLock;
190        static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
191        let q = EXECUTOR.get_or_init(|| {
192            if std::arch::is_x86_feature_detected!("avx")
193                && std::arch::is_x86_feature_detected!("fma")
194            {
195                secf_fma_impl
196            } else {
197                secf_gen_impl
198            }
199        });
200        unsafe { q(x) }
201    }
202}
203
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot-checks `f_secf` on a few finite inputs of both signs and on the
    /// non-finite inputs, which must all produce NaN.
    #[test]
    fn test_f_secf() {
        // (input, expected secant)
        let finite_cases: [(f32, f32); 5] = [
            (0.0, 1.0),
            (0.5, 1.139494),
            (-0.5, 1.139494),
            (1.5, 14.136833),
            (-1.5, 14.136833),
        ];
        for (input, expected) in finite_cases {
            assert_eq!(f_secf(input), expected);
        }

        for input in [f32::INFINITY, f32::NEG_INFINITY, f32::NAN] {
            assert!(f_secf(input).is_nan());
        }
    }
}