pxfm/sin_cosf/secf.rs
1/*
2 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1. Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2. Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3. Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::f_fmla;
30use crate::polyeval::f_polyeval6;
31use crate::sin_cosf::sincosf_eval::sincosf_eval;
32
33#[inline(always)]
34fn secf_gen_impl(x: f32) -> f32 {
35 let x_abs = x.to_bits() & 0x7fff_ffffu32;
36 let x = f32::from_bits(x_abs);
37 let xd = x as f64;
38
39 // |x| <= pi/16
40 if x_abs <= 0x3e49_0fdbu32 {
41 // |x| < 0.000244141
42 if x_abs < 0x3980_0000u32 {
43 // taylor series for sec(x) ~ 1 + x^2/2 + O(x^4)
44 // for such small interval just doing 2 first coefficients from taylor series
45 // FMA availability is mandatory to perform it in f32 without upcasting to f64.
46 #[cfg(any(
47 all(
48 any(target_arch = "x86", target_arch = "x86_64"),
49 target_feature = "fma"
50 ),
51 target_arch = "aarch64"
52 ))]
53 {
54 use crate::common::f_fmlaf;
55 return f_fmlaf(x, x * f32::from_bits(0x3f000000), 1.);
56 }
57 #[cfg(not(any(
58 all(
59 any(target_arch = "x86", target_arch = "x86_64"),
60 target_feature = "fma"
61 ),
62 target_arch = "aarch64"
63 )))]
64 {
65 let x2 = xd * xd;
66 return f_fmla(x2, f64::from_bits(0x3fe0000000000000), 1.) as f32;
67 }
68 }
69
70 // Secant
71 // Generated poly by Sollya:
72 // f = 1 / cos(x);
73 // d = [0.000244141; pi/16];
74 // pf = fpminimax(f, [|0, 2, 4, 6, 8, 10|], [|1, D...|], d, relative, floating);
75 //
76 // See ./notes/secf.sollya
77
78 let x2 = xd * xd;
79 let p = f_polyeval6(
80 x2,
81 f64::from_bits(0x3ff0000000000000),
82 f64::from_bits(0x3fe000000001c0fb),
83 f64::from_bits(0x3fcaaaaaa0b8a71b),
84 f64::from_bits(0x3fb5b06437bc5a13),
85 f64::from_bits(0x3fa192a33a9fca4f),
86 f64::from_bits(0x3f8dde280c29af37),
87 );
88 return p as f32;
89 }
90
91 if x_abs >= 0x7f80_0000u32 {
92 return x + f32::NAN;
93 }
94
95 // Formula:
96 // cos(x) = cos((k + y)*pi/32)
97 // = cos(y*pi/32) * cos(k*pi/32) - sin(y*pi/32) * sin(k*pi/32)
98 // The values of sin(k*pi/32) and cos(k*pi/32) for k = 0..63 are precomputed
99 // and stored using a vector of 32 doubles. Sin(y*pi/32) and cos(y*pi/32) are
100 // computed using degree-7 and degree-6 minimax polynomials generated by
101 // Sollya respectively.
102 // Combine the results with the sine of sum formula:
103 // cos(x) = cos((k + y)*pi/32)
104 // = cos(y*pi/32) * cos(k*pi/32) - sin(y*pi/32) * sin(k*pi/32)
105 // = cosm1_y * cos_k + sin_y * sin_k
106 // = (cosm1_y * cos_k + cos_k) + sin_y * sin_k
107 // then sec(x) = 1/cos(x)
108
109 let rs = sincosf_eval(xd, x_abs);
110 (1. / f_fmla(rs.sin_y, -rs.sin_k, f_fmla(rs.cosm1_y, rs.cos_k, rs.cos_k))) as f32
111}
112
113#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
114#[target_feature(enable = "avx", enable = "fma")]
115unsafe fn secf_fma_impl(x: f32) -> f32 {
116 let x_abs = x.to_bits() & 0x7fff_ffffu32;
117 let x = f32::from_bits(x_abs);
118 let xd = x as f64;
119
120 // |x| <= pi/16
121 if x_abs <= 0x3e49_0fdbu32 {
122 // |x| < 0.000244141
123 if x_abs < 0x3980_0000u32 {
124 // taylor series for sec(x) ~ 1 + x^2/2 + O(x^4)
125 // for such small interval just doing 2 first coefficients from taylor series
126 // FMA availability is mandatory to perform it in f32 without upcasting to f64.
127 return f32::mul_add(x, x * f32::from_bits(0x3f000000), 1.);
128 }
129
130 // Secant
131 // Generated poly by Sollya:
132 // f = 1 / cos(x);
133 // d = [0.000244141; pi/16];
134 // pf = fpminimax(f, [|0, 2, 4, 6, 8, 10|], [|1, D...|], d, relative, floating);
135 //
136 // See ./notes/secf.sollya
137
138 let x2 = xd * xd;
139 use crate::polyeval::d_polyeval6;
140 let p = d_polyeval6(
141 x2,
142 f64::from_bits(0x3ff0000000000000),
143 f64::from_bits(0x3fe000000001c0fb),
144 f64::from_bits(0x3fcaaaaaa0b8a71b),
145 f64::from_bits(0x3fb5b06437bc5a13),
146 f64::from_bits(0x3fa192a33a9fca4f),
147 f64::from_bits(0x3f8dde280c29af37),
148 );
149 return p as f32;
150 }
151
152 if x_abs >= 0x7f80_0000u32 {
153 return x + f32::NAN;
154 }
155
156 // Formula:
157 // cos(x) = cos((k + y)*pi/32)
158 // = cos(y*pi/32) * cos(k*pi/32) - sin(y*pi/32) * sin(k*pi/32)
159 // The values of sin(k*pi/32) and cos(k*pi/32) for k = 0..63 are precomputed
160 // and stored using a vector of 32 doubles. Sin(y*pi/32) and cos(y*pi/32) are
161 // computed using degree-7 and degree-6 minimax polynomials generated by
162 // Sollya respectively.
163 // Combine the results with the sine of sum formula:
164 // cos(x) = cos((k + y)*pi/32)
165 // = cos(y*pi/32) * cos(k*pi/32) - sin(y*pi/32) * sin(k*pi/32)
166 // = cosm1_y * cos_k + sin_y * sin_k
167 // = (cosm1_y * cos_k + cos_k) + sin_y * sin_k
168 // then sec(x) = 1/cos(x)
169 use crate::sin_cosf::sincosf_eval::sincosf_eval_fma;
170 let rs = sincosf_eval_fma(xd, x_abs);
171 (1. / f64::mul_add(
172 rs.sin_y,
173 -rs.sin_k,
174 f64::mul_add(rs.cosm1_y, rs.cos_k, rs.cos_k),
175 )) as f32
176}
177
178/// Computes secant ( 1 / cos(x) )
179///
180/// Max found ULP 0.5
181#[inline]
182pub fn f_secf(x: f32) -> f32 {
183 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
184 {
185 secf_gen_impl(x)
186 }
187 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
188 {
189 use std::sync::OnceLock;
190 static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
191 let q = EXECUTOR.get_or_init(|| {
192 if std::arch::is_x86_feature_detected!("avx")
193 && std::arch::is_x86_feature_detected!("fma")
194 {
195 secf_fma_impl
196 } else {
197 secf_gen_impl
198 }
199 });
200 unsafe { q(x) }
201 }
202}
203
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks exact results for a few finite inputs (both signs) and that
    /// non-finite inputs produce NaN.
    #[test]
    fn test_f_secf() {
        // (input, expected sec(input)) pairs, compared for exact equality.
        let exact_cases: [(f32, f32); 5] = [
            (0.0, 1.0),
            (0.5, 1.139494),
            (-0.5, 1.139494),
            (1.5, 14.136833),
            (-1.5, 14.136833),
        ];
        for (input, expected) in exact_cases {
            assert_eq!(f_secf(input), expected);
        }

        // Infinities and NaN all map to NaN.
        for non_finite in [f32::INFINITY, f32::NEG_INFINITY, f32::NAN] {
            assert!(f_secf(non_finite).is_nan());
        }
    }
}
219}