pxfm/sin_cosf/cosf.rs
1/*
2 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1. Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2. Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3. Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::f_fmla;
30use crate::polyeval::f_polyeval5;
31use crate::sin_cosf::sincosf_eval::sincosf_eval;
32
33#[inline(always)]
34fn cosf_gen_impl(x: f32) -> f32 {
35 let x_abs = x.to_bits() & 0x7fff_ffffu32;
36 let x = f32::from_bits(x_abs);
37 let xd = x as f64;
38
39 // |x| <= pi/16
40 if x_abs <= 0x3e49_0fdbu32 {
41 // |x| < 0.000244141
42 if x_abs < 0x3980_0000u32 {
43 #[cfg(any(
44 all(
45 any(target_arch = "x86", target_arch = "x86_64"),
46 target_feature = "fma"
47 ),
48 target_arch = "aarch64"
49 ))]
50 {
51 use crate::common::f_fmlaf;
52 return f_fmlaf(x, f32::from_bits(0xb3000000), 1.);
53 }
54 #[cfg(not(any(
55 all(
56 any(target_arch = "x86", target_arch = "x86_64"),
57 target_feature = "fma"
58 ),
59 target_arch = "aarch64"
60 )))]
61 {
62 return f_fmla(xd, f64::from_bits(0xbe60000000000000), 1.) as f32;
63 }
64 }
65
66 // Cosine
67 // Generated poly by Sollya:
68 // f_cos_16 = cos(x);
69 //
70 // Q = fpminimax(f_cos_16, [|0, 2, 4, 6, 8|], [|1, D...|], [0, pi/16]);
71 // See ./notes/cosf.sollya
72
73 let x2 = xd * xd;
74 let p = f_polyeval5(
75 x2,
76 f64::from_bits(0x3ff0000000000000),
77 f64::from_bits(0xbfdffffffffffcea),
78 f64::from_bits(0x3fa55555553d611a),
79 f64::from_bits(0xbf56c16b2e26561a),
80 f64::from_bits(0x3ef9faa67b9da80b),
81 );
82 return p as f32;
83 }
84
85 if x_abs >= 0x7f80_0000u32 {
86 return x + f32::NAN;
87 }
88
89 // Formula:
90 // cos(x) = cos((k + y)*pi/32)
91 // = cos(y*pi/32) * cos(k*pi/32) - sin(y*pi/32) * sin(k*pi/32)
92 // The values of sin(k*pi/32) and cos(k*pi/32) for k = 0..63 are precomputed
93 // and stored using a vector of 32 doubles. Sin(y*pi/32) and cos(y*pi/32) are
94 // computed using degree-7 and degree-6 minimax polynomials generated by
95 // Sollya respectively.
96 // Combine the results with the sine of sum formula:
97 // cos(x) = cos((k + y)*pi/32)
98 // = cos(y*pi/32) * cos(k*pi/32) - sin(y*pi/32) * sin(k*pi/32)
99 // = cosm1_y * cos_k + sin_y * sin_k
100 // = (cosm1_y * cos_k + cos_k) + sin_y * sin_k
101
102 let rs = sincosf_eval(xd, x_abs);
103 f_fmla(rs.sin_y, -rs.sin_k, f_fmla(rs.cosm1_y, rs.cos_k, rs.cos_k)) as f32
104}
105
106#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
107#[target_feature(enable = "avx", enable = "fma")]
108unsafe fn cosf_fma_impl(x: f32) -> f32 {
109 let x_abs = x.to_bits() & 0x7fff_ffffu32;
110 let x = f32::from_bits(x_abs);
111 let xd = x as f64;
112
113 // |x| <= pi/16
114 if x_abs <= 0x3e49_0fdbu32 {
115 // |x| < 0.000244141
116 if x_abs < 0x3980_0000u32 {
117 return f32::mul_add(x, f32::from_bits(0xb3000000), 1.);
118 }
119
120 // Cosine
121 // Generated poly by Sollya:
122 // f_cos_16 = cos(x);
123 //
124 // Q = fpminimax(f_cos_16, [|0, 2, 4, 6, 8|], [|1, D...|], [0, pi/16]);
125 // See ./notes/cosf.sollya
126
127 let x2 = xd * xd;
128 use crate::polyeval::d_polyeval5;
129 let p = d_polyeval5(
130 x2,
131 f64::from_bits(0x3ff0000000000000),
132 f64::from_bits(0xbfdffffffffffcea),
133 f64::from_bits(0x3fa55555553d611a),
134 f64::from_bits(0xbf56c16b2e26561a),
135 f64::from_bits(0x3ef9faa67b9da80b),
136 );
137 return p as f32;
138 }
139
140 if x_abs >= 0x7f80_0000u32 {
141 return x + f32::NAN;
142 }
143
144 // Formula:
145 // cos(x) = cos((k + y)*pi/32)
146 // = cos(y*pi/32) * cos(k*pi/32) - sin(y*pi/32) * sin(k*pi/32)
147 // The values of sin(k*pi/32) and cos(k*pi/32) for k = 0..63 are precomputed
148 // and stored using a vector of 32 doubles. Sin(y*pi/32) and cos(y*pi/32) are
149 // computed using degree-7 and degree-6 minimax polynomials generated by
150 // Sollya respectively.
151 // Combine the results with the sine of sum formula:
152 // cos(x) = cos((k + y)*pi/32)
153 // = cos(y*pi/32) * cos(k*pi/32) - sin(y*pi/32) * sin(k*pi/32)
154 // = cosm1_y * cos_k + sin_y * sin_k
155 // = (cosm1_y * cos_k + cos_k) + sin_y * sin_k
156 use crate::sin_cosf::sincosf_eval::sincosf_eval_fma;
157 let rs = sincosf_eval_fma(xd, x_abs);
158 f64::mul_add(
159 rs.sin_y,
160 -rs.sin_k,
161 f64::mul_add(rs.cosm1_y, rs.cos_k, rs.cos_k),
162 ) as f32
163}
164
165/// Computes cosine function
166///
167/// ULP 0.5
168///
169#[inline]
170pub fn f_cosf(x: f32) -> f32 {
171 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
172 {
173 cosf_gen_impl(x)
174 }
175 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
176 {
177 use std::sync::OnceLock;
178 static EXECUTOR: OnceLock<unsafe fn(f32) -> f32> = OnceLock::new();
179 let q = EXECUTOR.get_or_init(|| {
180 if std::arch::is_x86_feature_detected!("avx")
181 && std::arch::is_x86_feature_detected!("fma")
182 {
183 cosf_fma_impl
184 } else {
185 cosf_gen_impl
186 }
187 });
188 unsafe { q(x) }
189 }
190}
191
192#[cfg(test)]
193mod tests {
194 use super::*;
195
196 #[test]
197 fn f_cosf_test() {
198 assert_eq!(f_cosf(0.0), 1.0);
199 assert_eq!(f_cosf(std::f32::consts::PI), -1f32);
200 assert_eq!(f_cosf(0.5), 0.87758255);
201 assert_eq!(f_cosf(0.7), 0.7648422);
202 assert_eq!(f_cosf(1.7), -0.12884454);
203 assert!(f_cosf(f32::INFINITY).is_nan());
204 assert!(f_cosf(f32::NEG_INFINITY).is_nan());
205 assert!(f_cosf(f32::NAN).is_nan());
206 assert_eq!(f_cosf(0.0002480338), 0.9999999692396206);
207 }
208}