const_soft_float/soft_f64/sqrt.rs
1/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */
2/*
3 * ====================================================
4 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5 *
6 * Developed at SunSoft, a Sun Microsystems, Inc. business.
7 * Permission to use, copy, modify, and distribute this
8 * software is freely granted, provided that this notice
9 * is preserved.
10 * ====================================================
11 */
12/* sqrt(x)
13 * Return correctly rounded sqrt.
14 * ------------------------------------------
15 * | Use the hardware sqrt if you have one |
16 * ------------------------------------------
17 * Method:
18 * Bit by bit method using integer arithmetic. (Slow, but portable)
19 * 1. Normalization
20 * Scale x to y in [1,4) with even powers of 2:
21 * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then
22 * sqrt(x) = 2^k * sqrt(y)
23 * 2. Bit by bit computation
24 * Let q = sqrt(y) truncated to i bit after binary point (q = 1),
25 * i 0
26 * i+1 2
27 * s = 2*q , and y = 2 * ( y - q ). (1)
28 * i i i i
29 *
30 * To compute q from q , one checks whether
31 * i+1 i
32 *
33 * -(i+1) 2
34 * (q + 2 ) <= y. (2)
35 * i
36 * -(i+1)
37 * If (2) is false, then q = q ; otherwise q = q + 2 .
38 * i+1 i i+1 i
39 *
40 * With some algebraic manipulation, it is not difficult to see
41 * that (2) is equivalent to
42 * -(i+1)
43 * s + 2 <= y (3)
44 * i i
45 *
46 * The advantage of (3) is that s and y can be computed by
47 * i i
48 * the following recurrence formula:
49 * if (3) is false
50 *
51 * s = s , y = y ; (4)
52 * i+1 i i+1 i
53 *
54 * otherwise,
55 * -i -(i+1)
56 * s = s + 2 , y = y - s - 2 (5)
57 * i+1 i i+1 i i
58 *
59 * One may easily use induction to prove (4) and (5).
60 * Note. Since the left hand side of (3) contain only i+2 bits,
61 * it does not necessary to do a full (53-bit) comparison
62 * in (3).
63 * 3. Final rounding
64 * After generating the 53 bits result, we compute one more bit.
65 * Together with the remainder, we can decide whether the
66 * result is exact, bigger than 1/2ulp, or less than 1/2ulp
67 * (it will never equal to 1/2ulp).
68 * The rounding mode can be detected by checking whether
69 * huge + tiny is equal to huge, and whether huge - tiny is
70 * equal to huge for some floating point number "huge" and "tiny".
71 *
72 * Special cases:
73 * sqrt(+-0) = +-0 ... exact
74 * sqrt(inf) = inf
75 * sqrt(-ve) = NaN ... with invalid signal
76 * sqrt(NaN) = NaN ... with invalid signal for signaling NaN
77 */
78
79use crate::soft_f64::{
80 helpers::{ge, gt},
81 SoftF64,
82};
83
84type F = SoftF64;
85
86pub(crate) const fn sqrt(x: F) -> F {
87 const TINY: F = SoftF64(1.0e-300);
88
89 let mut z: F;
90 let sign: u32 = 0x80000000;
91 let mut ix0: i32;
92 let mut s0: i32;
93 let mut q: i32;
94 let mut m: i32;
95 let mut t: i32;
96 let mut i: i32;
97 let mut r: u32;
98 let mut t1: u32;
99 let mut s1: u32;
100 let mut ix1: u32;
101 let mut q1: u32;
102
103 ix0 = (x.to_bits() >> 32) as i32;
104 ix1 = x.to_bits() as u32;
105
106 /* take care of Inf and NaN */
107 if (ix0 & 0x7ff00000) == 0x7ff00000 {
108 return x.mul(x).add(x); /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
109 }
110 /* take care of zero */
111 if ix0 <= 0 {
112 if ((ix0 & !(sign as i32)) | ix1 as i32) == 0 {
113 return x; /* sqrt(+-0) = +-0 */
114 }
115 if ix0 < 0 {
116 return (x.sub(x)).div(x.sub(x)); /* sqrt(-ve) = sNaN */
117 }
118 }
119 /* normalize x */
120 m = ix0 >> 20;
121 if m == 0 {
122 /* subnormal x */
123 while ix0 == 0 {
124 m -= 21;
125 ix0 |= (ix1 >> 11) as i32;
126 ix1 <<= 21;
127 }
128 i = 0;
129 while (ix0 & 0x00100000) == 0 {
130 i += 1;
131 ix0 <<= 1;
132 }
133 m -= i - 1;
134 ix0 |= (ix1 as usize >> (32 - i) as usize) as i32;
135 ix1 = ix1 << i as usize;
136 }
137 m -= 1023; /* unbias exponent */
138 ix0 = (ix0 & 0x000fffff) | 0x00100000;
139 if (m & 1) == 1 {
140 /* odd m, double x to make it even */
141 ix0 += ix0 + ((ix1 & sign) >> 31) as i32;
142 ix1 = ix1.wrapping_add(ix1);
143 }
144 m >>= 1; /* m = [m/2] */
145
146 /* generate sqrt(x) bit by bit */
147 ix0 += ix0 + ((ix1 & sign) >> 31) as i32;
148 ix1 = ix1.wrapping_add(ix1);
149 q = 0; /* [q,q1] = sqrt(x) */
150 q1 = 0;
151 s0 = 0;
152 s1 = 0;
153 r = 0x00200000; /* r = moving bit from right to left */
154
155 while r != 0 {
156 t = s0 + r as i32;
157 if t <= ix0 {
158 s0 = t + r as i32;
159 ix0 -= t;
160 q += r as i32;
161 }
162 ix0 += ix0 + ((ix1 & sign) >> 31) as i32;
163 ix1 = ix1.wrapping_add(ix1);
164 r >>= 1;
165 }
166
167 r = sign;
168 while r != 0 {
169 t1 = s1.wrapping_add(r);
170 t = s0;
171 if t < ix0 || (t == ix0 && t1 <= ix1) {
172 s1 = t1.wrapping_add(r);
173 if (t1 & sign) == sign && (s1 & sign) == 0 {
174 s0 += 1;
175 }
176 ix0 -= t;
177 if ix1 < t1 {
178 ix0 -= 1;
179 }
180 ix1 = ix1.wrapping_sub(t1);
181 q1 += r;
182 }
183 ix0 += ix0 + ((ix1 & sign) >> 31) as i32;
184 ix1 = ix1.wrapping_add(ix1);
185 r >>= 1;
186 }
187
188 /* use floating add to find out rounding direction */
189 if (ix0 as u32 | ix1) != 0 {
190 z = SoftF64(1.0).sub(TINY); /* raise inexact flag */
191 if ge(z, SoftF64::ONE) {
192 z = SoftF64::ONE.add(TINY);
193 if q1 == 0xffffffff {
194 q1 = 0;
195 q += 1;
196 } else if gt(z, SoftF64::ONE) {
197 if q1 == 0xfffffffe {
198 q += 1;
199 }
200 q1 = q1.wrapping_add(2);
201 } else {
202 q1 += q1 & 1;
203 }
204 }
205 }
206 ix0 = (q >> 1) + 0x3fe00000;
207 ix1 = q1 >> 1;
208 if (q & 1) == 1 {
209 ix1 |= sign;
210 }
211 ix0 += m << 20;
212 SoftF64::from_bits((ix0 as u64) << 32 | ix1 as u64)
213}
214
215#[cfg(test)]
216mod tests {
217 use super::*;
218 // use core::f64::*;
219
220 #[test]
221 fn sanity_check() {
222 const SQRT_100: SoftF64 = sqrt(SoftF64(100.0));
223 assert_eq!(SQRT_100.0, 10.0);
224
225 const SQRT_4: SoftF64 = sqrt(SoftF64(4.0));
226 assert_eq!(SQRT_4.0, 2.0);
227 }
228
229 /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt
230 #[test]
231 fn spec_tests() {
232 // Not Asserted: FE_INVALID exception is raised if argument is negative.
233 assert!(sqrt(SoftF64(-1.0)).0.is_nan());
234 assert!(sqrt(SoftF64(f64::NAN)).0.is_nan());
235 for f in [0.0, -0.0, f64::INFINITY].iter().copied() {
236 assert_eq!(sqrt(SoftF64(f)).0, f);
237 }
238 }
239}