diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/mips/math-emu/dp_sqrt.c |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/mips/math-emu/dp_sqrt.c')
-rw-r--r-- | arch/mips/math-emu/dp_sqrt.c | 165 |
1 files changed, 165 insertions, 0 deletions
diff --git a/arch/mips/math-emu/dp_sqrt.c b/arch/mips/math-emu/dp_sqrt.c new file mode 100644 index 000000000000..c35e871ae975 --- /dev/null +++ b/arch/mips/math-emu/dp_sqrt.c | |||
@@ -0,0 +1,165 @@ | |||
1 | /* IEEE754 floating point arithmetic | ||
2 | * double precision square root | ||
3 | */ | ||
4 | /* | ||
5 | * MIPS floating point support | ||
6 | * Copyright (C) 1994-2000 Algorithmics Ltd. | ||
7 | * http://www.algor.co.uk | ||
8 | * | ||
9 | * ######################################################################## | ||
10 | * | ||
11 | * This program is free software; you can distribute it and/or modify it | ||
12 | * under the terms of the GNU General Public License (Version 2) as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
16 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
17 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | ||
18 | * for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License along | ||
21 | * with this program; if not, write to the Free Software Foundation, Inc., | ||
22 | * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
23 | * | ||
24 | * ######################################################################## | ||
25 | */ | ||
26 | |||
27 | |||
28 | #include "ieee754dp.h" | ||
29 | |||
30 | static const unsigned table[] = { | ||
31 | 0, 1204, 3062, 5746, 9193, 13348, 18162, 23592, | ||
32 | 29598, 36145, 43202, 50740, 58733, 67158, 75992, | ||
33 | 85215, 83599, 71378, 60428, 50647, 41945, 34246, | ||
34 | 27478, 21581, 16499, 12183, 8588, 5674, 3403, | ||
35 | 1742, 661, 130 | ||
36 | }; | ||
37 | |||
38 | ieee754dp ieee754dp_sqrt(ieee754dp x) | ||
39 | { | ||
40 | struct ieee754_csr oldcsr; | ||
41 | ieee754dp y, z, t; | ||
42 | unsigned scalx, yh; | ||
43 | COMPXDP; | ||
44 | |||
45 | EXPLODEXDP; | ||
46 | CLEARCX; | ||
47 | FLUSHXDP; | ||
48 | |||
49 | /* x == INF or NAN? */ | ||
50 | switch (xc) { | ||
51 | case IEEE754_CLASS_QNAN: | ||
52 | /* sqrt(Nan) = Nan */ | ||
53 | return ieee754dp_nanxcpt(x, "sqrt"); | ||
54 | case IEEE754_CLASS_SNAN: | ||
55 | SETCX(IEEE754_INVALID_OPERATION); | ||
56 | return ieee754dp_nanxcpt(ieee754dp_indef(), "sqrt"); | ||
57 | case IEEE754_CLASS_ZERO: | ||
58 | /* sqrt(0) = 0 */ | ||
59 | return x; | ||
60 | case IEEE754_CLASS_INF: | ||
61 | if (xs) { | ||
62 | /* sqrt(-Inf) = Nan */ | ||
63 | SETCX(IEEE754_INVALID_OPERATION); | ||
64 | return ieee754dp_nanxcpt(ieee754dp_indef(), "sqrt"); | ||
65 | } | ||
66 | /* sqrt(+Inf) = Inf */ | ||
67 | return x; | ||
68 | case IEEE754_CLASS_DNORM: | ||
69 | DPDNORMX; | ||
70 | /* fall through */ | ||
71 | case IEEE754_CLASS_NORM: | ||
72 | if (xs) { | ||
73 | /* sqrt(-x) = Nan */ | ||
74 | SETCX(IEEE754_INVALID_OPERATION); | ||
75 | return ieee754dp_nanxcpt(ieee754dp_indef(), "sqrt"); | ||
76 | } | ||
77 | break; | ||
78 | } | ||
79 | |||
80 | /* save old csr; switch off INX enable & flag; set RN rounding */ | ||
81 | oldcsr = ieee754_csr; | ||
82 | ieee754_csr.mx &= ~IEEE754_INEXACT; | ||
83 | ieee754_csr.sx &= ~IEEE754_INEXACT; | ||
84 | ieee754_csr.rm = IEEE754_RN; | ||
85 | |||
86 | /* adjust exponent to prevent overflow */ | ||
87 | scalx = 0; | ||
88 | if (xe > 512) { /* x > 2**-512? */ | ||
89 | xe -= 512; /* x = x / 2**512 */ | ||
90 | scalx += 256; | ||
91 | } else if (xe < -512) { /* x < 2**-512? */ | ||
92 | xe += 512; /* x = x * 2**512 */ | ||
93 | scalx -= 256; | ||
94 | } | ||
95 | |||
96 | y = x = builddp(0, xe + DP_EBIAS, xm & ~DP_HIDDEN_BIT); | ||
97 | |||
98 | /* magic initial approximation to almost 8 sig. bits */ | ||
99 | yh = y.bits >> 32; | ||
100 | yh = (yh >> 1) + 0x1ff80000; | ||
101 | yh = yh - table[(yh >> 15) & 31]; | ||
102 | y.bits = ((u64) yh << 32) | (y.bits & 0xffffffff); | ||
103 | |||
104 | /* Heron's rule once with correction to improve to ~18 sig. bits */ | ||
105 | /* t=x/y; y=y+t; py[n0]=py[n0]-0x00100006; py[n1]=0; */ | ||
106 | t = ieee754dp_div(x, y); | ||
107 | y = ieee754dp_add(y, t); | ||
108 | y.bits -= 0x0010000600000000LL; | ||
109 | y.bits &= 0xffffffff00000000LL; | ||
110 | |||
111 | /* triple to almost 56 sig. bits: y ~= sqrt(x) to within 1 ulp */ | ||
112 | /* t=y*y; z=t; pt[n0]+=0x00100000; t+=z; z=(x-z)*y; */ | ||
113 | z = t = ieee754dp_mul(y, y); | ||
114 | t.parts.bexp += 0x001; | ||
115 | t = ieee754dp_add(t, z); | ||
116 | z = ieee754dp_mul(ieee754dp_sub(x, z), y); | ||
117 | |||
118 | /* t=z/(t+x) ; pt[n0]+=0x00100000; y+=t; */ | ||
119 | t = ieee754dp_div(z, ieee754dp_add(t, x)); | ||
120 | t.parts.bexp += 0x001; | ||
121 | y = ieee754dp_add(y, t); | ||
122 | |||
123 | /* twiddle last bit to force y correctly rounded */ | ||
124 | |||
125 | /* set RZ, clear INEX flag */ | ||
126 | ieee754_csr.rm = IEEE754_RZ; | ||
127 | ieee754_csr.sx &= ~IEEE754_INEXACT; | ||
128 | |||
129 | /* t=x/y; ...chopped quotient, possibly inexact */ | ||
130 | t = ieee754dp_div(x, y); | ||
131 | |||
132 | if (ieee754_csr.sx & IEEE754_INEXACT || t.bits != y.bits) { | ||
133 | |||
134 | if (!(ieee754_csr.sx & IEEE754_INEXACT)) | ||
135 | /* t = t-ulp */ | ||
136 | t.bits -= 1; | ||
137 | |||
138 | /* add inexact to result status */ | ||
139 | oldcsr.cx |= IEEE754_INEXACT; | ||
140 | oldcsr.sx |= IEEE754_INEXACT; | ||
141 | |||
142 | switch (oldcsr.rm) { | ||
143 | case IEEE754_RP: | ||
144 | y.bits += 1; | ||
145 | /* drop through */ | ||
146 | case IEEE754_RN: | ||
147 | t.bits += 1; | ||
148 | break; | ||
149 | } | ||
150 | |||
151 | /* y=y+t; ...chopped sum */ | ||
152 | y = ieee754dp_add(y, t); | ||
153 | |||
154 | /* adjust scalx for correctly rounded sqrt(x) */ | ||
155 | scalx -= 1; | ||
156 | } | ||
157 | |||
158 | /* py[n0]=py[n0]+scalx; ...scale back y */ | ||
159 | y.parts.bexp += scalx; | ||
160 | |||
161 | /* restore rounding mode, possibly set inexact */ | ||
162 | ieee754_csr = oldcsr; | ||
163 | |||
164 | return y; | ||
165 | } | ||