aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/math-emu/poly_sin.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-01-30 07:30:11 -0500
committerIngo Molnar <mingo@elte.hu>2008-01-30 07:30:11 -0500
commit3d0d14f983b55a570b976976284df4c434af3223 (patch)
tree864f11c0ce5ee1e15acdd196018b79d0d0e2685d /arch/x86/math-emu/poly_sin.c
parenta4ec1effce83796209a0258602b0cf50026d86f2 (diff)
x86: lindent arch/i386/math-emu
lindent these files: errors lines of code errors/KLOC arch/x86/math-emu/ 2236 9424 237.2 arch/x86/math-emu/ 128 8706 14.7 no other changes. No code changed: text data bss dec hex filename 5589802 612739 3833856 10036397 9924ad vmlinux.before 5589802 612739 3833856 10036397 9924ad vmlinux.after the intent of this patch is to ease the automated tracking of kernel code quality - it's just much easier for us to maintain it if every file in arch/x86 is supposed to be clean. NOTE: it is a known problem of lindent that it causes some style damage of its own, but it's a safe tool (well, except for the gcc array range initializers extension), so we did the bulk of the changes via lindent, and did the manual fixups in a followup patch. the resulting math-emu code has been tested by Thomas Gleixner on a real 386 DX CPU as well, and it works fine. Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/math-emu/poly_sin.c')
-rw-r--r--arch/x86/math-emu/poly_sin.c599
1 files changed, 290 insertions, 309 deletions
diff --git a/arch/x86/math-emu/poly_sin.c b/arch/x86/math-emu/poly_sin.c
index a36313fb06f1..7273ae0c7692 100644
--- a/arch/x86/math-emu/poly_sin.c
+++ b/arch/x86/math-emu/poly_sin.c
@@ -11,7 +11,6 @@
11 | | 11 | |
12 +---------------------------------------------------------------------------*/ 12 +---------------------------------------------------------------------------*/
13 13
14
15#include "exception.h" 14#include "exception.h"
16#include "reg_constant.h" 15#include "reg_constant.h"
17#include "fpu_emu.h" 16#include "fpu_emu.h"
@@ -19,379 +18,361 @@
19#include "control_w.h" 18#include "control_w.h"
20#include "poly.h" 19#include "poly.h"
21 20
22
23#define N_COEFF_P 4 21#define N_COEFF_P 4
24#define N_COEFF_N 4 22#define N_COEFF_N 4
25 23
26static const unsigned long long pos_terms_l[N_COEFF_P] = 24static const unsigned long long pos_terms_l[N_COEFF_P] = {
27{ 25 0xaaaaaaaaaaaaaaabLL,
28 0xaaaaaaaaaaaaaaabLL, 26 0x00d00d00d00cf906LL,
29 0x00d00d00d00cf906LL, 27 0x000006b99159a8bbLL,
30 0x000006b99159a8bbLL, 28 0x000000000d7392e6LL
31 0x000000000d7392e6LL
32}; 29};
33 30
34static const unsigned long long neg_terms_l[N_COEFF_N] = 31static const unsigned long long neg_terms_l[N_COEFF_N] = {
35{ 32 0x2222222222222167LL,
36 0x2222222222222167LL, 33 0x0002e3bc74aab624LL,
37 0x0002e3bc74aab624LL, 34 0x0000000b09229062LL,
38 0x0000000b09229062LL, 35 0x00000000000c7973LL
39 0x00000000000c7973LL
40}; 36};
41 37
42
43
44#define N_COEFF_PH 4 38#define N_COEFF_PH 4
45#define N_COEFF_NH 4 39#define N_COEFF_NH 4
46static const unsigned long long pos_terms_h[N_COEFF_PH] = 40static const unsigned long long pos_terms_h[N_COEFF_PH] = {
47{ 41 0x0000000000000000LL,
48 0x0000000000000000LL, 42 0x05b05b05b05b0406LL,
49 0x05b05b05b05b0406LL, 43 0x000049f93edd91a9LL,
50 0x000049f93edd91a9LL, 44 0x00000000c9c9ed62LL
51 0x00000000c9c9ed62LL
52}; 45};
53 46
54static const unsigned long long neg_terms_h[N_COEFF_NH] = 47static const unsigned long long neg_terms_h[N_COEFF_NH] = {
55{ 48 0xaaaaaaaaaaaaaa98LL,
56 0xaaaaaaaaaaaaaa98LL, 49 0x001a01a01a019064LL,
57 0x001a01a01a019064LL, 50 0x0000008f76c68a77LL,
58 0x0000008f76c68a77LL, 51 0x0000000000d58f5eLL
59 0x0000000000d58f5eLL
60}; 52};
61 53
62
63/*--- poly_sine() -----------------------------------------------------------+ 54/*--- poly_sine() -----------------------------------------------------------+
64 | | 55 | |
65 +---------------------------------------------------------------------------*/ 56 +---------------------------------------------------------------------------*/
66void poly_sine(FPU_REG *st0_ptr) 57void poly_sine(FPU_REG * st0_ptr)
67{ 58{
68 int exponent, echange; 59 int exponent, echange;
69 Xsig accumulator, argSqrd, argTo4; 60 Xsig accumulator, argSqrd, argTo4;
70 unsigned long fix_up, adj; 61 unsigned long fix_up, adj;
71 unsigned long long fixed_arg; 62 unsigned long long fixed_arg;
72 FPU_REG result; 63 FPU_REG result;
73 64
74 exponent = exponent(st0_ptr); 65 exponent = exponent(st0_ptr);
75 66
76 accumulator.lsw = accumulator.midw = accumulator.msw = 0; 67 accumulator.lsw = accumulator.midw = accumulator.msw = 0;
77 68
78 /* Split into two ranges, for arguments below and above 1.0 */ 69 /* Split into two ranges, for arguments below and above 1.0 */
79 /* The boundary between upper and lower is approx 0.88309101259 */ 70 /* The boundary between upper and lower is approx 0.88309101259 */
80 if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) ) 71 if ((exponent < -1)
81 { 72 || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) {
82 /* The argument is <= 0.88309101259 */ 73 /* The argument is <= 0.88309101259 */
74
75 argSqrd.msw = st0_ptr->sigh;
76 argSqrd.midw = st0_ptr->sigl;
77 argSqrd.lsw = 0;
78 mul64_Xsig(&argSqrd, &significand(st0_ptr));
79 shr_Xsig(&argSqrd, 2 * (-1 - exponent));
80 argTo4.msw = argSqrd.msw;
81 argTo4.midw = argSqrd.midw;
82 argTo4.lsw = argSqrd.lsw;
83 mul_Xsig_Xsig(&argTo4, &argTo4);
83 84
84 argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0; 85 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
85 mul64_Xsig(&argSqrd, &significand(st0_ptr)); 86 N_COEFF_N - 1);
86 shr_Xsig(&argSqrd, 2*(-1-exponent)); 87 mul_Xsig_Xsig(&accumulator, &argSqrd);
87 argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; 88 negate_Xsig(&accumulator);
88 argTo4.lsw = argSqrd.lsw;
89 mul_Xsig_Xsig(&argTo4, &argTo4);
90 89
91 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, 90 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
92 N_COEFF_N-1); 91 N_COEFF_P - 1);
93 mul_Xsig_Xsig(&accumulator, &argSqrd);
94 negate_Xsig(&accumulator);
95 92
96 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, 93 shr_Xsig(&accumulator, 2); /* Divide by four */
97 N_COEFF_P-1); 94 accumulator.msw |= 0x80000000; /* Add 1.0 */
98 95
99 shr_Xsig(&accumulator, 2); /* Divide by four */ 96 mul64_Xsig(&accumulator, &significand(st0_ptr));
100 accumulator.msw |= 0x80000000; /* Add 1.0 */ 97 mul64_Xsig(&accumulator, &significand(st0_ptr));
98 mul64_Xsig(&accumulator, &significand(st0_ptr));
101 99
102 mul64_Xsig(&accumulator, &significand(st0_ptr)); 100 /* Divide by four, FPU_REG compatible, etc */
103 mul64_Xsig(&accumulator, &significand(st0_ptr)); 101 exponent = 3 * exponent;
104 mul64_Xsig(&accumulator, &significand(st0_ptr));
105 102
106 /* Divide by four, FPU_REG compatible, etc */ 103 /* The minimum exponent difference is 3 */
107 exponent = 3*exponent; 104 shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
108 105
109 /* The minimum exponent difference is 3 */ 106 negate_Xsig(&accumulator);
110 shr_Xsig(&accumulator, exponent(st0_ptr) - exponent); 107 XSIG_LL(accumulator) += significand(st0_ptr);
111 108
112 negate_Xsig(&accumulator); 109 echange = round_Xsig(&accumulator);
113 XSIG_LL(accumulator) += significand(st0_ptr);
114 110
115 echange = round_Xsig(&accumulator); 111 setexponentpos(&result, exponent(st0_ptr) + echange);
112 } else {
113 /* The argument is > 0.88309101259 */
114 /* We use sin(st(0)) = cos(pi/2-st(0)) */
116 115
117 setexponentpos(&result, exponent(st0_ptr) + echange); 116 fixed_arg = significand(st0_ptr);
118 }
119 else
120 {
121 /* The argument is > 0.88309101259 */
122 /* We use sin(st(0)) = cos(pi/2-st(0)) */
123 117
124 fixed_arg = significand(st0_ptr); 118 if (exponent == 0) {
119 /* The argument is >= 1.0 */
125 120
126 if ( exponent == 0 ) 121 /* Put the binary point at the left. */
127 { 122 fixed_arg <<= 1;
128 /* The argument is >= 1.0 */ 123 }
124 /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
125 fixed_arg = 0x921fb54442d18469LL - fixed_arg;
126 /* There is a special case which arises due to rounding, to fix here. */
127 if (fixed_arg == 0xffffffffffffffffLL)
128 fixed_arg = 0;
129 129
130 /* Put the binary point at the left. */ 130 XSIG_LL(argSqrd) = fixed_arg;
131 fixed_arg <<= 1; 131 argSqrd.lsw = 0;
132 } 132 mul64_Xsig(&argSqrd, &fixed_arg);
133 /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
134 fixed_arg = 0x921fb54442d18469LL - fixed_arg;
135 /* There is a special case which arises due to rounding, to fix here. */
136 if ( fixed_arg == 0xffffffffffffffffLL )
137 fixed_arg = 0;
138 133
139 XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; 134 XSIG_LL(argTo4) = XSIG_LL(argSqrd);
140 mul64_Xsig(&argSqrd, &fixed_arg); 135 argTo4.lsw = argSqrd.lsw;
136 mul_Xsig_Xsig(&argTo4, &argTo4);
141 137
142 XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw; 138 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
143 mul_Xsig_Xsig(&argTo4, &argTo4); 139 N_COEFF_NH - 1);
140 mul_Xsig_Xsig(&accumulator, &argSqrd);
141 negate_Xsig(&accumulator);
144 142
145 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, 143 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
146 N_COEFF_NH-1); 144 N_COEFF_PH - 1);
147 mul_Xsig_Xsig(&accumulator, &argSqrd); 145 negate_Xsig(&accumulator);
148 negate_Xsig(&accumulator);
149 146
150 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, 147 mul64_Xsig(&accumulator, &fixed_arg);
151 N_COEFF_PH-1); 148 mul64_Xsig(&accumulator, &fixed_arg);
152 negate_Xsig(&accumulator);
153 149
154 mul64_Xsig(&accumulator, &fixed_arg); 150 shr_Xsig(&accumulator, 3);
155 mul64_Xsig(&accumulator, &fixed_arg); 151 negate_Xsig(&accumulator);
156 152
157 shr_Xsig(&accumulator, 3); 153 add_Xsig_Xsig(&accumulator, &argSqrd);
158 negate_Xsig(&accumulator);
159 154
160 add_Xsig_Xsig(&accumulator, &argSqrd); 155 shr_Xsig(&accumulator, 1);
161 156
162 shr_Xsig(&accumulator, 1); 157 accumulator.lsw |= 1; /* A zero accumulator here would cause problems */
158 negate_Xsig(&accumulator);
163 159
164 accumulator.lsw |= 1; /* A zero accumulator here would cause problems */ 160 /* The basic computation is complete. Now fix the answer to
165 negate_Xsig(&accumulator); 161 compensate for the error due to the approximation used for
162 pi/2
163 */
166 164
167 /* The basic computation is complete. Now fix the answer to 165 /* This has an exponent of -65 */
168 compensate for the error due to the approximation used for 166 fix_up = 0x898cc517;
169 pi/2 167 /* The fix-up needs to be improved for larger args */
170 */ 168 if (argSqrd.msw & 0xffc00000) {
169 /* Get about 32 bit precision in these: */
170 fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
171 }
172 fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
171 173
172 /* This has an exponent of -65 */ 174 adj = accumulator.lsw; /* temp save */
173 fix_up = 0x898cc517; 175 accumulator.lsw -= fix_up;
174 /* The fix-up needs to be improved for larger args */ 176 if (accumulator.lsw > adj)
175 if ( argSqrd.msw & 0xffc00000 ) 177 XSIG_LL(accumulator)--;
176 {
177 /* Get about 32 bit precision in these: */
178 fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
179 }
180 fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
181 178
182 adj = accumulator.lsw; /* temp save */ 179 echange = round_Xsig(&accumulator);
183 accumulator.lsw -= fix_up;
184 if ( accumulator.lsw > adj )
185 XSIG_LL(accumulator) --;
186 180
187 echange = round_Xsig(&accumulator); 181 setexponentpos(&result, echange - 1);
188 182 }
189 setexponentpos(&result, echange - 1);
190 }
191 183
192 significand(&result) = XSIG_LL(accumulator); 184 significand(&result) = XSIG_LL(accumulator);
193 setsign(&result, getsign(st0_ptr)); 185 setsign(&result, getsign(st0_ptr));
194 FPU_copy_to_reg0(&result, TAG_Valid); 186 FPU_copy_to_reg0(&result, TAG_Valid);
195 187
196#ifdef PARANOID 188#ifdef PARANOID
197 if ( (exponent(&result) >= 0) 189 if ((exponent(&result) >= 0)
198 && (significand(&result) > 0x8000000000000000LL) ) 190 && (significand(&result) > 0x8000000000000000LL)) {
199 { 191 EXCEPTION(EX_INTERNAL | 0x150);
200 EXCEPTION(EX_INTERNAL|0x150); 192 }
201 }
202#endif /* PARANOID */ 193#endif /* PARANOID */
203 194
204} 195}
205 196
206
207
208/*--- poly_cos() ------------------------------------------------------------+ 197/*--- poly_cos() ------------------------------------------------------------+
209 | | 198 | |
210 +---------------------------------------------------------------------------*/ 199 +---------------------------------------------------------------------------*/
211void poly_cos(FPU_REG *st0_ptr) 200void poly_cos(FPU_REG * st0_ptr)
212{ 201{
213 FPU_REG result; 202 FPU_REG result;
214 long int exponent, exp2, echange; 203 long int exponent, exp2, echange;
215 Xsig accumulator, argSqrd, fix_up, argTo4; 204 Xsig accumulator, argSqrd, fix_up, argTo4;
216 unsigned long long fixed_arg; 205 unsigned long long fixed_arg;
217 206
218#ifdef PARANOID 207#ifdef PARANOID
219 if ( (exponent(st0_ptr) > 0) 208 if ((exponent(st0_ptr) > 0)
220 || ((exponent(st0_ptr) == 0) 209 || ((exponent(st0_ptr) == 0)
221 && (significand(st0_ptr) > 0xc90fdaa22168c234LL)) ) 210 && (significand(st0_ptr) > 0xc90fdaa22168c234LL))) {
222 { 211 EXCEPTION(EX_Invalid);
223 EXCEPTION(EX_Invalid); 212 FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
224 FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); 213 return;
225 return;
226 }
227#endif /* PARANOID */
228
229 exponent = exponent(st0_ptr);
230
231 accumulator.lsw = accumulator.midw = accumulator.msw = 0;
232
233 if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) )
234 {
235 /* arg is < 0.687705 */
236
237 argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl;
238 argSqrd.lsw = 0;
239 mul64_Xsig(&argSqrd, &significand(st0_ptr));
240
241 if ( exponent < -1 )
242 {
243 /* shift the argument right by the required places */
244 shr_Xsig(&argSqrd, 2*(-1-exponent));
245 }
246
247 argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
248 argTo4.lsw = argSqrd.lsw;
249 mul_Xsig_Xsig(&argTo4, &argTo4);
250
251 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
252 N_COEFF_NH-1);
253 mul_Xsig_Xsig(&accumulator, &argSqrd);
254 negate_Xsig(&accumulator);
255
256 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
257 N_COEFF_PH-1);
258 negate_Xsig(&accumulator);
259
260 mul64_Xsig(&accumulator, &significand(st0_ptr));
261 mul64_Xsig(&accumulator, &significand(st0_ptr));
262 shr_Xsig(&accumulator, -2*(1+exponent));
263
264 shr_Xsig(&accumulator, 3);
265 negate_Xsig(&accumulator);
266
267 add_Xsig_Xsig(&accumulator, &argSqrd);
268
269 shr_Xsig(&accumulator, 1);
270
271 /* It doesn't matter if accumulator is all zero here, the
272 following code will work ok */
273 negate_Xsig(&accumulator);
274
275 if ( accumulator.lsw & 0x80000000 )
276 XSIG_LL(accumulator) ++;
277 if ( accumulator.msw == 0 )
278 {
279 /* The result is 1.0 */
280 FPU_copy_to_reg0(&CONST_1, TAG_Valid);
281 return;
282 }
283 else
284 {
285 significand(&result) = XSIG_LL(accumulator);
286
287 /* will be a valid positive nr with expon = -1 */
288 setexponentpos(&result, -1);
289 }
290 }
291 else
292 {
293 fixed_arg = significand(st0_ptr);
294
295 if ( exponent == 0 )
296 {
297 /* The argument is >= 1.0 */
298
299 /* Put the binary point at the left. */
300 fixed_arg <<= 1;
301 }
302 /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
303 fixed_arg = 0x921fb54442d18469LL - fixed_arg;
304 /* There is a special case which arises due to rounding, to fix here. */
305 if ( fixed_arg == 0xffffffffffffffffLL )
306 fixed_arg = 0;
307
308 exponent = -1;
309 exp2 = -1;
310
311 /* A shift is needed here only for a narrow range of arguments,
312 i.e. for fixed_arg approx 2^-32, but we pick up more... */
313 if ( !(LL_MSW(fixed_arg) & 0xffff0000) )
314 {
315 fixed_arg <<= 16;
316 exponent -= 16;
317 exp2 -= 16;
318 } 214 }
215#endif /* PARANOID */
319 216
320 XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; 217 exponent = exponent(st0_ptr);
321 mul64_Xsig(&argSqrd, &fixed_arg); 218
322 219 accumulator.lsw = accumulator.midw = accumulator.msw = 0;
323 if ( exponent < -1 ) 220
324 { 221 if ((exponent < -1)
325 /* shift the argument right by the required places */ 222 || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) {
326 shr_Xsig(&argSqrd, 2*(-1-exponent)); 223 /* arg is < 0.687705 */
327 } 224
328 225 argSqrd.msw = st0_ptr->sigh;
329 argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; 226 argSqrd.midw = st0_ptr->sigl;
330 argTo4.lsw = argSqrd.lsw; 227 argSqrd.lsw = 0;
331 mul_Xsig_Xsig(&argTo4, &argTo4); 228 mul64_Xsig(&argSqrd, &significand(st0_ptr));
332 229
333 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, 230 if (exponent < -1) {
334 N_COEFF_N-1); 231 /* shift the argument right by the required places */
335 mul_Xsig_Xsig(&accumulator, &argSqrd); 232 shr_Xsig(&argSqrd, 2 * (-1 - exponent));
336 negate_Xsig(&accumulator); 233 }
337 234
338 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, 235 argTo4.msw = argSqrd.msw;
339 N_COEFF_P-1); 236 argTo4.midw = argSqrd.midw;
340 237 argTo4.lsw = argSqrd.lsw;
341 shr_Xsig(&accumulator, 2); /* Divide by four */ 238 mul_Xsig_Xsig(&argTo4, &argTo4);
342 accumulator.msw |= 0x80000000; /* Add 1.0 */ 239
343 240 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
344 mul64_Xsig(&accumulator, &fixed_arg); 241 N_COEFF_NH - 1);
345 mul64_Xsig(&accumulator, &fixed_arg); 242 mul_Xsig_Xsig(&accumulator, &argSqrd);
346 mul64_Xsig(&accumulator, &fixed_arg); 243 negate_Xsig(&accumulator);
347 244
348 /* Divide by four, FPU_REG compatible, etc */ 245 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
349 exponent = 3*exponent; 246 N_COEFF_PH - 1);
350 247 negate_Xsig(&accumulator);
351 /* The minimum exponent difference is 3 */ 248
352 shr_Xsig(&accumulator, exp2 - exponent); 249 mul64_Xsig(&accumulator, &significand(st0_ptr));
353 250 mul64_Xsig(&accumulator, &significand(st0_ptr));
354 negate_Xsig(&accumulator); 251 shr_Xsig(&accumulator, -2 * (1 + exponent));
355 XSIG_LL(accumulator) += fixed_arg; 252
356 253 shr_Xsig(&accumulator, 3);
357 /* The basic computation is complete. Now fix the answer to 254 negate_Xsig(&accumulator);
358 compensate for the error due to the approximation used for 255
359 pi/2 256 add_Xsig_Xsig(&accumulator, &argSqrd);
360 */ 257
361 258 shr_Xsig(&accumulator, 1);
362 /* This has an exponent of -65 */ 259
363 XSIG_LL(fix_up) = 0x898cc51701b839a2ll; 260 /* It doesn't matter if accumulator is all zero here, the
364 fix_up.lsw = 0; 261 following code will work ok */
365 262 negate_Xsig(&accumulator);
366 /* The fix-up needs to be improved for larger args */ 263
367 if ( argSqrd.msw & 0xffc00000 ) 264 if (accumulator.lsw & 0x80000000)
368 { 265 XSIG_LL(accumulator)++;
369 /* Get about 32 bit precision in these: */ 266 if (accumulator.msw == 0) {
370 fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2; 267 /* The result is 1.0 */
371 fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24; 268 FPU_copy_to_reg0(&CONST_1, TAG_Valid);
269 return;
270 } else {
271 significand(&result) = XSIG_LL(accumulator);
272
273 /* will be a valid positive nr with expon = -1 */
274 setexponentpos(&result, -1);
275 }
276 } else {
277 fixed_arg = significand(st0_ptr);
278
279 if (exponent == 0) {
280 /* The argument is >= 1.0 */
281
282 /* Put the binary point at the left. */
283 fixed_arg <<= 1;
284 }
285 /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
286 fixed_arg = 0x921fb54442d18469LL - fixed_arg;
287 /* There is a special case which arises due to rounding, to fix here. */
288 if (fixed_arg == 0xffffffffffffffffLL)
289 fixed_arg = 0;
290
291 exponent = -1;
292 exp2 = -1;
293
294 /* A shift is needed here only for a narrow range of arguments,
295 i.e. for fixed_arg approx 2^-32, but we pick up more... */
296 if (!(LL_MSW(fixed_arg) & 0xffff0000)) {
297 fixed_arg <<= 16;
298 exponent -= 16;
299 exp2 -= 16;
300 }
301
302 XSIG_LL(argSqrd) = fixed_arg;
303 argSqrd.lsw = 0;
304 mul64_Xsig(&argSqrd, &fixed_arg);
305
306 if (exponent < -1) {
307 /* shift the argument right by the required places */
308 shr_Xsig(&argSqrd, 2 * (-1 - exponent));
309 }
310
311 argTo4.msw = argSqrd.msw;
312 argTo4.midw = argSqrd.midw;
313 argTo4.lsw = argSqrd.lsw;
314 mul_Xsig_Xsig(&argTo4, &argTo4);
315
316 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
317 N_COEFF_N - 1);
318 mul_Xsig_Xsig(&accumulator, &argSqrd);
319 negate_Xsig(&accumulator);
320
321 polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
322 N_COEFF_P - 1);
323
324 shr_Xsig(&accumulator, 2); /* Divide by four */
325 accumulator.msw |= 0x80000000; /* Add 1.0 */
326
327 mul64_Xsig(&accumulator, &fixed_arg);
328 mul64_Xsig(&accumulator, &fixed_arg);
329 mul64_Xsig(&accumulator, &fixed_arg);
330
331 /* Divide by four, FPU_REG compatible, etc */
332 exponent = 3 * exponent;
333
334 /* The minimum exponent difference is 3 */
335 shr_Xsig(&accumulator, exp2 - exponent);
336
337 negate_Xsig(&accumulator);
338 XSIG_LL(accumulator) += fixed_arg;
339
340 /* The basic computation is complete. Now fix the answer to
341 compensate for the error due to the approximation used for
342 pi/2
343 */
344
345 /* This has an exponent of -65 */
346 XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
347 fix_up.lsw = 0;
348
349 /* The fix-up needs to be improved for larger args */
350 if (argSqrd.msw & 0xffc00000) {
351 /* Get about 32 bit precision in these: */
352 fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
353 fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
354 }
355
356 exp2 += norm_Xsig(&accumulator);
357 shr_Xsig(&accumulator, 1); /* Prevent overflow */
358 exp2++;
359 shr_Xsig(&fix_up, 65 + exp2);
360
361 add_Xsig_Xsig(&accumulator, &fix_up);
362
363 echange = round_Xsig(&accumulator);
364
365 setexponentpos(&result, exp2 + echange);
366 significand(&result) = XSIG_LL(accumulator);
372 } 367 }
373 368
374 exp2 += norm_Xsig(&accumulator); 369 FPU_copy_to_reg0(&result, TAG_Valid);
375 shr_Xsig(&accumulator, 1); /* Prevent overflow */
376 exp2++;
377 shr_Xsig(&fix_up, 65 + exp2);
378
379 add_Xsig_Xsig(&accumulator, &fix_up);
380
381 echange = round_Xsig(&accumulator);
382
383 setexponentpos(&result, exp2 + echange);
384 significand(&result) = XSIG_LL(accumulator);
385 }
386
387 FPU_copy_to_reg0(&result, TAG_Valid);
388 370
389#ifdef PARANOID 371#ifdef PARANOID
390 if ( (exponent(&result) >= 0) 372 if ((exponent(&result) >= 0)
391 && (significand(&result) > 0x8000000000000000LL) ) 373 && (significand(&result) > 0x8000000000000000LL)) {
392 { 374 EXCEPTION(EX_INTERNAL | 0x151);
393 EXCEPTION(EX_INTERNAL|0x151); 375 }
394 }
395#endif /* PARANOID */ 376#endif /* PARANOID */
396 377
397} 378}