diff options
Diffstat (limited to 'arch/x86/math-emu/poly_sin.c')
-rw-r--r-- | arch/x86/math-emu/poly_sin.c | 397 |
1 files changed, 397 insertions, 0 deletions
diff --git a/arch/x86/math-emu/poly_sin.c b/arch/x86/math-emu/poly_sin.c new file mode 100644 index 000000000000..a36313fb06f1 --- /dev/null +++ b/arch/x86/math-emu/poly_sin.c | |||
@@ -0,0 +1,397 @@ | |||
1 | /*---------------------------------------------------------------------------+ | ||
2 | | poly_sin.c | | ||
3 | | | | ||
4 | | Computation of an approximation of the sin function and the cosine | | ||
5 | | function by a polynomial. | | ||
6 | | | | ||
7 | | Copyright (C) 1992,1993,1994,1997,1999 | | ||
8 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | ||
9 | | E-mail billm@melbpc.org.au | | ||
10 | | | | ||
11 | | | | ||
12 | +---------------------------------------------------------------------------*/ | ||
13 | |||
14 | |||
15 | #include "exception.h" | ||
16 | #include "reg_constant.h" | ||
17 | #include "fpu_emu.h" | ||
18 | #include "fpu_system.h" | ||
19 | #include "control_w.h" | ||
20 | #include "poly.h" | ||
21 | |||
22 | |||
23 | #define N_COEFF_P 4 | ||
24 | #define N_COEFF_N 4 | ||
25 | |||
26 | static const unsigned long long pos_terms_l[N_COEFF_P] = | ||
27 | { | ||
28 | 0xaaaaaaaaaaaaaaabLL, | ||
29 | 0x00d00d00d00cf906LL, | ||
30 | 0x000006b99159a8bbLL, | ||
31 | 0x000000000d7392e6LL | ||
32 | }; | ||
33 | |||
34 | static const unsigned long long neg_terms_l[N_COEFF_N] = | ||
35 | { | ||
36 | 0x2222222222222167LL, | ||
37 | 0x0002e3bc74aab624LL, | ||
38 | 0x0000000b09229062LL, | ||
39 | 0x00000000000c7973LL | ||
40 | }; | ||
41 | |||
42 | |||
43 | |||
44 | #define N_COEFF_PH 4 | ||
45 | #define N_COEFF_NH 4 | ||
46 | static const unsigned long long pos_terms_h[N_COEFF_PH] = | ||
47 | { | ||
48 | 0x0000000000000000LL, | ||
49 | 0x05b05b05b05b0406LL, | ||
50 | 0x000049f93edd91a9LL, | ||
51 | 0x00000000c9c9ed62LL | ||
52 | }; | ||
53 | |||
54 | static const unsigned long long neg_terms_h[N_COEFF_NH] = | ||
55 | { | ||
56 | 0xaaaaaaaaaaaaaa98LL, | ||
57 | 0x001a01a01a019064LL, | ||
58 | 0x0000008f76c68a77LL, | ||
59 | 0x0000000000d58f5eLL | ||
60 | }; | ||
61 | |||
62 | |||
63 | /*--- poly_sine() -----------------------------------------------------------+ | ||
64 | | | | ||
65 | +---------------------------------------------------------------------------*/ | ||
66 | void poly_sine(FPU_REG *st0_ptr) | ||
67 | { | ||
68 | int exponent, echange; | ||
69 | Xsig accumulator, argSqrd, argTo4; | ||
70 | unsigned long fix_up, adj; | ||
71 | unsigned long long fixed_arg; | ||
72 | FPU_REG result; | ||
73 | |||
74 | exponent = exponent(st0_ptr); | ||
75 | |||
76 | accumulator.lsw = accumulator.midw = accumulator.msw = 0; | ||
77 | |||
78 | /* Split into two ranges, for arguments below and above 1.0 */ | ||
79 | /* The boundary between upper and lower is approx 0.88309101259 */ | ||
80 | if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) ) | ||
81 | { | ||
82 | /* The argument is <= 0.88309101259 */ | ||
83 | |||
84 | argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0; | ||
85 | mul64_Xsig(&argSqrd, &significand(st0_ptr)); | ||
86 | shr_Xsig(&argSqrd, 2*(-1-exponent)); | ||
87 | argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; | ||
88 | argTo4.lsw = argSqrd.lsw; | ||
89 | mul_Xsig_Xsig(&argTo4, &argTo4); | ||
90 | |||
91 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, | ||
92 | N_COEFF_N-1); | ||
93 | mul_Xsig_Xsig(&accumulator, &argSqrd); | ||
94 | negate_Xsig(&accumulator); | ||
95 | |||
96 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, | ||
97 | N_COEFF_P-1); | ||
98 | |||
99 | shr_Xsig(&accumulator, 2); /* Divide by four */ | ||
100 | accumulator.msw |= 0x80000000; /* Add 1.0 */ | ||
101 | |||
102 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | ||
103 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | ||
104 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | ||
105 | |||
106 | /* Divide by four, FPU_REG compatible, etc */ | ||
107 | exponent = 3*exponent; | ||
108 | |||
109 | /* The minimum exponent difference is 3 */ | ||
110 | shr_Xsig(&accumulator, exponent(st0_ptr) - exponent); | ||
111 | |||
112 | negate_Xsig(&accumulator); | ||
113 | XSIG_LL(accumulator) += significand(st0_ptr); | ||
114 | |||
115 | echange = round_Xsig(&accumulator); | ||
116 | |||
117 | setexponentpos(&result, exponent(st0_ptr) + echange); | ||
118 | } | ||
119 | else | ||
120 | { | ||
121 | /* The argument is > 0.88309101259 */ | ||
122 | /* We use sin(st(0)) = cos(pi/2-st(0)) */ | ||
123 | |||
124 | fixed_arg = significand(st0_ptr); | ||
125 | |||
126 | if ( exponent == 0 ) | ||
127 | { | ||
128 | /* The argument is >= 1.0 */ | ||
129 | |||
130 | /* Put the binary point at the left. */ | ||
131 | fixed_arg <<= 1; | ||
132 | } | ||
133 | /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ | ||
134 | fixed_arg = 0x921fb54442d18469LL - fixed_arg; | ||
135 | /* There is a special case which arises due to rounding, to fix here. */ | ||
136 | if ( fixed_arg == 0xffffffffffffffffLL ) | ||
137 | fixed_arg = 0; | ||
138 | |||
139 | XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; | ||
140 | mul64_Xsig(&argSqrd, &fixed_arg); | ||
141 | |||
142 | XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw; | ||
143 | mul_Xsig_Xsig(&argTo4, &argTo4); | ||
144 | |||
145 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, | ||
146 | N_COEFF_NH-1); | ||
147 | mul_Xsig_Xsig(&accumulator, &argSqrd); | ||
148 | negate_Xsig(&accumulator); | ||
149 | |||
150 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, | ||
151 | N_COEFF_PH-1); | ||
152 | negate_Xsig(&accumulator); | ||
153 | |||
154 | mul64_Xsig(&accumulator, &fixed_arg); | ||
155 | mul64_Xsig(&accumulator, &fixed_arg); | ||
156 | |||
157 | shr_Xsig(&accumulator, 3); | ||
158 | negate_Xsig(&accumulator); | ||
159 | |||
160 | add_Xsig_Xsig(&accumulator, &argSqrd); | ||
161 | |||
162 | shr_Xsig(&accumulator, 1); | ||
163 | |||
164 | accumulator.lsw |= 1; /* A zero accumulator here would cause problems */ | ||
165 | negate_Xsig(&accumulator); | ||
166 | |||
167 | /* The basic computation is complete. Now fix the answer to | ||
168 | compensate for the error due to the approximation used for | ||
169 | pi/2 | ||
170 | */ | ||
171 | |||
172 | /* This has an exponent of -65 */ | ||
173 | fix_up = 0x898cc517; | ||
174 | /* The fix-up needs to be improved for larger args */ | ||
175 | if ( argSqrd.msw & 0xffc00000 ) | ||
176 | { | ||
177 | /* Get about 32 bit precision in these: */ | ||
178 | fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6; | ||
179 | } | ||
180 | fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg)); | ||
181 | |||
182 | adj = accumulator.lsw; /* temp save */ | ||
183 | accumulator.lsw -= fix_up; | ||
184 | if ( accumulator.lsw > adj ) | ||
185 | XSIG_LL(accumulator) --; | ||
186 | |||
187 | echange = round_Xsig(&accumulator); | ||
188 | |||
189 | setexponentpos(&result, echange - 1); | ||
190 | } | ||
191 | |||
192 | significand(&result) = XSIG_LL(accumulator); | ||
193 | setsign(&result, getsign(st0_ptr)); | ||
194 | FPU_copy_to_reg0(&result, TAG_Valid); | ||
195 | |||
196 | #ifdef PARANOID | ||
197 | if ( (exponent(&result) >= 0) | ||
198 | && (significand(&result) > 0x8000000000000000LL) ) | ||
199 | { | ||
200 | EXCEPTION(EX_INTERNAL|0x150); | ||
201 | } | ||
202 | #endif /* PARANOID */ | ||
203 | |||
204 | } | ||
205 | |||
206 | |||
207 | |||
208 | /*--- poly_cos() ------------------------------------------------------------+ | ||
209 | | | | ||
210 | +---------------------------------------------------------------------------*/ | ||
211 | void poly_cos(FPU_REG *st0_ptr) | ||
212 | { | ||
213 | FPU_REG result; | ||
214 | long int exponent, exp2, echange; | ||
215 | Xsig accumulator, argSqrd, fix_up, argTo4; | ||
216 | unsigned long long fixed_arg; | ||
217 | |||
218 | #ifdef PARANOID | ||
219 | if ( (exponent(st0_ptr) > 0) | ||
220 | || ((exponent(st0_ptr) == 0) | ||
221 | && (significand(st0_ptr) > 0xc90fdaa22168c234LL)) ) | ||
222 | { | ||
223 | EXCEPTION(EX_Invalid); | ||
224 | FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); | ||
225 | return; | ||
226 | } | ||
227 | #endif /* PARANOID */ | ||
228 | |||
229 | exponent = exponent(st0_ptr); | ||
230 | |||
231 | accumulator.lsw = accumulator.midw = accumulator.msw = 0; | ||
232 | |||
233 | if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) ) | ||
234 | { | ||
235 | /* arg is < 0.687705 */ | ||
236 | |||
237 | argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; | ||
238 | argSqrd.lsw = 0; | ||
239 | mul64_Xsig(&argSqrd, &significand(st0_ptr)); | ||
240 | |||
241 | if ( exponent < -1 ) | ||
242 | { | ||
243 | /* shift the argument right by the required places */ | ||
244 | shr_Xsig(&argSqrd, 2*(-1-exponent)); | ||
245 | } | ||
246 | |||
247 | argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; | ||
248 | argTo4.lsw = argSqrd.lsw; | ||
249 | mul_Xsig_Xsig(&argTo4, &argTo4); | ||
250 | |||
251 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, | ||
252 | N_COEFF_NH-1); | ||
253 | mul_Xsig_Xsig(&accumulator, &argSqrd); | ||
254 | negate_Xsig(&accumulator); | ||
255 | |||
256 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, | ||
257 | N_COEFF_PH-1); | ||
258 | negate_Xsig(&accumulator); | ||
259 | |||
260 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | ||
261 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | ||
262 | shr_Xsig(&accumulator, -2*(1+exponent)); | ||
263 | |||
264 | shr_Xsig(&accumulator, 3); | ||
265 | negate_Xsig(&accumulator); | ||
266 | |||
267 | add_Xsig_Xsig(&accumulator, &argSqrd); | ||
268 | |||
269 | shr_Xsig(&accumulator, 1); | ||
270 | |||
271 | /* It doesn't matter if accumulator is all zero here, the | ||
272 | following code will work ok */ | ||
273 | negate_Xsig(&accumulator); | ||
274 | |||
275 | if ( accumulator.lsw & 0x80000000 ) | ||
276 | XSIG_LL(accumulator) ++; | ||
277 | if ( accumulator.msw == 0 ) | ||
278 | { | ||
279 | /* The result is 1.0 */ | ||
280 | FPU_copy_to_reg0(&CONST_1, TAG_Valid); | ||
281 | return; | ||
282 | } | ||
283 | else | ||
284 | { | ||
285 | significand(&result) = XSIG_LL(accumulator); | ||
286 | |||
287 | /* will be a valid positive nr with expon = -1 */ | ||
288 | setexponentpos(&result, -1); | ||
289 | } | ||
290 | } | ||
291 | else | ||
292 | { | ||
293 | fixed_arg = significand(st0_ptr); | ||
294 | |||
295 | if ( exponent == 0 ) | ||
296 | { | ||
297 | /* The argument is >= 1.0 */ | ||
298 | |||
299 | /* Put the binary point at the left. */ | ||
300 | fixed_arg <<= 1; | ||
301 | } | ||
302 | /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ | ||
303 | fixed_arg = 0x921fb54442d18469LL - fixed_arg; | ||
304 | /* There is a special case which arises due to rounding, to fix here. */ | ||
305 | if ( fixed_arg == 0xffffffffffffffffLL ) | ||
306 | fixed_arg = 0; | ||
307 | |||
308 | exponent = -1; | ||
309 | exp2 = -1; | ||
310 | |||
311 | /* A shift is needed here only for a narrow range of arguments, | ||
312 | i.e. for fixed_arg approx 2^-32, but we pick up more... */ | ||
313 | if ( !(LL_MSW(fixed_arg) & 0xffff0000) ) | ||
314 | { | ||
315 | fixed_arg <<= 16; | ||
316 | exponent -= 16; | ||
317 | exp2 -= 16; | ||
318 | } | ||
319 | |||
320 | XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; | ||
321 | mul64_Xsig(&argSqrd, &fixed_arg); | ||
322 | |||
323 | if ( exponent < -1 ) | ||
324 | { | ||
325 | /* shift the argument right by the required places */ | ||
326 | shr_Xsig(&argSqrd, 2*(-1-exponent)); | ||
327 | } | ||
328 | |||
329 | argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; | ||
330 | argTo4.lsw = argSqrd.lsw; | ||
331 | mul_Xsig_Xsig(&argTo4, &argTo4); | ||
332 | |||
333 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, | ||
334 | N_COEFF_N-1); | ||
335 | mul_Xsig_Xsig(&accumulator, &argSqrd); | ||
336 | negate_Xsig(&accumulator); | ||
337 | |||
338 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, | ||
339 | N_COEFF_P-1); | ||
340 | |||
341 | shr_Xsig(&accumulator, 2); /* Divide by four */ | ||
342 | accumulator.msw |= 0x80000000; /* Add 1.0 */ | ||
343 | |||
344 | mul64_Xsig(&accumulator, &fixed_arg); | ||
345 | mul64_Xsig(&accumulator, &fixed_arg); | ||
346 | mul64_Xsig(&accumulator, &fixed_arg); | ||
347 | |||
348 | /* Divide by four, FPU_REG compatible, etc */ | ||
349 | exponent = 3*exponent; | ||
350 | |||
351 | /* The minimum exponent difference is 3 */ | ||
352 | shr_Xsig(&accumulator, exp2 - exponent); | ||
353 | |||
354 | negate_Xsig(&accumulator); | ||
355 | XSIG_LL(accumulator) += fixed_arg; | ||
356 | |||
357 | /* The basic computation is complete. Now fix the answer to | ||
358 | compensate for the error due to the approximation used for | ||
359 | pi/2 | ||
360 | */ | ||
361 | |||
362 | /* This has an exponent of -65 */ | ||
363 | XSIG_LL(fix_up) = 0x898cc51701b839a2ll; | ||
364 | fix_up.lsw = 0; | ||
365 | |||
366 | /* The fix-up needs to be improved for larger args */ | ||
367 | if ( argSqrd.msw & 0xffc00000 ) | ||
368 | { | ||
369 | /* Get about 32 bit precision in these: */ | ||
370 | fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2; | ||
371 | fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24; | ||
372 | } | ||
373 | |||
374 | exp2 += norm_Xsig(&accumulator); | ||
375 | shr_Xsig(&accumulator, 1); /* Prevent overflow */ | ||
376 | exp2++; | ||
377 | shr_Xsig(&fix_up, 65 + exp2); | ||
378 | |||
379 | add_Xsig_Xsig(&accumulator, &fix_up); | ||
380 | |||
381 | echange = round_Xsig(&accumulator); | ||
382 | |||
383 | setexponentpos(&result, exp2 + echange); | ||
384 | significand(&result) = XSIG_LL(accumulator); | ||
385 | } | ||
386 | |||
387 | FPU_copy_to_reg0(&result, TAG_Valid); | ||
388 | |||
389 | #ifdef PARANOID | ||
390 | if ( (exponent(&result) >= 0) | ||
391 | && (significand(&result) > 0x8000000000000000LL) ) | ||
392 | { | ||
393 | EXCEPTION(EX_INTERNAL|0x151); | ||
394 | } | ||
395 | #endif /* PARANOID */ | ||
396 | |||
397 | } | ||