diff options
Diffstat (limited to 'arch/m68k/math-emu')
-rw-r--r-- | arch/m68k/math-emu/Makefile | 11 | ||||
-rw-r--r-- | arch/m68k/math-emu/fp_arith.c | 701 | ||||
-rw-r--r-- | arch/m68k/math-emu/fp_arith.h | 52 | ||||
-rw-r--r-- | arch/m68k/math-emu/fp_cond.S | 334 | ||||
-rw-r--r-- | arch/m68k/math-emu/fp_decode.h | 417 | ||||
-rw-r--r-- | arch/m68k/math-emu/fp_emu.h | 146 | ||||
-rw-r--r-- | arch/m68k/math-emu/fp_entry.S | 325 | ||||
-rw-r--r-- | arch/m68k/math-emu/fp_log.c | 223 | ||||
-rw-r--r-- | arch/m68k/math-emu/fp_move.S | 244 | ||||
-rw-r--r-- | arch/m68k/math-emu/fp_movem.S | 368 | ||||
-rw-r--r-- | arch/m68k/math-emu/fp_scan.S | 478 | ||||
-rw-r--r-- | arch/m68k/math-emu/fp_trig.c | 183 | ||||
-rw-r--r-- | arch/m68k/math-emu/fp_trig.h | 32 | ||||
-rw-r--r-- | arch/m68k/math-emu/fp_util.S | 1455 | ||||
-rw-r--r-- | arch/m68k/math-emu/multi_arith.h | 819 |
15 files changed, 5788 insertions, 0 deletions
diff --git a/arch/m68k/math-emu/Makefile b/arch/m68k/math-emu/Makefile new file mode 100644 index 000000000000..539940401814 --- /dev/null +++ b/arch/m68k/math-emu/Makefile | |||
@@ -0,0 +1,11 @@ | |||
#
# Makefile for the linux kernel.
#

# assemble the .S files with traditional cpp semantics (the asm macros
# in this directory rely on it)
EXTRA_AFLAGS := -traditional

# uncomment to build the emulator with debug output
#EXTRA_AFLAGS += -DFPU_EMU_DEBUG
#EXTRA_CFLAGS += -DFPU_EMU_DEBUG

obj-y		:= fp_entry.o fp_scan.o fp_util.o fp_move.o fp_movem.o \
		   fp_cond.o fp_arith.o fp_log.o fp_trig.o
diff --git a/arch/m68k/math-emu/fp_arith.c b/arch/m68k/math-emu/fp_arith.c new file mode 100644 index 000000000000..08f286db3c5a --- /dev/null +++ b/arch/m68k/math-emu/fp_arith.c | |||
@@ -0,0 +1,701 @@ | |||
1 | /* | ||
2 | |||
3 | fp_arith.c: floating-point math routines for the Linux-m68k | ||
4 | floating point emulator. | ||
5 | |||
6 | Copyright (c) 1998-1999 David Huggins-Daines. | ||
7 | |||
8 | Somewhat based on the AlphaLinux floating point emulator, by David | ||
9 | Mosberger-Tang. | ||
10 | |||
11 | You may copy, modify, and redistribute this file under the terms of | ||
12 | the GNU General Public License, version 2, or any later version, at | ||
13 | your convenience. | ||
14 | */ | ||
15 | |||
16 | #include "fp_emu.h" | ||
17 | #include "multi_arith.h" | ||
18 | #include "fp_arith.h" | ||
19 | |||
/* default quiet NaN: all-ones exponent and all-ones mantissa */
const struct fp_ext fp_QNaN =
{
	.exp = 0x7fff,
	.mant = { .m64 = ~0 }
};

/* +infinity: all-ones exponent, zero mantissa (sign defaults to 0) */
const struct fp_ext fp_Inf =
{
	.exp = 0x7fff,
};

/* let's start with the easy ones */

/* fp_fabs: FABS kernel -- after the common monadic-operation checks
   (fp_monadic_check), simply clear the sign of the destination. */
struct fp_ext *
fp_fabs(struct fp_ext *dest, struct fp_ext *src)
{
	dprint(PINSTR, "fabs\n");

	fp_monadic_check(dest, src);

	dest->sign = 0;

	return dest;
}
44 | |||
45 | struct fp_ext * | ||
46 | fp_fneg(struct fp_ext *dest, struct fp_ext *src) | ||
47 | { | ||
48 | dprint(PINSTR, "fneg\n"); | ||
49 | |||
50 | fp_monadic_check(dest, src); | ||
51 | |||
52 | dest->sign = !dest->sign; | ||
53 | |||
54 | return dest; | ||
55 | } | ||
56 | |||
57 | /* Now, the slightly harder ones */ | ||
58 | |||
/* fp_fadd: Implements the kernel of the FADD, FSADD, FDADD, FSUB,
   FDSUB, and FCMP instructions.

   Signed-magnitude addition: operands with equal signs have their
   mantissas added, opposite signs have the smaller magnitude
   subtracted from the larger.  Returns dest, modified in place. */

struct fp_ext *
fp_fadd(struct fp_ext *dest, struct fp_ext *src)
{
	int diff;

	dprint(PINSTR, "fadd\n");

	fp_dyadic_check(dest, src);

	if (IS_INF(dest)) {
		/* infinity - infinity == NaN */
		if (IS_INF(src) && (src->sign != dest->sign))
			fp_set_nan(dest);
		return dest;
	}
	if (IS_INF(src)) {
		fp_copy_ext(dest, src);
		return dest;
	}

	if (IS_ZERO(dest)) {
		if (IS_ZERO(src)) {
			/* (+0) + (-0): the result is -0 only when rounding
			   toward minus infinity, +0 in every other mode
			   (IEEE 754 rule for sums of opposite-signed zeros) */
			if (src->sign != dest->sign) {
				if (FPDATA->rnd == FPCR_ROUND_RM)
					dest->sign = 1;
				else
					dest->sign = 0;
			}
		} else
			fp_copy_ext(dest, src);
		return dest;
	}

	dest->lowmant = src->lowmant = 0;

	/* align the exponents by denormalizing the smaller operand;
	   bits shifted out are retained (lowmant) for rounding */
	if ((diff = dest->exp - src->exp) > 0)
		fp_denormalize(src, diff);
	else if ((diff = -diff) > 0)
		fp_denormalize(dest, diff);

	if (dest->sign == src->sign) {
		/* same sign: add magnitudes; a carry out of the top bit
		   needs a one-bit renormalization in fp_addcarry */
		if (fp_addmant(dest, src))
			if (!fp_addcarry(dest))
				return dest;
	} else {
		/* opposite signs: subtract smaller magnitude from larger;
		   the result takes the sign of the larger operand */
		if (dest->mant.m64 < src->mant.m64) {
			fp_submant(dest, src, dest);
			dest->sign = !dest->sign;
		} else
			fp_submant(dest, dest, src);
	}

	return dest;
}
116 | |||
117 | /* fp_fsub: Implements the kernel of the FSUB, FSSUB, and FDSUB | ||
118 | instructions. | ||
119 | |||
120 | Remember that the arguments are in assembler-syntax order! */ | ||
121 | |||
122 | struct fp_ext * | ||
123 | fp_fsub(struct fp_ext *dest, struct fp_ext *src) | ||
124 | { | ||
125 | dprint(PINSTR, "fsub "); | ||
126 | |||
127 | src->sign = !src->sign; | ||
128 | return fp_fadd(dest, src); | ||
129 | } | ||
130 | |||
131 | |||
132 | struct fp_ext * | ||
133 | fp_fcmp(struct fp_ext *dest, struct fp_ext *src) | ||
134 | { | ||
135 | dprint(PINSTR, "fcmp "); | ||
136 | |||
137 | FPDATA->temp[1] = *dest; | ||
138 | src->sign = !src->sign; | ||
139 | return fp_fadd(&FPDATA->temp[1], src); | ||
140 | } | ||
141 | |||
/* fp_ftst: FTST kernel -- nothing to compute here; presumably the
   caller derives the condition codes from the returned value (TODO:
   confirm against fp_scan.S). */
struct fp_ext *
fp_ftst(struct fp_ext *dest, struct fp_ext *src)
{
	dprint(PINSTR, "ftst\n");

	(void)dest;	/* dest is unused by ftst */

	return src;
}
151 | |||
/* fp_fmul: kernel of the FMUL, FSMUL and FDMUL instructions.
   Multiplies dest by src in place and returns dest. */
struct fp_ext *
fp_fmul(struct fp_ext *dest, struct fp_ext *src)
{
	union fp_mant128 temp;
	int exp;

	dprint(PINSTR, "fmul\n");

	fp_dyadic_check(dest, src);

	/* calculate the correct sign now, as it's necessary for infinities */
	dest->sign = src->sign ^ dest->sign;

	/* Handle infinities */
	if (IS_INF(dest)) {
		/* infinity * zero == NaN */
		if (IS_ZERO(src))
			fp_set_nan(dest);
		return dest;
	}
	if (IS_INF(src)) {
		if (IS_ZERO(dest))
			fp_set_nan(dest);
		else
			fp_copy_ext(dest, src);
		return dest;
	}

	/* Of course, as we all know, zero * anything = zero. You may
	   not have known that it might be a positive or negative
	   zero... */
	if (IS_ZERO(dest) || IS_ZERO(src)) {
		dest->exp = 0;
		dest->mant.m64 = 0;
		dest->lowmant = 0;

		return dest;
	}

	/* sum of the biased exponents; -0x3ffe removes one bias and
	   leaves room for the one-bit normalization below */
	exp = dest->exp + src->exp - 0x3ffe;

	/* shift up the mantissa for denormalized numbers,
	   so that the highest bit is set, this makes the
	   shift of the result below easier */
	if ((long)dest->mant.m32[0] >= 0)
		exp -= fp_overnormalize(dest);
	if ((long)src->mant.m32[0] >= 0)
		exp -= fp_overnormalize(src);

	/* now, do a 64-bit multiply with expansion */
	fp_multiplymant(&temp, dest, src);

	/* normalize it back to 64 bits and stuff it back into the
	   destination struct */
	if ((long)temp.m32[0] > 0) {
		/* top bit of the 128-bit product is clear: shift left
		   once and compensate in the exponent */
		exp--;
		fp_putmant128(dest, &temp, 1);
	} else
		fp_putmant128(dest, &temp, 0);

	if (exp >= 0x7fff) {
		fp_set_ovrflw(dest);
		return dest;
	}
	dest->exp = exp;
	if (exp < 0) {
		/* result below the normalized extended range */
		fp_set_sr(FPSR_EXC_UNFL);
		fp_denormalize(dest, -exp);
	}

	return dest;
}
223 | |||
/* fp_fdiv: Implements the "kernel" of the FDIV, FSDIV, FDDIV and
   FSGLDIV instructions.

   Note that the order of the operands is counter-intuitive: instead
   of src / dest, the result is actually dest / src. */

struct fp_ext *
fp_fdiv(struct fp_ext *dest, struct fp_ext *src)
{
	union fp_mant128 temp;
	int exp;

	dprint(PINSTR, "fdiv\n");

	fp_dyadic_check(dest, src);

	/* calculate the correct sign now, as it's necessary for infinities */
	dest->sign = src->sign ^ dest->sign;

	/* Handle infinities */
	if (IS_INF(dest)) {
		/* infinity / infinity = NaN (quiet, as always) */
		if (IS_INF(src))
			fp_set_nan(dest);
		/* infinity / anything else = infinity (with appropriate sign) */
		return dest;
	}
	if (IS_INF(src)) {
		/* anything / infinity = zero (with appropriate sign) */
		dest->exp = 0;
		dest->mant.m64 = 0;
		dest->lowmant = 0;

		return dest;
	}

	/* zeroes */
	if (IS_ZERO(dest)) {
		/* zero / zero = NaN */
		if (IS_ZERO(src))
			fp_set_nan(dest);
		/* zero / anything else = zero */
		return dest;
	}
	if (IS_ZERO(src)) {
		/* anything / zero = infinity (with appropriate sign) */
		fp_set_sr(FPSR_EXC_DZ);		/* divide-by-zero exception */
		dest->exp = 0x7fff;
		dest->mant.m64 = 0;

		return dest;
	}

	/* difference of the biased exponents; +0x3fff restores the bias */
	exp = dest->exp - src->exp + 0x3fff;

	/* shift up the mantissa for denormalized numbers,
	   so that the highest bit is set, this makes lots
	   of things below easier */
	if ((long)dest->mant.m32[0] >= 0)
		exp -= fp_overnormalize(dest);
	if ((long)src->mant.m32[0] >= 0)
		exp -= fp_overnormalize(src);

	/* now, do the 64-bit divide */
	fp_dividemant(&temp, dest, src);

	/* normalize it back to 64 bits and stuff it back into the
	   destination struct */
	if (!temp.m32[0]) {
		/* high word empty: result needs one more bit of shift */
		exp--;
		fp_putmant128(dest, &temp, 32);
	} else
		fp_putmant128(dest, &temp, 31);

	if (exp >= 0x7fff) {
		fp_set_ovrflw(dest);
		return dest;
	}
	dest->exp = exp;
	if (exp < 0) {
		/* result below the normalized extended range */
		fp_set_sr(FPSR_EXC_UNFL);
		fp_denormalize(dest, -exp);
	}

	return dest;
}
310 | |||
/* fp_fsglmul: FSGLMUL kernel -- single-precision multiply: only the
   upper 24 bits of each mantissa take part in the product. */
struct fp_ext *
fp_fsglmul(struct fp_ext *dest, struct fp_ext *src)
{
	int exp;

	dprint(PINSTR, "fsglmul\n");

	fp_dyadic_check(dest, src);

	/* calculate the correct sign now, as it's necessary for infinities */
	dest->sign = src->sign ^ dest->sign;

	/* Handle infinities */
	if (IS_INF(dest)) {
		/* infinity * zero == NaN */
		if (IS_ZERO(src))
			fp_set_nan(dest);
		return dest;
	}
	if (IS_INF(src)) {
		if (IS_ZERO(dest))
			fp_set_nan(dest);
		else
			fp_copy_ext(dest, src);
		return dest;
	}

	/* Of course, as we all know, zero * anything = zero. You may
	   not have known that it might be a positive or negative
	   zero... */
	if (IS_ZERO(dest) || IS_ZERO(src)) {
		dest->exp = 0;
		dest->mant.m64 = 0;
		dest->lowmant = 0;

		return dest;
	}

	exp = dest->exp + src->exp - 0x3ffe;

	/* do a 32-bit multiply */
	/* mantissas are truncated to 24 bits first; NOTE(review): unlike
	   fp_fmul there is no post-multiply normalization here -- the
	   top bit of the product may be clear; presumably handled by
	   later rounding/normalization, confirm against fp_util.S */
	fp_mul64(dest->mant.m32[0], dest->mant.m32[1],
		 dest->mant.m32[0] & 0xffffff00,
		 src->mant.m32[0] & 0xffffff00);

	if (exp >= 0x7fff) {
		fp_set_ovrflw(dest);
		return dest;
	}
	dest->exp = exp;
	if (exp < 0) {
		fp_set_sr(FPSR_EXC_UNFL);
		fp_denormalize(dest, -exp);
	}

	return dest;
}
367 | |||
/* fp_fsgldiv: FSGLDIV kernel -- single-precision divide: only the
   upper 24 bits of each mantissa take part in the quotient. */
struct fp_ext *
fp_fsgldiv(struct fp_ext *dest, struct fp_ext *src)
{
	int exp;
	unsigned long quot, rem;

	dprint(PINSTR, "fsgldiv\n");

	fp_dyadic_check(dest, src);

	/* calculate the correct sign now, as it's necessary for infinities */
	dest->sign = src->sign ^ dest->sign;

	/* Handle infinities */
	if (IS_INF(dest)) {
		/* infinity / infinity = NaN (quiet, as always) */
		if (IS_INF(src))
			fp_set_nan(dest);
		/* infinity / anything else = infinity (with appropriate sign) */
		return dest;
	}
	if (IS_INF(src)) {
		/* anything / infinity = zero (with appropriate sign) */
		dest->exp = 0;
		dest->mant.m64 = 0;
		dest->lowmant = 0;

		return dest;
	}

	/* zeroes */
	if (IS_ZERO(dest)) {
		/* zero / zero = NaN */
		if (IS_ZERO(src))
			fp_set_nan(dest);
		/* zero / anything else = zero */
		return dest;
	}
	if (IS_ZERO(src)) {
		/* anything / zero = infinity (with appropriate sign) */
		fp_set_sr(FPSR_EXC_DZ);
		dest->exp = 0x7fff;
		dest->mant.m64 = 0;

		return dest;
	}

	exp = dest->exp - src->exp + 0x3fff;

	/* truncate both mantissas to single precision (24 bits) */
	dest->mant.m32[0] &= 0xffffff00;
	src->mant.m32[0] &= 0xffffff00;

	/* do the 32-bit divide */
	if (dest->mant.m32[0] >= src->mant.m32[0]) {
		/* the quotient would overflow 32 bits: take out one src
		   first, so the result keeps an explicit top bit */
		fp_sub64(dest->mant, src->mant);
		fp_div64(quot, rem, dest->mant.m32[0], 0, src->mant.m32[0]);
		dest->mant.m32[0] = 0x80000000 | (quot >> 1);
		dest->mant.m32[1] = (quot & 1) | rem;	/* only for rounding */
	} else {
		fp_div64(quot, rem, dest->mant.m32[0], 0, src->mant.m32[0]);
		dest->mant.m32[0] = quot;
		dest->mant.m32[1] = rem;	/* only for rounding */
		exp--;
	}

	if (exp >= 0x7fff) {
		fp_set_ovrflw(dest);
		return dest;
	}
	dest->exp = exp;
	if (exp < 0) {
		fp_set_sr(FPSR_EXC_UNFL);
		fp_denormalize(dest, -exp);
	}

	return dest;
}
445 | |||
446 | /* fp_roundint: Internal rounding function for use by several of these | ||
447 | emulated instructions. | ||
448 | |||
449 | This one rounds off the fractional part using the rounding mode | ||
450 | specified. */ | ||
451 | |||
452 | static void fp_roundint(struct fp_ext *dest, int mode) | ||
453 | { | ||
454 | union fp_mant64 oldmant; | ||
455 | unsigned long mask; | ||
456 | |||
457 | if (!fp_normalize_ext(dest)) | ||
458 | return; | ||
459 | |||
460 | /* infinities and zeroes */ | ||
461 | if (IS_INF(dest) || IS_ZERO(dest)) | ||
462 | return; | ||
463 | |||
464 | /* first truncate the lower bits */ | ||
465 | oldmant = dest->mant; | ||
466 | switch (dest->exp) { | ||
467 | case 0 ... 0x3ffe: | ||
468 | dest->mant.m64 = 0; | ||
469 | break; | ||
470 | case 0x3fff ... 0x401e: | ||
471 | dest->mant.m32[0] &= 0xffffffffU << (0x401e - dest->exp); | ||
472 | dest->mant.m32[1] = 0; | ||
473 | if (oldmant.m64 == dest->mant.m64) | ||
474 | return; | ||
475 | break; | ||
476 | case 0x401f ... 0x403e: | ||
477 | dest->mant.m32[1] &= 0xffffffffU << (0x403e - dest->exp); | ||
478 | if (oldmant.m32[1] == dest->mant.m32[1]) | ||
479 | return; | ||
480 | break; | ||
481 | default: | ||
482 | return; | ||
483 | } | ||
484 | fp_set_sr(FPSR_EXC_INEX2); | ||
485 | |||
486 | /* We might want to normalize upwards here... however, since | ||
487 | we know that this is only called on the output of fp_fdiv, | ||
488 | or with the input to fp_fint or fp_fintrz, and the inputs | ||
489 | to all these functions are either normal or denormalized | ||
490 | (no subnormals allowed!), there's really no need. | ||
491 | |||
492 | In the case of fp_fdiv, observe that 0x80000000 / 0xffff = | ||
493 | 0xffff8000, and the same holds for 128-bit / 64-bit. (i.e. the | ||
494 | smallest possible normal dividend and the largest possible normal | ||
495 | divisor will still produce a normal quotient, therefore, (normal | ||
496 | << 64) / normal is normal in all cases) */ | ||
497 | |||
498 | switch (mode) { | ||
499 | case FPCR_ROUND_RN: | ||
500 | switch (dest->exp) { | ||
501 | case 0 ... 0x3ffd: | ||
502 | return; | ||
503 | case 0x3ffe: | ||
504 | /* As noted above, the input is always normal, so the | ||
505 | guard bit (bit 63) is always set. therefore, the | ||
506 | only case in which we will NOT round to 1.0 is when | ||
507 | the input is exactly 0.5. */ | ||
508 | if (oldmant.m64 == (1ULL << 63)) | ||
509 | return; | ||
510 | break; | ||
511 | case 0x3fff ... 0x401d: | ||
512 | mask = 1 << (0x401d - dest->exp); | ||
513 | if (!(oldmant.m32[0] & mask)) | ||
514 | return; | ||
515 | if (oldmant.m32[0] & (mask << 1)) | ||
516 | break; | ||
517 | if (!(oldmant.m32[0] << (dest->exp - 0x3ffd)) && | ||
518 | !oldmant.m32[1]) | ||
519 | return; | ||
520 | break; | ||
521 | case 0x401e: | ||
522 | if (!(oldmant.m32[1] >= 0)) | ||
523 | return; | ||
524 | if (oldmant.m32[0] & 1) | ||
525 | break; | ||
526 | if (!(oldmant.m32[1] << 1)) | ||
527 | return; | ||
528 | break; | ||
529 | case 0x401f ... 0x403d: | ||
530 | mask = 1 << (0x403d - dest->exp); | ||
531 | if (!(oldmant.m32[1] & mask)) | ||
532 | return; | ||
533 | if (oldmant.m32[1] & (mask << 1)) | ||
534 | break; | ||
535 | if (!(oldmant.m32[1] << (dest->exp - 0x401d))) | ||
536 | return; | ||
537 | break; | ||
538 | default: | ||
539 | return; | ||
540 | } | ||
541 | break; | ||
542 | case FPCR_ROUND_RZ: | ||
543 | return; | ||
544 | default: | ||
545 | if (dest->sign ^ (mode - FPCR_ROUND_RM)) | ||
546 | break; | ||
547 | return; | ||
548 | } | ||
549 | |||
550 | switch (dest->exp) { | ||
551 | case 0 ... 0x3ffe: | ||
552 | dest->exp = 0x3fff; | ||
553 | dest->mant.m64 = 1ULL << 63; | ||
554 | break; | ||
555 | case 0x3fff ... 0x401e: | ||
556 | mask = 1 << (0x401e - dest->exp); | ||
557 | if (dest->mant.m32[0] += mask) | ||
558 | break; | ||
559 | dest->mant.m32[0] = 0x80000000; | ||
560 | dest->exp++; | ||
561 | break; | ||
562 | case 0x401f ... 0x403e: | ||
563 | mask = 1 << (0x403e - dest->exp); | ||
564 | if (dest->mant.m32[1] += mask) | ||
565 | break; | ||
566 | if (dest->mant.m32[0] += 1) | ||
567 | break; | ||
568 | dest->mant.m32[0] = 0x80000000; | ||
569 | dest->exp++; | ||
570 | break; | ||
571 | } | ||
572 | } | ||
573 | |||
/* modrem_kernel: Implementation of the FREM and FMOD instructions
   (which are exactly the same, except for the rounding used on the
   intermediate value) */

static struct fp_ext *
modrem_kernel(struct fp_ext *dest, struct fp_ext *src, int mode)
{
	struct fp_ext tmp;

	fp_dyadic_check(dest, src);

	/* Infinities and zeros */
	if (IS_INF(dest) || IS_ZERO(src)) {
		/* inf rem x, and x rem 0, are invalid operations */
		fp_set_nan(dest);
		return dest;
	}
	if (IS_ZERO(dest) || IS_INF(src))
		return dest;

	/* FIXME: there is almost certainly a smarter way to do this */
	/* dest - src * round(dest / src), with round() truncating (FMOD)
	   or rounding to nearest (FREM) */
	fp_copy_ext(&tmp, dest);
	fp_fdiv(&tmp, src);		/* NOTE: src might be modified */
	fp_roundint(&tmp, mode);
	fp_fmul(&tmp, src);
	fp_fsub(dest, &tmp);

	/* set the quotient byte */
	/* NOTE(review): taken from the low mantissa bits of the result,
	   not from the rounded quotient -- confirm against the 68881
	   definition of the FPSR quotient byte */
	fp_set_quotient((dest->mant.m64 & 0x7f) | (dest->sign << 7));
	return dest;
}
604 | |||
/* fp_fmod: Implements the kernel of the FMOD instruction.

   Again, the argument order is backwards.  The result, as defined in
   the Motorola manuals, is:

   fmod(src,dest) = (dest - (src * floor(dest / src))) */

struct fp_ext *
fp_fmod(struct fp_ext *dest, struct fp_ext *src)
{
	dprint(PINSTR, "fmod\n");
	/* intermediate quotient truncated toward zero */
	return modrem_kernel(dest, src, FPCR_ROUND_RZ);
}

/* fp_frem: Implements the kernel of the FREM instruction.

   frem(src,dest) = (dest - (src * round(dest / src)))
 */

struct fp_ext *
fp_frem(struct fp_ext *dest, struct fp_ext *src)
{
	dprint(PINSTR, "frem\n");
	/* intermediate quotient rounded to nearest (IEEE remainder) */
	return modrem_kernel(dest, src, FPCR_ROUND_RN);
}

/* fp_fint: round src to an integer in dest, using the rounding mode
   currently selected in the FPCR. */
struct fp_ext *
fp_fint(struct fp_ext *dest, struct fp_ext *src)
{
	dprint(PINSTR, "fint\n");

	fp_copy_ext(dest, src);

	fp_roundint(dest, FPDATA->rnd);

	return dest;
}

/* fp_fintrz: round src to an integer in dest, always truncating
   toward zero regardless of the FPCR rounding mode. */
struct fp_ext *
fp_fintrz(struct fp_ext *dest, struct fp_ext *src)
{
	dprint(PINSTR, "fintrz\n");

	fp_copy_ext(dest, src);

	fp_roundint(dest, FPCR_ROUND_RZ);

	return dest;
}
654 | |||
/* fp_fscale: FSCALE kernel -- scale dest by 2^(integer part of src),
   i.e. add the truncated integer value of src to dest's exponent. */
struct fp_ext *
fp_fscale(struct fp_ext *dest, struct fp_ext *src)
{
	int scale, oldround;

	dprint(PINSTR, "fscale\n");

	fp_dyadic_check(dest, src);

	/* Infinities */
	if (IS_INF(src)) {
		/* scaling by infinity is an invalid operation */
		fp_set_nan(dest);
		return dest;
	}
	if (IS_INF(dest))
		return dest;

	/* zeroes */
	if (IS_ZERO(src) || IS_ZERO(dest))
		return dest;

	/* Source exponent out of range */
	/* exp >= 0x400c means |src| >= 2^13, beyond the extended
	   exponent range.  NOTE(review): this reports overflow even for
	   large *negative* scale factors -- confirm vs. the 68881 manual */
	if (src->exp >= 0x400c) {
		fp_set_ovrflw(dest);
		return dest;
	}

	/* src must be rounded with round to zero. */
	oldround = FPDATA->rnd;
	FPDATA->rnd = FPCR_ROUND_RZ;
	scale = fp_conv_ext2long(src);
	FPDATA->rnd = oldround;

	/* new exponent */
	scale += dest->exp;

	if (scale >= 0x7fff) {
		fp_set_ovrflw(dest);
	} else if (scale <= 0) {
		fp_set_sr(FPSR_EXC_UNFL);
		fp_denormalize(dest, -scale);
	} else
		dest->exp = scale;

	return dest;
}
701 | |||
diff --git a/arch/m68k/math-emu/fp_arith.h b/arch/m68k/math-emu/fp_arith.h new file mode 100644 index 000000000000..2cc3f846c393 --- /dev/null +++ b/arch/m68k/math-emu/fp_arith.h | |||
@@ -0,0 +1,52 @@ | |||
1 | /* | ||
2 | |||
3 | fp_arith.h: floating-point math routines for the Linux-m68k | ||
4 | floating point emulator. | ||
5 | |||
6 | Copyright (c) 1998 David Huggins-Daines. | ||
7 | |||
8 | Somewhat based on the AlphaLinux floating point emulator, by David | ||
9 | Mosberger-Tang. | ||
10 | |||
11 | You may copy, modify, and redistribute this file under the terms of | ||
12 | the GNU General Public License, version 2, or any later version, at | ||
13 | your convenience. | ||
14 | |||
15 | */ | ||
16 | |||
17 | #ifndef FP_ARITH_H | ||
18 | #define FP_ARITH_H | ||
19 | |||
20 | /* easy ones */ | ||
21 | struct fp_ext * | ||
22 | fp_fabs(struct fp_ext *dest, struct fp_ext *src); | ||
23 | struct fp_ext * | ||
24 | fp_fneg(struct fp_ext *dest, struct fp_ext *src); | ||
25 | |||
26 | /* straightforward arithmetic */ | ||
27 | struct fp_ext * | ||
28 | fp_fadd(struct fp_ext *dest, struct fp_ext *src); | ||
29 | struct fp_ext * | ||
30 | fp_fsub(struct fp_ext *dest, struct fp_ext *src); | ||
31 | struct fp_ext * | ||
32 | fp_fcmp(struct fp_ext *dest, struct fp_ext *src); | ||
33 | struct fp_ext * | ||
34 | fp_ftst(struct fp_ext *dest, struct fp_ext *src); | ||
35 | struct fp_ext * | ||
36 | fp_fmul(struct fp_ext *dest, struct fp_ext *src); | ||
37 | struct fp_ext * | ||
38 | fp_fdiv(struct fp_ext *dest, struct fp_ext *src); | ||
39 | |||
40 | /* ones that do rounding and integer conversions */ | ||
41 | struct fp_ext * | ||
42 | fp_fmod(struct fp_ext *dest, struct fp_ext *src); | ||
43 | struct fp_ext * | ||
44 | fp_frem(struct fp_ext *dest, struct fp_ext *src); | ||
45 | struct fp_ext * | ||
46 | fp_fint(struct fp_ext *dest, struct fp_ext *src); | ||
47 | struct fp_ext * | ||
48 | fp_fintrz(struct fp_ext *dest, struct fp_ext *src); | ||
49 | struct fp_ext * | ||
50 | fp_fscale(struct fp_ext *dest, struct fp_ext *src); | ||
51 | |||
#endif	/* FP_ARITH_H */
diff --git a/arch/m68k/math-emu/fp_cond.S b/arch/m68k/math-emu/fp_cond.S new file mode 100644 index 000000000000..ddae8b1b8b83 --- /dev/null +++ b/arch/m68k/math-emu/fp_cond.S | |||
@@ -0,0 +1,334 @@ | |||
1 | /* | ||
2 | * fp_cond.S | ||
3 | * | ||
4 | * Copyright Roman Zippel, 1997. All rights reserved. | ||
5 | * | ||
6 | * Redistribution and use in source and binary forms, with or without | ||
7 | * modification, are permitted provided that the following conditions | ||
8 | * are met: | ||
9 | * 1. Redistributions of source code must retain the above copyright | ||
10 | * notice, and the entire permission notice in its entirety, | ||
11 | * including the disclaimer of warranties. | ||
12 | * 2. Redistributions in binary form must reproduce the above copyright | ||
13 | * notice, this list of conditions and the following disclaimer in the | ||
14 | * documentation and/or other materials provided with the distribution. | ||
15 | * 3. The name of the author may not be used to endorse or promote | ||
16 | * products derived from this software without specific prior | ||
17 | * written permission. | ||
18 | * | ||
19 | * ALTERNATIVELY, this product may be distributed under the terms of | ||
20 | * the GNU General Public License, in which case the provisions of the GPL are | ||
21 | * required INSTEAD OF the above restrictions. (This clause is | ||
22 | * necessary due to a potential bad interaction between the GPL and | ||
23 | * the restrictions contained in a BSD-style copyright.) | ||
24 | * | ||
25 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
26 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | ||
27 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
28 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, | ||
29 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
30 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
31 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
32 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
33 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
34 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
35 | * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
36 | */ | ||
37 | |||
38 | #include "fp_emu.h" | ||
39 | #include "fp_decode.h" | ||
40 | |||
	.globl	fp_fscc, fp_fbccw, fp_fbccl

#ifdef FPU_EMU_DEBUG
fp_fnop:
	printf	PDECODE,"fnop\n"
	jra	fp_end
#else
#define fp_fnop fp_end
#endif

| fp_fbccw: emulate FBcc with a word displacement; %d2 holds the
| instruction word -- a zero word here is FNOP.
fp_fbccw:
	tst.w	%d2
	jeq	fp_fnop
	printf	PDECODE,"fbccw "
	fp_get_pc %a0
	lea	(-2,%a0,%d2.w),%a0	| target = pc - 2 + disp16
	jra	1f

| fp_fbccl: emulate FBcc with a long displacement; the second
| displacement word is fetched from the instruction stream.
fp_fbccl:
	printf	PDECODE,"fbccl "
	fp_get_pc %a0
	move.l	%d2,%d0
	swap	%d0
	fp_get_instr_word %d0,fp_err_ua1	| low word of disp32
	lea	(-2,%a0,%d0.l),%a0
1:	printf	PDECODE,"%x",1,%a0
	move.l	%d2,%d0
	swap	%d0
	jsr	fp_compute_cond		| %d0 := -1 (true) / 0 (false)
	tst.l	%d0
	jeq	1f
	fp_put_pc %a0,1			| condition true: take the branch
1:	printf	PDECODE,"\n"
	jra	fp_end
75 | |||
| fp_fdbcc: emulate FDBcc -- if the condition (upper word of %d1) is
| false, decrement the data register; branch back unless it expired.
fp_fdbcc:
	printf	PDECODE,"fdbcc "
	fp_get_pc %a1			| calculate new pc
	fp_get_instr_word %d0,fp_err_ua1
	add.w	%d0,%a1
	fp_decode_addr_reg
	printf	PDECODE,"d%d,%x\n",2,%d0,%a1
	swap	%d1			| test condition in %d1
	tst.w	%d1
	jne	2f			| condition true: fall through
	move.l	%d0,%d1
	jsr	fp_get_data_reg
	subq.w	#1,%d0
	jcs	1f			| counter expired: no branch
	fp_put_pc %a1,1			| otherwise loop back
1:	jsr	fp_put_data_reg
2:	jra	fp_end
93 | |||
| set flags for decode macros for fs<cc>
do_fscc=1
do_no_pc_mode=1

| fp_fscc: emulate FScc -- evaluate the FPU condition from the
| instruction word (%d2) and store 0xff/0x00 at the effective address.
fp_fscc:
	printf	PDECODE,"fscc "
	move.l	%d2,%d0
	jsr	fp_compute_cond
	move.w	%d0,%d1			| keep the 0/-1 result in the
	swap	%d1			| upper word of %d1

	| decode addressing mode
	fp_decode_addr_mode

	.long	fp_data, fp_fdbcc
	.long	fp_indirect, fp_postinc
	.long	fp_predecr, fp_disp16
	.long	fp_extmode0, fp_extmode1

	| addressing mode: data register direct
fp_data:
	fp_mode_data_direct
	move.w	%d0,%d1		| save register nr
	jsr	fp_get_data_reg
	swap	%d1
	move.b	%d1,%d0		| only the low byte of the register changes
	swap	%d1
	jsr	fp_put_data_reg
	printf	PDECODE,"\n"
	jra	fp_end

| the remaining modes all end up writing one byte via fp_do_scc
fp_indirect:
	fp_mode_addr_indirect
	jra	fp_do_scc

fp_postinc:
	fp_mode_addr_indirect_postinc
	jra	fp_do_scc

fp_predecr:
	fp_mode_addr_indirect_predec
	jra	fp_do_scc

fp_disp16:
	fp_mode_addr_indirect_disp16
	jra	fp_do_scc

fp_extmode0:
	fp_mode_addr_indirect_extmode0
	jra	fp_do_scc

fp_extmode1:
	bfextu	%d2{#13,#3},%d0		| sub-mode from the register field
	jmp	([0f:w,%pc,%d0*4])

	.align	4
0:
	.long	fp_absolute_short, fp_absolute_long
	.long	fp_ill, fp_ill		| NOTE: jump here to ftrap.x
	.long	fp_ill, fp_ill
	.long	fp_ill, fp_ill

fp_absolute_short:
	fp_mode_abs_short
	jra	fp_do_scc

fp_absolute_long:
	fp_mode_abs_long
|	jra	fp_do_scc

| fp_do_scc: store the condition byte at the user address in %a0
fp_do_scc:
	swap	%d1
	putuser.b %d1,(%a0),fp_err_ua1,%a0
	printf	PDECODE,"\n"
	jra	fp_end
169 | |||
170 | |||
| FPSR condition code tests: NAN is bit 24, Z is bit 26, N is bit 27
#define tst_NAN	btst #24,%d1
#define tst_Z	btst #26,%d1
#define tst_N	btst #27,%d1

| fp_compute_cond: evaluate the conditional predicate in the low bits
| of %d0 against the FPSR condition codes; returns -1/0 in %d0.
fp_compute_cond:
	move.l	(FPD_FPSR,FPDATA),%d1
	btst	#4,%d0			| IEEE nonaware test?
	jeq	1f
	tst_NAN				| yes: a NaN operand sets BSUN (bit 15)
	jeq	1f
	bset	#15,%d1
	bset	#7,%d1			| NOTE(review): presumably the accrued
	move.l	%d1,(FPD_FPSR,FPDATA)	| exception bit -- confirm vs. 68881
1:	and.w	#0xf,%d0
	jmp	([0f:w,%pc,%d0.w*4])	| dispatch on the low 4 predicate bits

	.align	4
0:
	.long	fp_f	, fp_eq	, fp_ogt, fp_oge
	.long	fp_olt	, fp_ole, fp_ogl, fp_or
	.long	fp_un	, fp_ueq, fp_ugt, fp_uge
	.long	fp_ult	, fp_ule, fp_ne	, fp_t

| never true
fp_f:
	moveq	#0,%d0
	rts

| equal: Z
fp_eq:
	moveq	#0,%d0
	tst_Z
	jeq	1f
	moveq	#-1,%d0
1:	rts

| ordered greater than: !NAN && !Z && !N
fp_ogt:
	moveq	#0,%d0
	tst_NAN
	jne	1f
	tst_Z
	jne	1f
	tst_N
	jne	1f
	moveq	#-1,%d0
1:	rts

| ordered greater or equal: Z || (!NAN && !N)
fp_oge:
	moveq	#-1,%d0
	tst_Z
	jne	2f
	tst_NAN
	jne	1f
	tst_N
	jeq	2f
1:	moveq	#0,%d0
2:	rts

| ordered less than: !NAN && !Z && N
fp_olt:
	moveq	#0,%d0
	tst_NAN
	jne	1f
	tst_Z
	jne	1f
	tst_N
	jeq	1f
	moveq	#-1,%d0
1:	rts

| ordered less or equal: Z || (!NAN && N)
fp_ole:
	moveq	#-1,%d0
	tst_Z
	jne	2f
	tst_NAN
	jne	1f
	tst_N
	jne	2f
1:	moveq	#0,%d0
2:	rts

| ordered greater or less (not equal): !NAN && !Z
fp_ogl:
	moveq	#0,%d0
	tst_NAN
	jne	1f
	tst_Z
	jne	1f
	moveq	#-1,%d0
1:	rts

| ordered: !NAN
fp_or:
	moveq	#0,%d0
	tst_NAN
	jne	1f
	moveq	#-1,%d0
1:	rts

| unordered: NAN  (the 1: target is the rts in fp_ueq below)
fp_un:
	moveq	#0,%d0
	tst_NAN
	jeq	1f
	moveq	#-1,%d0
	rts

| unordered or equal: NAN || Z
fp_ueq:
	moveq	#-1,%d0
	tst_NAN
	jne	1f
	tst_Z
	jne	1f
	moveq	#0,%d0
1:	rts

| unordered or greater: NAN || (!N && !Z)
fp_ugt:
	moveq	#-1,%d0
	tst_NAN
	jne	2f
	tst_N
	jne	1f
	tst_Z
	jeq	2f
1:	moveq	#0,%d0
2:	rts

| unordered, greater, or equal: NAN || Z || !N
fp_uge:
	moveq	#-1,%d0
	tst_NAN
	jne	1f
	tst_Z
	jne	1f
	tst_N
	jeq	1f
	moveq	#0,%d0
1:	rts

| unordered or less than: NAN || (!Z && N)
fp_ult:
	moveq	#-1,%d0
	tst_NAN
	jne	2f
	tst_Z
	jne	1f
	tst_N
	jne	2f
1:	moveq	#0,%d0
2:	rts

| unordered, less, or equal: NAN || Z || N
fp_ule:
	moveq	#-1,%d0
	tst_NAN
	jne	1f
	tst_Z
	jne	1f
	tst_N
	jne	1f
	moveq	#0,%d0
1:	rts

| not equal: !Z
fp_ne:
	moveq	#0,%d0
	tst_Z
	jne	1f
	moveq	#-1,%d0
1:	rts

| always true
fp_t:
	moveq	#-1,%d0
	rts
diff --git a/arch/m68k/math-emu/fp_decode.h b/arch/m68k/math-emu/fp_decode.h new file mode 100644 index 000000000000..759679d9ab96 --- /dev/null +++ b/arch/m68k/math-emu/fp_decode.h | |||
@@ -0,0 +1,417 @@ | |||
1 | /* | ||
2 | * fp_decode.h | ||
3 | * | ||
4 | * Copyright Roman Zippel, 1997. All rights reserved. | ||
5 | * | ||
6 | * Redistribution and use in source and binary forms, with or without | ||
7 | * modification, are permitted provided that the following conditions | ||
8 | * are met: | ||
9 | * 1. Redistributions of source code must retain the above copyright | ||
10 | * notice, and the entire permission notice in its entirety, | ||
11 | * including the disclaimer of warranties. | ||
12 | * 2. Redistributions in binary form must reproduce the above copyright | ||
13 | * notice, this list of conditions and the following disclaimer in the | ||
14 | * documentation and/or other materials provided with the distribution. | ||
15 | * 3. The name of the author may not be used to endorse or promote | ||
16 | * products derived from this software without specific prior | ||
17 | * written permission. | ||
18 | * | ||
19 | * ALTERNATIVELY, this product may be distributed under the terms of | ||
20 | * the GNU General Public License, in which case the provisions of the GPL are | ||
21 | * required INSTEAD OF the above restrictions. (This clause is | ||
22 | * necessary due to a potential bad interaction between the GPL and | ||
23 | * the restrictions contained in a BSD-style copyright.) | ||
24 | * | ||
25 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
26 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | ||
27 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
28 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, | ||
29 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
30 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
31 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
32 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
33 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
34 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
35 | * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
36 | */ | ||
37 | |||
38 | #ifndef _FP_DECODE_H | ||
39 | #define _FP_DECODE_H | ||
40 | |||
41 | /* These macros do the dirty work of the instr decoding, several variables | ||
42 | * can be defined in the source file to modify the work of these macros, | ||
43 | * currently the following variables are used: | ||
44 | * ... | ||
45 | * The register usage: | ||
46 | * d0 - will contain source operand for data direct mode, | ||
47 | * otherwise scratch register | ||
48 | * d1 - upper 16bit are reserved for caller | ||
49 | * lower 16bit may contain further arguments, | ||
50 | * is destroyed during decoding | ||
51 | * d2 - contains first two instruction words, | ||
52 | * first word will be used for extension word | ||
53 | * a0 - will point to source/dest operand for any indirect mode | ||
54 | * otherwise scratch register | ||
55 | * a1 - scratch register | ||
56 | * a2 - base addr to the task structure | ||
57 | * | ||
58 | * the current implementation doesn't check for every disallowed | ||
59 | * addressing mode (e.g. pc relative modes as destination), as long | ||
60 | * as it only means a new addressing mode, which should not appear | ||
61 | * in a program and that doesn't crash the emulation, I think it's | ||
62 | * not a problem to allow these modes. | ||
63 | */ | ||
64 | |||
65 | do_fmovem=0 | ||
66 | do_fmovem_cr=0 | ||
67 | do_no_pc_mode=0 | ||
68 | do_fscc=0 | ||
69 | |||
70 | | first decoding of the instr type | ||
71 | | this separates the conditional instr | ||
72 | .macro fp_decode_cond_instr_type | ||
73 | bfextu %d2{#8,#2},%d0 | ||
74 | jmp ([0f:w,%pc,%d0*4]) | ||
75 | |||
76 | .align 4 | ||
77 | 0: | ||
78 | | .long "f<op>","fscc/fdbcc" | ||
79 | | .long "fbccw","fbccl" | ||
80 | .endm | ||
81 | |||
82 | | second decoding of the instr type | ||
83 | | this separates most move instr | ||
84 | .macro fp_decode_move_instr_type | ||
85 | bfextu %d2{#16,#3},%d0 | ||
86 | jmp ([0f:w,%pc,%d0*4]) | ||
87 | |||
88 | .align 4 | ||
89 | 0: | ||
90 | | .long "f<op> fpx,fpx","invalid instr" | ||
91 | | .long "f<op> <ea>,fpx","fmove fpx,<ea>" | ||
92 | | .long "fmovem <ea>,fpcr","fmovem <ea>,fpx" | ||
93 | | .long "fmovem fpcr,<ea>","fmovem fpx,<ea>" | ||
94 | .endm | ||
95 | |||
96 | | extract the source specifier, specifies | ||
97 | | either source fp register or data format | ||
98 | .macro fp_decode_sourcespec | ||
99 | bfextu %d2{#19,#3},%d0 | ||
100 | .endm | ||
101 | |||
102 | | decode destination format for fmove reg,ea | ||
103 | .macro fp_decode_dest_format | ||
104 | bfextu %d2{#19,#3},%d0 | ||
105 | .endm | ||
106 | |||
107 | | decode source register for fmove reg,ea | ||
108 | .macro fp_decode_src_reg | ||
109 | bfextu %d2{#22,#3},%d0 | ||
110 | .endm | ||
111 | |||
112 | | extract the addressing mode | ||
113 | | it depends on the instr which of the modes is valid | ||
114 | .macro fp_decode_addr_mode | ||
115 | bfextu %d2{#10,#3},%d0 | ||
116 | jmp ([0f:w,%pc,%d0*4]) | ||
117 | |||
118 | .align 4 | ||
119 | 0: | ||
120 | | .long "data register direct","addr register direct" | ||
121 | | .long "addr register indirect" | ||
122 | | .long "addr register indirect postincrement" | ||
123 | | .long "addr register indirect predecrement" | ||
124 | | .long "addr register + index16" | ||
125 | | .long "extension mode1","extension mode2" | ||
126 | .endm | ||
127 | |||
128 | | extract the register for the addressing mode | ||
129 | .macro fp_decode_addr_reg | ||
130 | bfextu %d2{#13,#3},%d0 | ||
131 | .endm | ||
132 | |||
133 | | decode the 8bit diplacement from the brief extension word | ||
134 | .macro fp_decode_disp8 | ||
135 | move.b %d2,%d0 | ||
136 | ext.w %d0 | ||
137 | .endm | ||
138 | |||
139 | | decode the index of the brief/full extension word | ||
140 | .macro fp_decode_index | ||
141 | bfextu %d2{#17,#3},%d0 | get the register nr | ||
142 | btst #15,%d2 | test for data/addr register | ||
143 | jne 1\@f | ||
144 | printf PDECODE,"d%d",1,%d0 | ||
145 | jsr fp_get_data_reg | ||
146 | jra 2\@f | ||
147 | 1\@: printf PDECODE,"a%d",1,%d0 | ||
148 | jsr fp_get_addr_reg | ||
149 | move.l %a0,%d0 | ||
150 | 2\@: | ||
151 | debug lea "'l'.w,%a0" | ||
152 | btst #11,%d2 | 16/32 bit size? | ||
153 | jne 3\@f | ||
154 | debug lea "'w'.w,%a0" | ||
155 | ext.l %d0 | ||
156 | 3\@: printf PDECODE,":%c",1,%a0 | ||
157 | move.w %d2,%d1 | scale factor | ||
158 | rol.w #7,%d1 | ||
159 | and.w #3,%d1 | ||
160 | debug move.l "%d1,-(%sp)" | ||
161 | debug ext.l "%d1" | ||
162 | printf PDECODE,":%d",1,%d1 | ||
163 | debug move.l "(%sp)+,%d1" | ||
164 | lsl.l %d1,%d0 | ||
165 | .endm | ||
166 | |||
167 | | decode the base displacement size | ||
168 | .macro fp_decode_basedisp | ||
169 | bfextu %d2{#26,#2},%d0 | ||
170 | jmp ([0f:w,%pc,%d0*4]) | ||
171 | |||
172 | .align 4 | ||
173 | 0: | ||
174 | | .long "reserved","null displacement" | ||
175 | | .long "word displacement","long displacement" | ||
176 | .endm | ||
177 | |||
178 | .macro fp_decode_outerdisp | ||
179 | bfextu %d2{#30,#2},%d0 | ||
180 | jmp ([0f:w,%pc,%d0*4]) | ||
181 | |||
182 | .align 4 | ||
183 | 0: | ||
184 | | .long "no memory indirect action/reserved","null outer displacement" | ||
185 | | .long "word outer displacement","long outer displacement" | ||
186 | .endm | ||
187 | |||
188 | | get the extension word and test for brief or full extension type | ||
189 | .macro fp_get_test_extword label | ||
190 | fp_get_instr_word %d2,fp_err_ua1 | ||
191 | btst #8,%d2 | ||
192 | jne \label | ||
193 | .endm | ||
194 | |||
195 | |||
196 | | test if %pc is the base register for the indirect addr mode | ||
197 | .macro fp_test_basereg_d16 label | ||
198 | btst #20,%d2 | ||
199 | jeq \label | ||
200 | .endm | ||
201 | |||
202 | | test if %pc is the base register for one of the extended modes | ||
203 | .macro fp_test_basereg_ext label | ||
204 | btst #19,%d2 | ||
205 | jeq \label | ||
206 | .endm | ||
207 | |||
208 | .macro fp_test_suppr_index label | ||
209 | btst #6,%d2 | ||
210 | jne \label | ||
211 | .endm | ||
212 | |||
213 | |||
214 | | addressing mode: data register direct | ||
215 | .macro fp_mode_data_direct | ||
216 | fp_decode_addr_reg | ||
217 | printf PDECODE,"d%d",1,%d0 | ||
218 | .endm | ||
219 | |||
220 | | addressing mode: address register indirect | ||
221 | .macro fp_mode_addr_indirect | ||
222 | fp_decode_addr_reg | ||
223 | printf PDECODE,"(a%d)",1,%d0 | ||
224 | jsr fp_get_addr_reg | ||
225 | .endm | ||
226 | |||
227 | | adjust stack for byte moves from/to stack | ||
228 | .macro fp_test_sp_byte_move | ||
229 | .if !do_fmovem | ||
230 | .if do_fscc | ||
231 | move.w #6,%d1 | ||
232 | .endif | ||
233 | cmp.w #7,%d0 | ||
234 | jne 1\@f | ||
235 | .if !do_fscc | ||
236 | cmp.w #6,%d1 | ||
237 | jne 1\@f | ||
238 | .endif | ||
239 | move.w #4,%d1 | ||
240 | 1\@: | ||
241 | .endif | ||
242 | .endm | ||
243 | |||
244 | | addressing mode: address register indirect with postincrement | ||
245 | .macro fp_mode_addr_indirect_postinc | ||
246 | fp_decode_addr_reg | ||
247 | printf PDECODE,"(a%d)+",1,%d0 | ||
248 | fp_test_sp_byte_move | ||
249 | jsr fp_get_addr_reg | ||
250 | move.l %a0,%a1 | save addr | ||
251 | .if do_fmovem | ||
252 | lea (%a0,%d1.w*4),%a0 | ||
253 | .if !do_fmovem_cr | ||
254 | lea (%a0,%d1.w*8),%a0 | ||
255 | .endif | ||
256 | .else | ||
257 | add.w (fp_datasize,%d1.w*2),%a0 | ||
258 | .endif | ||
259 | jsr fp_put_addr_reg | ||
260 | move.l %a1,%a0 | ||
261 | .endm | ||
262 | |||
263 | | addressing mode: address register indirect with predecrement | ||
264 | .macro fp_mode_addr_indirect_predec | ||
265 | fp_decode_addr_reg | ||
266 | printf PDECODE,"-(a%d)",1,%d0 | ||
267 | fp_test_sp_byte_move | ||
268 | jsr fp_get_addr_reg | ||
269 | .if do_fmovem | ||
270 | .if !do_fmovem_cr | ||
271 | lea (-12,%a0),%a1 | setup to addr of 1st reg to move | ||
272 | neg.w %d1 | ||
273 | lea (%a0,%d1.w*4),%a0 | ||
274 | add.w %d1,%d1 | ||
275 | lea (%a0,%d1.w*4),%a0 | ||
276 | jsr fp_put_addr_reg | ||
277 | move.l %a1,%a0 | ||
278 | .else | ||
279 | neg.w %d1 | ||
280 | lea (%a0,%d1.w*4),%a0 | ||
281 | jsr fp_put_addr_reg | ||
282 | .endif | ||
283 | .else | ||
284 | sub.w (fp_datasize,%d1.w*2),%a0 | ||
285 | jsr fp_put_addr_reg | ||
286 | .endif | ||
287 | .endm | ||
288 | |||
289 | | addressing mode: address register/programm counter indirect | ||
290 | | with 16bit displacement | ||
291 | .macro fp_mode_addr_indirect_disp16 | ||
292 | .if !do_no_pc_mode | ||
293 | fp_test_basereg_d16 1f | ||
294 | printf PDECODE,"pc" | ||
295 | fp_get_pc %a0 | ||
296 | jra 2f | ||
297 | .endif | ||
298 | 1: fp_decode_addr_reg | ||
299 | printf PDECODE,"a%d",1,%d0 | ||
300 | jsr fp_get_addr_reg | ||
301 | 2: fp_get_instr_word %a1,fp_err_ua1 | ||
302 | printf PDECODE,"@(%x)",1,%a1 | ||
303 | add.l %a1,%a0 | ||
304 | .endm | ||
305 | |||
306 | | perform preindex (if I/IS == 0xx and xx != 00) | ||
307 | .macro fp_do_preindex | ||
308 | moveq #3,%d0 | ||
309 | and.w %d2,%d0 | ||
310 | jeq 1f | ||
311 | btst #2,%d2 | ||
312 | jne 1f | ||
313 | printf PDECODE,")@(" | ||
314 | getuser.l (%a1),%a1,fp_err_ua1,%a1 | ||
315 | debug jra "2f" | ||
316 | 1: printf PDECODE,"," | ||
317 | 2: | ||
318 | .endm | ||
319 | |||
320 | | perform postindex (if I/IS == 1xx) | ||
321 | .macro fp_do_postindex | ||
322 | btst #2,%d2 | ||
323 | jeq 1f | ||
324 | printf PDECODE,")@(" | ||
325 | getuser.l (%a1),%a1,fp_err_ua1,%a1 | ||
326 | debug jra "2f" | ||
327 | 1: printf PDECODE,"," | ||
328 | 2: | ||
329 | .endm | ||
330 | |||
331 | | all other indirect addressing modes will finally end up here | ||
332 | .macro fp_mode_addr_indirect_extmode0 | ||
333 | .if !do_no_pc_mode | ||
334 | fp_test_basereg_ext 1f | ||
335 | printf PDECODE,"pc" | ||
336 | fp_get_pc %a0 | ||
337 | jra 2f | ||
338 | .endif | ||
339 | 1: fp_decode_addr_reg | ||
340 | printf PDECODE,"a%d",1,%d0 | ||
341 | jsr fp_get_addr_reg | ||
342 | 2: move.l %a0,%a1 | ||
343 | swap %d2 | ||
344 | fp_get_test_extword 3f | ||
345 | | addressing mode: address register/programm counter indirect | ||
346 | | with index and 8bit displacement | ||
347 | fp_decode_disp8 | ||
348 | debug ext.l "%d0" | ||
349 | printf PDECODE,"@(%x,",1,%d0 | ||
350 | add.w %d0,%a1 | ||
351 | fp_decode_index | ||
352 | add.l %d0,%a1 | ||
353 | printf PDECODE,")" | ||
354 | jra 9f | ||
355 | 3: | addressing mode: address register/programm counter memory indirect | ||
356 | | with base and/or outer displacement | ||
357 | btst #7,%d2 | base register suppressed? | ||
358 | jeq 1f | ||
359 | printf PDECODE,"!" | ||
360 | sub.l %a1,%a1 | ||
361 | 1: printf PDECODE,"@(" | ||
362 | fp_decode_basedisp | ||
363 | |||
364 | .long fp_ill,1f | ||
365 | .long 2f,3f | ||
366 | |||
367 | #ifdef FPU_EMU_DEBUG | ||
368 | 1: printf PDECODE,"0" | null base displacement | ||
369 | jra 1f | ||
370 | #endif | ||
371 | 2: fp_get_instr_word %a0,fp_err_ua1 | 16bit base displacement | ||
372 | printf PDECODE,"%x:w",1,%a0 | ||
373 | jra 4f | ||
374 | 3: fp_get_instr_long %a0,fp_err_ua1 | 32bit base displacement | ||
375 | printf PDECODE,"%x:l",1,%a0 | ||
376 | 4: add.l %a0,%a1 | ||
377 | 1: | ||
378 | fp_do_postindex | ||
379 | fp_test_suppr_index 1f | ||
380 | fp_decode_index | ||
381 | add.l %d0,%a1 | ||
382 | 1: fp_do_preindex | ||
383 | |||
384 | fp_decode_outerdisp | ||
385 | |||
386 | .long 5f,1f | ||
387 | .long 2f,3f | ||
388 | |||
389 | #ifdef FPU_EMU_DEBUG | ||
390 | 1: printf PDECODE,"0" | null outer displacement | ||
391 | jra 1f | ||
392 | #endif | ||
393 | 2: fp_get_instr_word %a0,fp_err_ua1 | 16bit outer displacement | ||
394 | printf PDECODE,"%x:w",1,%a0 | ||
395 | jra 4f | ||
396 | 3: fp_get_instr_long %a0,fp_err_ua1 | 32bit outer displacement | ||
397 | printf PDECODE,"%x:l",1,%a0 | ||
398 | 4: add.l %a0,%a1 | ||
399 | 1: | ||
400 | 5: printf PDECODE,")" | ||
401 | 9: move.l %a1,%a0 | ||
402 | swap %d2 | ||
403 | .endm | ||
404 | |||
405 | | get the absolute short address from user space | ||
406 | .macro fp_mode_abs_short | ||
407 | fp_get_instr_word %a0,fp_err_ua1 | ||
408 | printf PDECODE,"%x.w",1,%a0 | ||
409 | .endm | ||
410 | |||
411 | | get the absolute long address from user space | ||
412 | .macro fp_mode_abs_long | ||
413 | fp_get_instr_long %a0,fp_err_ua1 | ||
414 | printf PDECODE,"%x.l",1,%a0 | ||
415 | .endm | ||
416 | |||
417 | #endif /* _FP_DECODE_H */ | ||
diff --git a/arch/m68k/math-emu/fp_emu.h b/arch/m68k/math-emu/fp_emu.h new file mode 100644 index 000000000000..1d6edc975d89 --- /dev/null +++ b/arch/m68k/math-emu/fp_emu.h | |||
@@ -0,0 +1,146 @@ | |||
1 | /* | ||
2 | * fp_emu.h | ||
3 | * | ||
4 | * Copyright Roman Zippel, 1997. All rights reserved. | ||
5 | * | ||
6 | * Redistribution and use in source and binary forms, with or without | ||
7 | * modification, are permitted provided that the following conditions | ||
8 | * are met: | ||
9 | * 1. Redistributions of source code must retain the above copyright | ||
10 | * notice, and the entire permission notice in its entirety, | ||
11 | * including the disclaimer of warranties. | ||
12 | * 2. Redistributions in binary form must reproduce the above copyright | ||
13 | * notice, this list of conditions and the following disclaimer in the | ||
14 | * documentation and/or other materials provided with the distribution. | ||
15 | * 3. The name of the author may not be used to endorse or promote | ||
16 | * products derived from this software without specific prior | ||
17 | * written permission. | ||
18 | * | ||
19 | * ALTERNATIVELY, this product may be distributed under the terms of | ||
20 | * the GNU General Public License, in which case the provisions of the GPL are | ||
21 | * required INSTEAD OF the above restrictions. (This clause is | ||
22 | * necessary due to a potential bad interaction between the GPL and | ||
23 | * the restrictions contained in a BSD-style copyright.) | ||
24 | * | ||
25 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
26 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | ||
27 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
28 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, | ||
29 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
30 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
31 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
32 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
33 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
34 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
35 | * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
36 | */ | ||
37 | |||
38 | #ifndef _FP_EMU_H | ||
39 | #define _FP_EMU_H | ||
40 | |||
41 | #ifdef __ASSEMBLY__ | ||
42 | #include <asm/offsets.h> | ||
43 | #endif | ||
44 | #include <asm/math-emu.h> | ||
45 | |||
46 | #ifndef __ASSEMBLY__ | ||
47 | |||
48 | #define IS_INF(a) ((a)->exp == 0x7fff) | ||
49 | #define IS_ZERO(a) ((a)->mant.m64 == 0) | ||
50 | |||
51 | |||
52 | #define fp_set_sr(bit) ({ \ | ||
53 | FPDATA->fpsr |= 1 << (bit); \ | ||
54 | }) | ||
55 | |||
56 | #define fp_set_quotient(quotient) ({ \ | ||
57 | FPDATA->fpsr &= 0xff00ffff; \ | ||
58 | FPDATA->fpsr |= ((quotient) & 0xff) << 16; \ | ||
59 | }) | ||
60 | |||
61 | /* linkage for several useful functions */ | ||
62 | |||
63 | /* Normalize the extended struct, return 0 for a NaN */ | ||
64 | #define fp_normalize_ext(fpreg) ({ \ | ||
65 | register struct fp_ext *reg asm ("a0") = fpreg; \ | ||
66 | register int res asm ("d0"); \ | ||
67 | \ | ||
68 | asm volatile ("jsr fp_conv_ext2ext" \ | ||
69 | : "=d" (res) : "a" (reg) \ | ||
70 | : "a1", "d1", "d2", "memory"); \ | ||
71 | res; \ | ||
72 | }) | ||
73 | |||
74 | #define fp_copy_ext(dest, src) ({ \ | ||
75 | *dest = *src; \ | ||
76 | }) | ||
77 | |||
78 | #define fp_monadic_check(dest, src) ({ \ | ||
79 | fp_copy_ext(dest, src); \ | ||
80 | if (!fp_normalize_ext(dest)) \ | ||
81 | return dest; \ | ||
82 | }) | ||
83 | |||
84 | #define fp_dyadic_check(dest, src) ({ \ | ||
85 | if (!fp_normalize_ext(dest)) \ | ||
86 | return dest; \ | ||
87 | if (!fp_normalize_ext(src)) { \ | ||
88 | fp_copy_ext(dest, src); \ | ||
89 | return dest; \ | ||
90 | } \ | ||
91 | }) | ||
92 | |||
93 | extern const struct fp_ext fp_QNaN; | ||
94 | extern const struct fp_ext fp_Inf; | ||
95 | |||
96 | #define fp_set_nan(dest) ({ \ | ||
97 | fp_set_sr(FPSR_EXC_OPERR); \ | ||
98 | *dest = fp_QNaN; \ | ||
99 | }) | ||
100 | |||
101 | /* TODO check rounding mode? */ | ||
102 | #define fp_set_ovrflw(dest) ({ \ | ||
103 | fp_set_sr(FPSR_EXC_OVFL); \ | ||
104 | dest->exp = 0x7fff; \ | ||
105 | dest->mant.m64 = 0; \ | ||
106 | }) | ||
107 | |||
108 | #define fp_conv_ext2long(src) ({ \ | ||
109 | register struct fp_ext *__src asm ("a0") = src; \ | ||
110 | register int __res asm ("d0"); \ | ||
111 | \ | ||
112 | asm volatile ("jsr fp_conv_ext2long" \ | ||
113 | : "=d" (__res) : "a" (__src) \ | ||
114 | : "a1", "d1", "d2", "memory"); \ | ||
115 | __res; \ | ||
116 | }) | ||
117 | |||
118 | #define fp_conv_long2ext(dest, src) ({ \ | ||
119 | register struct fp_ext *__dest asm ("a0") = dest; \ | ||
120 | register int __src asm ("d0") = src; \ | ||
121 | \ | ||
122 | asm volatile ("jsr fp_conv_ext2long" \ | ||
123 | : : "d" (__src), "a" (__dest) \ | ||
124 | : "a1", "d1", "d2", "memory"); \ | ||
125 | }) | ||
126 | |||
127 | #else /* __ASSEMBLY__ */ | ||
128 | |||
129 | /* | ||
130 | * set, reset or clear a bit in the fp status register | ||
131 | */ | ||
132 | .macro fp_set_sr bit | ||
133 | bset #(\bit&7),(FPD_FPSR+3-(\bit/8),FPDATA) | ||
134 | .endm | ||
135 | |||
136 | .macro fp_clr_sr bit | ||
137 | bclr #(\bit&7),(FPD_FPSR+3-(\bit/8),FPDATA) | ||
138 | .endm | ||
139 | |||
140 | .macro fp_tst_sr bit | ||
141 | btst #(\bit&7),(FPD_FPSR+3-(\bit/8),FPDATA) | ||
142 | .endm | ||
143 | |||
144 | #endif /* __ASSEMBLY__ */ | ||
145 | |||
146 | #endif /* _FP_EMU_H */ | ||
diff --git a/arch/m68k/math-emu/fp_entry.S b/arch/m68k/math-emu/fp_entry.S new file mode 100644 index 000000000000..5ec2d9101ea3 --- /dev/null +++ b/arch/m68k/math-emu/fp_entry.S | |||
@@ -0,0 +1,325 @@ | |||
1 | /* | ||
2 | * fp_emu.S | ||
3 | * | ||
4 | * Copyright Roman Zippel, 1997. All rights reserved. | ||
5 | * | ||
6 | * Redistribution and use in source and binary forms, with or without | ||
7 | * modification, are permitted provided that the following conditions | ||
8 | * are met: | ||
9 | * 1. Redistributions of source code must retain the above copyright | ||
10 | * notice, and the entire permission notice in its entirety, | ||
11 | * including the disclaimer of warranties. | ||
12 | * 2. Redistributions in binary form must reproduce the above copyright | ||
13 | * notice, this list of conditions and the following disclaimer in the | ||
14 | * documentation and/or other materials provided with the distribution. | ||
15 | * 3. The name of the author may not be used to endorse or promote | ||
16 | * products derived from this software without specific prior | ||
17 | * written permission. | ||
18 | * | ||
19 | * ALTERNATIVELY, this product may be distributed under the terms of | ||
20 | * the GNU General Public License, in which case the provisions of the GPL are | ||
21 | * required INSTEAD OF the above restrictions. (This clause is | ||
22 | * necessary due to a potential bad interaction between the GPL and | ||
23 | * the restrictions contained in a BSD-style copyright.) | ||
24 | * | ||
25 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
26 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | ||
27 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
28 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, | ||
29 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
30 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
31 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
32 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
33 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
34 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
35 | * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
36 | */ | ||
37 | |||
38 | #include <linux/config.h> | ||
39 | #include <linux/linkage.h> | ||
40 | #include <asm/entry.h> | ||
41 | |||
42 | #include "fp_emu.h" | ||
43 | |||
44 | .globl fpu_emu | ||
45 | .globl fp_debugprint | ||
46 | .globl fp_err_ua1,fp_err_ua2 | ||
47 | |||
48 | .text | ||
49 | fpu_emu: | ||
50 | SAVE_ALL_INT | ||
51 | GET_CURRENT(%d0) | ||
52 | |||
53 | #if defined(CPU_M68020_OR_M68030) && defined(CPU_M68040_OR_M68060) | ||
54 | tst.l m68k_is040or060 | ||
55 | jeq 1f | ||
56 | #endif | ||
57 | #if defined(CPU_M68040_OR_M68060) | ||
58 | move.l (FPS_PC2,%sp),(FPS_PC,%sp) | ||
59 | #endif | ||
60 | 1: | ||
61 | | emulate the instruction | ||
62 | jsr fp_scan | ||
63 | |||
64 | #if defined(CONFIG_M68060) | ||
65 | #if !defined(CPU_M68060_ONLY) | ||
66 | btst #3,m68k_cputype+3 | ||
67 | jeq 1f | ||
68 | #endif | ||
69 | btst #7,(FPS_SR,%sp) | ||
70 | jne fp_sendtrace060 | ||
71 | #endif | ||
72 | 1: | ||
73 | | emulation successful? | ||
74 | tst.l %d0 | ||
75 | jeq ret_from_exception | ||
76 | |||
77 | | send some signal to program here | ||
78 | |||
79 | jra ret_from_exception | ||
80 | |||
81 | | we jump here after an access error while trying to access | ||
82 | | user space, we correct stackpointer and send a SIGSEGV to | ||
83 | | the user process | ||
84 | fp_err_ua2: | ||
85 | addq.l #4,%sp | ||
86 | fp_err_ua1: | ||
87 | addq.l #4,%sp | ||
88 | move.l %a0,-(%sp) | ||
89 | pea SEGV_MAPERR | ||
90 | pea SIGSEGV | ||
91 | jsr fpemu_signal | ||
92 | add.w #12,%sp | ||
93 | jra ret_from_exception | ||
94 | |||
95 | #if defined(CONFIG_M68060) | ||
96 | | send a trace signal if we are debugged | ||
97 | | it does not really belong here, but... | ||
98 | fp_sendtrace060: | ||
99 | move.l (FPS_PC,%sp),-(%sp) | ||
100 | pea TRAP_TRACE | ||
101 | pea SIGTRAP | ||
102 | jsr fpemu_signal | ||
103 | add.w #12,%sp | ||
104 | jra ret_from_exception | ||
105 | #endif | ||
106 | |||
107 | .globl fp_get_data_reg, fp_put_data_reg | ||
108 | .globl fp_get_addr_reg, fp_put_addr_reg | ||
109 | |||
110 | | Entry points to get/put a register. Some of them can be get/put | ||
111 | | directly, others are on the stack, as we read/write the stack | ||
112 | | directly here, these function may only be called from within | ||
113 | | instruction decoding, otherwise the stack pointer is incorrect | ||
114 | | and the stack gets corrupted. | ||
115 | fp_get_data_reg: | ||
116 | jmp ([0f:w,%pc,%d0.w*4]) | ||
117 | |||
118 | .align 4 | ||
119 | 0: | ||
120 | .long fp_get_d0, fp_get_d1 | ||
121 | .long fp_get_d2, fp_get_d3 | ||
122 | .long fp_get_d4, fp_get_d5 | ||
123 | .long fp_get_d6, fp_get_d7 | ||
124 | |||
125 | fp_get_d0: | ||
126 | move.l (PT_D0+8,%sp),%d0 | ||
127 | printf PREGISTER,"{d0->%08x}",1,%d0 | ||
128 | rts | ||
129 | |||
130 | fp_get_d1: | ||
131 | move.l (PT_D1+8,%sp),%d0 | ||
132 | printf PREGISTER,"{d1->%08x}",1,%d0 | ||
133 | rts | ||
134 | |||
135 | fp_get_d2: | ||
136 | move.l (PT_D2+8,%sp),%d0 | ||
137 | printf PREGISTER,"{d2->%08x}",1,%d0 | ||
138 | rts | ||
139 | |||
140 | fp_get_d3: | ||
141 | move.l %d3,%d0 | ||
142 | printf PREGISTER,"{d3->%08x}",1,%d0 | ||
143 | rts | ||
144 | |||
145 | fp_get_d4: | ||
146 | move.l %d4,%d0 | ||
147 | printf PREGISTER,"{d4->%08x}",1,%d0 | ||
148 | rts | ||
149 | |||
150 | fp_get_d5: | ||
151 | move.l %d5,%d0 | ||
152 | printf PREGISTER,"{d5->%08x}",1,%d0 | ||
153 | rts | ||
154 | |||
155 | fp_get_d6: | ||
156 | move.l %d6,%d0 | ||
157 | printf PREGISTER,"{d6->%08x}",1,%d0 | ||
158 | rts | ||
159 | |||
160 | fp_get_d7: | ||
161 | move.l %d7,%d0 | ||
162 | printf PREGISTER,"{d7->%08x}",1,%d0 | ||
163 | rts | ||
164 | |||
165 | fp_put_data_reg: | ||
166 | jmp ([0f:w,%pc,%d1.w*4]) | ||
167 | |||
168 | .align 4 | ||
169 | 0: | ||
170 | .long fp_put_d0, fp_put_d1 | ||
171 | .long fp_put_d2, fp_put_d3 | ||
172 | .long fp_put_d4, fp_put_d5 | ||
173 | .long fp_put_d6, fp_put_d7 | ||
174 | |||
175 | fp_put_d0: | ||
176 | printf PREGISTER,"{d0<-%08x}",1,%d0 | ||
177 | move.l %d0,(PT_D0+8,%sp) | ||
178 | rts | ||
179 | |||
180 | fp_put_d1: | ||
181 | printf PREGISTER,"{d1<-%08x}",1,%d0 | ||
182 | move.l %d0,(PT_D1+8,%sp) | ||
183 | rts | ||
184 | |||
185 | fp_put_d2: | ||
186 | printf PREGISTER,"{d2<-%08x}",1,%d0 | ||
187 | move.l %d0,(PT_D2+8,%sp) | ||
188 | rts | ||
189 | |||
190 | fp_put_d3: | ||
191 | printf PREGISTER,"{d3<-%08x}",1,%d0 | ||
192 | | move.l %d0,%d3 | ||
193 | move.l %d0,(PT_D3+8,%sp) | ||
194 | rts | ||
195 | |||
196 | fp_put_d4: | ||
197 | printf PREGISTER,"{d4<-%08x}",1,%d0 | ||
198 | | move.l %d0,%d4 | ||
199 | move.l %d0,(PT_D4+8,%sp) | ||
200 | rts | ||
201 | |||
202 | fp_put_d5: | ||
203 | printf PREGISTER,"{d5<-%08x}",1,%d0 | ||
204 | | move.l %d0,%d5 | ||
205 | move.l %d0,(PT_D5+8,%sp) | ||
206 | rts | ||
207 | |||
208 | fp_put_d6: | ||
209 | printf PREGISTER,"{d6<-%08x}",1,%d0 | ||
210 | move.l %d0,%d6 | ||
211 | rts | ||
212 | |||
213 | fp_put_d7: | ||
214 | printf PREGISTER,"{d7<-%08x}",1,%d0 | ||
215 | move.l %d0,%d7 | ||
216 | rts | ||
217 | |||
218 | fp_get_addr_reg: | ||
219 | jmp ([0f:w,%pc,%d0.w*4]) | ||
220 | |||
221 | .align 4 | ||
222 | 0: | ||
223 | .long fp_get_a0, fp_get_a1 | ||
224 | .long fp_get_a2, fp_get_a3 | ||
225 | .long fp_get_a4, fp_get_a5 | ||
226 | .long fp_get_a6, fp_get_a7 | ||
227 | |||
228 | fp_get_a0: | ||
229 | move.l (PT_A0+8,%sp),%a0 | ||
230 | printf PREGISTER,"{a0->%08x}",1,%a0 | ||
231 | rts | ||
232 | |||
233 | fp_get_a1: | ||
234 | move.l (PT_A1+8,%sp),%a0 | ||
235 | printf PREGISTER,"{a1->%08x}",1,%a0 | ||
236 | rts | ||
237 | |||
238 | fp_get_a2: | ||
239 | move.l (PT_A2+8,%sp),%a0 | ||
240 | printf PREGISTER,"{a2->%08x}",1,%a0 | ||
241 | rts | ||
242 | |||
243 | fp_get_a3: | ||
244 | move.l %a3,%a0 | ||
245 | printf PREGISTER,"{a3->%08x}",1,%a0 | ||
246 | rts | ||
247 | |||
248 | fp_get_a4: | ||
249 | move.l %a4,%a0 | ||
250 | printf PREGISTER,"{a4->%08x}",1,%a0 | ||
251 | rts | ||
252 | |||
253 | fp_get_a5: | ||
254 | move.l %a5,%a0 | ||
255 | printf PREGISTER,"{a5->%08x}",1,%a0 | ||
256 | rts | ||
257 | |||
258 | fp_get_a6: | ||
259 | move.l %a6,%a0 | ||
260 | printf PREGISTER,"{a6->%08x}",1,%a0 | ||
261 | rts | ||
262 | |||
263 | fp_get_a7: | ||
264 | move.l %usp,%a0 | ||
265 | printf PREGISTER,"{a7->%08x}",1,%a0 | ||
266 | rts | ||
267 | |||
268 | fp_put_addr_reg: | ||
269 | jmp ([0f:w,%pc,%d0.w*4]) | ||
270 | |||
271 | .align 4 | ||
272 | 0: | ||
273 | .long fp_put_a0, fp_put_a1 | ||
274 | .long fp_put_a2, fp_put_a3 | ||
275 | .long fp_put_a4, fp_put_a5 | ||
276 | .long fp_put_a6, fp_put_a7 | ||
277 | |||
278 | fp_put_a0: | ||
279 | printf PREGISTER,"{a0<-%08x}",1,%a0 | ||
280 | move.l %a0,(PT_A0+8,%sp) | ||
281 | rts | ||
282 | |||
283 | fp_put_a1: | ||
284 | printf PREGISTER,"{a1<-%08x}",1,%a0 | ||
285 | move.l %a0,(PT_A1+8,%sp) | ||
286 | rts | ||
287 | |||
288 | fp_put_a2: | ||
289 | printf PREGISTER,"{a2<-%08x}",1,%a0 | ||
290 | move.l %a0,(PT_A2+8,%sp) | ||
291 | rts | ||
292 | |||
293 | fp_put_a3: | ||
294 | printf PREGISTER,"{a3<-%08x}",1,%a0 | ||
295 | move.l %a0,%a3 | ||
296 | rts | ||
297 | |||
298 | fp_put_a4: | ||
299 | printf PREGISTER,"{a4<-%08x}",1,%a0 | ||
300 | move.l %a0,%a4 | ||
301 | rts | ||
302 | |||
303 | fp_put_a5: | ||
304 | printf PREGISTER,"{a5<-%08x}",1,%a0 | ||
305 | move.l %a0,%a5 | ||
306 | rts | ||
307 | |||
308 | fp_put_a6: | ||
309 | printf PREGISTER,"{a6<-%08x}",1,%a0 | ||
310 | move.l %a0,%a6 | ||
311 | rts | ||
312 | |||
313 | fp_put_a7: | ||
314 | printf PREGISTER,"{a7<-%08x}",1,%a0 | ||
315 | move.l %a0,%usp | ||
316 | rts | ||
317 | |||
318 | .data | ||
319 | .align 4 | ||
320 | |||
321 | fp_debugprint: | ||
322 | | .long PMDECODE | ||
323 | .long PMINSTR+PMDECODE+PMCONV+PMNORM | ||
324 | | .long PMCONV+PMNORM+PMINSTR | ||
325 | | .long 0 | ||
diff --git a/arch/m68k/math-emu/fp_log.c b/arch/m68k/math-emu/fp_log.c new file mode 100644 index 000000000000..87b4f0158560 --- /dev/null +++ b/arch/m68k/math-emu/fp_log.c | |||
@@ -0,0 +1,223 @@ | |||
1 | /* | ||
2 | |||
3 | fp_trig.c: floating-point math routines for the Linux-m68k | ||
4 | floating point emulator. | ||
5 | |||
6 | Copyright (c) 1998-1999 David Huggins-Daines / Roman Zippel. | ||
7 | |||
8 | I hereby give permission, free of charge, to copy, modify, and | ||
9 | redistribute this software, in source or binary form, provided that | ||
10 | the above copyright notice and the following disclaimer are included | ||
11 | in all such copies. | ||
12 | |||
13 | THIS SOFTWARE IS PROVIDED "AS IS", WITH ABSOLUTELY NO WARRANTY, REAL | ||
14 | OR IMPLIED. | ||
15 | |||
16 | */ | ||
17 | |||
18 | #include "fp_emu.h" | ||
19 | |||
20 | static const struct fp_ext fp_one = | ||
21 | { | ||
22 | .exp = 0x3fff, | ||
23 | }; | ||
24 | |||
25 | extern struct fp_ext *fp_fadd(struct fp_ext *dest, const struct fp_ext *src); | ||
26 | extern struct fp_ext *fp_fdiv(struct fp_ext *dest, const struct fp_ext *src); | ||
27 | extern struct fp_ext *fp_fmul(struct fp_ext *dest, const struct fp_ext *src); | ||
28 | |||
29 | struct fp_ext * | ||
30 | fp_fsqrt(struct fp_ext *dest, struct fp_ext *src) | ||
31 | { | ||
32 | struct fp_ext tmp, src2; | ||
33 | int i, exp; | ||
34 | |||
35 | dprint(PINSTR, "fsqrt\n"); | ||
36 | |||
37 | fp_monadic_check(dest, src); | ||
38 | |||
39 | if (IS_ZERO(dest)) | ||
40 | return dest; | ||
41 | |||
42 | if (dest->sign) { | ||
43 | fp_set_nan(dest); | ||
44 | return dest; | ||
45 | } | ||
46 | if (IS_INF(dest)) | ||
47 | return dest; | ||
48 | |||
49 | /* | ||
50 | * sqrt(m) * 2^(p) , if e = 2*p | ||
51 | * sqrt(m*2^e) = | ||
52 | * sqrt(2*m) * 2^(p) , if e = 2*p + 1 | ||
53 | * | ||
54 | * So we use the last bit of the exponent to decide wether to | ||
55 | * use the m or 2*m. | ||
56 | * | ||
57 | * Since only the fractional part of the mantissa is stored and | ||
58 | * the integer part is assumed to be one, we place a 1 or 2 into | ||
59 | * the fixed point representation. | ||
60 | */ | ||
61 | exp = dest->exp; | ||
62 | dest->exp = 0x3FFF; | ||
63 | if (!(exp & 1)) /* lowest bit of exponent is set */ | ||
64 | dest->exp++; | ||
65 | fp_copy_ext(&src2, dest); | ||
66 | |||
67 | /* | ||
68 | * The taylor row arround a for sqrt(x) is: | ||
69 | * sqrt(x) = sqrt(a) + 1/(2*sqrt(a))*(x-a) + R | ||
70 | * With a=1 this gives: | ||
71 | * sqrt(x) = 1 + 1/2*(x-1) | ||
72 | * = 1/2*(1+x) | ||
73 | */ | ||
74 | fp_fadd(dest, &fp_one); | ||
75 | dest->exp--; /* * 1/2 */ | ||
76 | |||
77 | /* | ||
78 | * We now apply the newton rule to the function | ||
79 | * f(x) := x^2 - r | ||
80 | * which has a null point on x = sqrt(r). | ||
81 | * | ||
82 | * It gives: | ||
83 | * x' := x - f(x)/f'(x) | ||
84 | * = x - (x^2 -r)/(2*x) | ||
85 | * = x - (x - r/x)/2 | ||
86 | * = (2*x - x + r/x)/2 | ||
87 | * = (x + r/x)/2 | ||
88 | */ | ||
89 | for (i = 0; i < 9; i++) { | ||
90 | fp_copy_ext(&tmp, &src2); | ||
91 | |||
92 | fp_fdiv(&tmp, dest); | ||
93 | fp_fadd(dest, &tmp); | ||
94 | dest->exp--; | ||
95 | } | ||
96 | |||
97 | dest->exp += (exp - 0x3FFF) / 2; | ||
98 | |||
99 | return dest; | ||
100 | } | ||
101 | |||
102 | struct fp_ext * | ||
103 | fp_fetoxm1(struct fp_ext *dest, struct fp_ext *src) | ||
104 | { | ||
105 | uprint("fetoxm1\n"); | ||
106 | |||
107 | fp_monadic_check(dest, src); | ||
108 | |||
109 | if (IS_ZERO(dest)) | ||
110 | return dest; | ||
111 | |||
112 | return dest; | ||
113 | } | ||
114 | |||
115 | struct fp_ext * | ||
116 | fp_fetox(struct fp_ext *dest, struct fp_ext *src) | ||
117 | { | ||
118 | uprint("fetox\n"); | ||
119 | |||
120 | fp_monadic_check(dest, src); | ||
121 | |||
122 | return dest; | ||
123 | } | ||
124 | |||
125 | struct fp_ext * | ||
126 | fp_ftwotox(struct fp_ext *dest, struct fp_ext *src) | ||
127 | { | ||
128 | uprint("ftwotox\n"); | ||
129 | |||
130 | fp_monadic_check(dest, src); | ||
131 | |||
132 | return dest; | ||
133 | } | ||
134 | |||
135 | struct fp_ext * | ||
136 | fp_ftentox(struct fp_ext *dest, struct fp_ext *src) | ||
137 | { | ||
138 | uprint("ftentox\n"); | ||
139 | |||
140 | fp_monadic_check(dest, src); | ||
141 | |||
142 | return dest; | ||
143 | } | ||
144 | |||
145 | struct fp_ext * | ||
146 | fp_flogn(struct fp_ext *dest, struct fp_ext *src) | ||
147 | { | ||
148 | uprint("flogn\n"); | ||
149 | |||
150 | fp_monadic_check(dest, src); | ||
151 | |||
152 | return dest; | ||
153 | } | ||
154 | |||
155 | struct fp_ext * | ||
156 | fp_flognp1(struct fp_ext *dest, struct fp_ext *src) | ||
157 | { | ||
158 | uprint("flognp1\n"); | ||
159 | |||
160 | fp_monadic_check(dest, src); | ||
161 | |||
162 | return dest; | ||
163 | } | ||
164 | |||
165 | struct fp_ext * | ||
166 | fp_flog10(struct fp_ext *dest, struct fp_ext *src) | ||
167 | { | ||
168 | uprint("flog10\n"); | ||
169 | |||
170 | fp_monadic_check(dest, src); | ||
171 | |||
172 | return dest; | ||
173 | } | ||
174 | |||
175 | struct fp_ext * | ||
176 | fp_flog2(struct fp_ext *dest, struct fp_ext *src) | ||
177 | { | ||
178 | uprint("flog2\n"); | ||
179 | |||
180 | fp_monadic_check(dest, src); | ||
181 | |||
182 | return dest; | ||
183 | } | ||
184 | |||
185 | struct fp_ext * | ||
186 | fp_fgetexp(struct fp_ext *dest, struct fp_ext *src) | ||
187 | { | ||
188 | dprint(PINSTR, "fgetexp\n"); | ||
189 | |||
190 | fp_monadic_check(dest, src); | ||
191 | |||
192 | if (IS_INF(dest)) { | ||
193 | fp_set_nan(dest); | ||
194 | return dest; | ||
195 | } | ||
196 | if (IS_ZERO(dest)) | ||
197 | return dest; | ||
198 | |||
199 | fp_conv_long2ext(dest, (int)dest->exp - 0x3FFF); | ||
200 | |||
201 | fp_normalize_ext(dest); | ||
202 | |||
203 | return dest; | ||
204 | } | ||
205 | |||
206 | struct fp_ext * | ||
207 | fp_fgetman(struct fp_ext *dest, struct fp_ext *src) | ||
208 | { | ||
209 | dprint(PINSTR, "fgetman\n"); | ||
210 | |||
211 | fp_monadic_check(dest, src); | ||
212 | |||
213 | if (IS_ZERO(dest)) | ||
214 | return dest; | ||
215 | |||
216 | if (IS_INF(dest)) | ||
217 | return dest; | ||
218 | |||
219 | dest->exp = 0x3FFF; | ||
220 | |||
221 | return dest; | ||
222 | } | ||
223 | |||
diff --git a/arch/m68k/math-emu/fp_move.S b/arch/m68k/math-emu/fp_move.S new file mode 100644 index 000000000000..71bdf83ba61a --- /dev/null +++ b/arch/m68k/math-emu/fp_move.S | |||
@@ -0,0 +1,244 @@ | |||
1 | /* | ||
2 | * fp_move.S | ||
3 | * | ||
4 | * Copyright Roman Zippel, 1997. All rights reserved. | ||
5 | * | ||
6 | * Redistribution and use in source and binary forms, with or without | ||
7 | * modification, are permitted provided that the following conditions | ||
8 | * are met: | ||
9 | * 1. Redistributions of source code must retain the above copyright | ||
10 | * notice, and the entire permission notice in its entirety, | ||
11 | * including the disclaimer of warranties. | ||
12 | * 2. Redistributions in binary form must reproduce the above copyright | ||
13 | * notice, this list of conditions and the following disclaimer in the | ||
14 | * documentation and/or other materials provided with the distribution. | ||
15 | * 3. The name of the author may not be used to endorse or promote | ||
16 | * products derived from this software without specific prior | ||
17 | * written permission. | ||
18 | * | ||
19 | * ALTERNATIVELY, this product may be distributed under the terms of | ||
20 | * the GNU General Public License, in which case the provisions of the GPL are | ||
21 | * required INSTEAD OF the above restrictions. (This clause is | ||
22 | * necessary due to a potential bad interaction between the GPL and | ||
23 | * the restrictions contained in a BSD-style copyright.) | ||
24 | * | ||
25 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
26 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | ||
27 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
28 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, | ||
29 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
30 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
31 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
32 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
33 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
34 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
35 | * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
36 | */ | ||
37 | |||
38 | #include "fp_emu.h" | ||
39 | #include "fp_decode.h" | ||
40 | |||
41 | do_no_pc_mode=1 | ||
42 | |||
43 | .globl fp_fmove_fp2mem | ||
44 | |||
45 | fp_fmove_fp2mem: | ||
46 | clr.b (2+FPD_FPSR,FPDATA) | ||
47 | fp_decode_dest_format | ||
48 | move.w %d0,%d1 | store data size twice in %d1 | ||
49 | swap %d1 | one can be trashed below | ||
50 | move.w %d0,%d1 | ||
51 | #ifdef FPU_EMU_DEBUG | ||
52 | lea 0f,%a0 | ||
53 | clr.l %d0 | ||
54 | move.b (%a0,%d1.w),%d0 | ||
55 | printf PDECODE,"fmove.%c ",1,%d0 | ||
56 | fp_decode_src_reg | ||
57 | printf PDECODE,"fp%d,",1,%d0 | ||
58 | |||
59 | .data | ||
60 | 0: .byte 'l','s','x','p','w','d','b','p' | ||
61 | .previous | ||
62 | #endif | ||
63 | |||
64 | | encode addressing mode for dest | ||
65 | fp_decode_addr_mode | ||
66 | |||
67 | .long fp_data, fp_ill | ||
68 | .long fp_indirect, fp_postinc | ||
69 | .long fp_predecr, fp_disp16 | ||
70 | .long fp_extmode0, fp_extmode1 | ||
71 | |||
72 | | addressing mode: data register direct | ||
73 | fp_data: | ||
74 | fp_mode_data_direct | ||
75 | move.w %d0,%d1 | ||
76 | fp_decode_src_reg | ||
77 | fp_get_fp_reg | ||
78 | lea (FPD_TEMPFP1,FPDATA),%a1 | ||
79 | move.l (%a0)+,(%a1)+ | ||
80 | move.l (%a0)+,(%a1)+ | ||
81 | move.l (%a0),(%a1) | ||
82 | lea (-8,%a1),%a0 | ||
83 | swap %d1 | ||
84 | move.l %d1,%d2 | ||
85 | printf PDECODE,"\n" | ||
86 | jmp ([0f:w,%pc,%d1.w*4]) | ||
87 | |||
88 | .align 4 | ||
89 | 0: | ||
90 | .long fp_data_long, fp_data_single | ||
91 | .long fp_ill, fp_ill | ||
92 | .long fp_data_word, fp_ill | ||
93 | .long fp_data_byte, fp_ill | ||
94 | |||
95 | fp_data_byte: | ||
96 | jsr fp_normalize_ext | ||
97 | jsr fp_conv_ext2byte | ||
98 | move.l %d0,%d1 | ||
99 | swap %d2 | ||
100 | move.w %d2,%d0 | ||
101 | jsr fp_get_data_reg | ||
102 | move.b %d1,%d0 | ||
103 | move.w %d2,%d1 | ||
104 | jsr fp_put_data_reg | ||
105 | jra fp_final | ||
106 | |||
107 | fp_data_word: | ||
108 | jsr fp_normalize_ext | ||
109 | jsr fp_conv_ext2short | ||
110 | move.l %d0,%d1 | ||
111 | swap %d2 | ||
112 | move.w %d2,%d0 | ||
113 | jsr fp_get_data_reg | ||
114 | move.w %d1,%d0 | ||
115 | move.l %d2,%d1 | ||
116 | jsr fp_put_data_reg | ||
117 | jra fp_final | ||
118 | |||
119 | fp_data_long: | ||
120 | jsr fp_normalize_ext | ||
121 | jsr fp_conv_ext2long | ||
122 | swap %d2 | ||
123 | move.w %d2,%d1 | ||
124 | jsr fp_put_data_reg | ||
125 | jra fp_final | ||
126 | |||
127 | fp_data_single: | ||
128 | jsr fp_normalize_ext | ||
129 | jsr fp_conv_ext2single | ||
130 | swap %d2 | ||
131 | move.w %d2,%d1 | ||
132 | jsr fp_put_data_reg | ||
133 | jra fp_final | ||
134 | |||
135 | | addressing mode: address register indirect | ||
136 | fp_indirect: | ||
137 | fp_mode_addr_indirect | ||
138 | jra fp_putdest | ||
139 | |||
140 | | addressing mode: address register indirect with postincrement | ||
141 | fp_postinc: | ||
142 | fp_mode_addr_indirect_postinc | ||
143 | jra fp_putdest | ||
144 | |||
145 | | addressing mode: address register indirect with predecrement | ||
146 | fp_predecr: | ||
147 | fp_mode_addr_indirect_predec | ||
148 | jra fp_putdest | ||
149 | |||
150 | | addressing mode: address register indirect with 16bit displacement | ||
151 | fp_disp16: | ||
152 | fp_mode_addr_indirect_disp16 | ||
153 | jra fp_putdest | ||
154 | |||
155 | fp_extmode0: | ||
156 | fp_mode_addr_indirect_extmode0 | ||
157 | jra fp_putdest | ||
158 | |||
159 | fp_extmode1: | ||
160 | fp_decode_addr_reg | ||
161 | jmp ([0f:w,%pc,%d0*4]) | ||
162 | |||
163 | .align 4 | ||
164 | 0: | ||
165 | .long fp_abs_short, fp_abs_long | ||
166 | .long fp_ill, fp_ill | ||
167 | .long fp_ill, fp_ill | ||
168 | .long fp_ill, fp_ill | ||
169 | |||
170 | fp_abs_short: | ||
171 | fp_mode_abs_short | ||
172 | jra fp_putdest | ||
173 | |||
174 | fp_abs_long: | ||
175 | fp_mode_abs_long | ||
176 | jra fp_putdest | ||
177 | |||
178 | fp_putdest: | ||
179 | move.l %a0,%a1 | ||
180 | fp_decode_src_reg | ||
181 | move.l %d1,%d2 | save size | ||
182 | fp_get_fp_reg | ||
183 | printf PDECODE,"\n" | ||
184 | addq.l #8,%a0 | ||
185 | move.l (%a0),-(%sp) | ||
186 | move.l -(%a0),-(%sp) | ||
187 | move.l -(%a0),-(%sp) | ||
188 | move.l %sp,%a0 | ||
189 | jsr fp_normalize_ext | ||
190 | |||
191 | swap %d2 | ||
192 | jmp ([0f:w,%pc,%d2.w*4]) | ||
193 | |||
194 | .align 4 | ||
195 | 0: | ||
196 | .long fp_format_long, fp_format_single | ||
197 | .long fp_format_extended, fp_format_packed | ||
198 | .long fp_format_word, fp_format_double | ||
199 | .long fp_format_byte, fp_format_packed | ||
200 | |||
201 | fp_format_long: | ||
202 | jsr fp_conv_ext2long | ||
203 | putuser.l %d0,(%a1),fp_err_ua1,%a1 | ||
204 | jra fp_finish_move | ||
205 | |||
206 | fp_format_single: | ||
207 | jsr fp_conv_ext2single | ||
208 | putuser.l %d0,(%a1),fp_err_ua1,%a1 | ||
209 | jra fp_finish_move | ||
210 | |||
211 | fp_format_extended: | ||
212 | move.l (%a0)+,%d0 | ||
213 | lsl.w #1,%d0 | ||
214 | lsl.l #7,%d0 | ||
215 | lsl.l #8,%d0 | ||
216 | putuser.l %d0,(%a1)+,fp_err_ua1,%a1 | ||
217 | move.l (%a0)+,%d0 | ||
218 | putuser.l %d0,(%a1)+,fp_err_ua1,%a1 | ||
219 | move.l (%a0),%d0 | ||
220 | putuser.l %d0,(%a1),fp_err_ua1,%a1 | ||
221 | jra fp_finish_move | ||
222 | |||
223 | fp_format_packed: | ||
224 | /* not supported yet */ | ||
225 | lea (12,%sp),%sp | ||
226 | jra fp_ill | ||
227 | |||
228 | fp_format_word: | ||
229 | jsr fp_conv_ext2short | ||
230 | putuser.w %d0,(%a1),fp_err_ua1,%a1 | ||
231 | jra fp_finish_move | ||
232 | |||
233 | fp_format_double: | ||
234 | jsr fp_conv_ext2double | ||
235 | jra fp_finish_move | ||
236 | |||
237 | fp_format_byte: | ||
238 | jsr fp_conv_ext2byte | ||
239 | putuser.b %d0,(%a1),fp_err_ua1,%a1 | ||
240 | | jra fp_finish_move | ||
241 | |||
242 | fp_finish_move: | ||
243 | lea (12,%sp),%sp | ||
244 | jra fp_final | ||
diff --git a/arch/m68k/math-emu/fp_movem.S b/arch/m68k/math-emu/fp_movem.S new file mode 100644 index 000000000000..8354d39e6c47 --- /dev/null +++ b/arch/m68k/math-emu/fp_movem.S | |||
@@ -0,0 +1,368 @@ | |||
1 | /* | ||
2 | * fp_movem.S | ||
3 | * | ||
4 | * Copyright Roman Zippel, 1997. All rights reserved. | ||
5 | * | ||
6 | * Redistribution and use in source and binary forms, with or without | ||
7 | * modification, are permitted provided that the following conditions | ||
8 | * are met: | ||
9 | * 1. Redistributions of source code must retain the above copyright | ||
10 | * notice, and the entire permission notice in its entirety, | ||
11 | * including the disclaimer of warranties. | ||
12 | * 2. Redistributions in binary form must reproduce the above copyright | ||
13 | * notice, this list of conditions and the following disclaimer in the | ||
14 | * documentation and/or other materials provided with the distribution. | ||
15 | * 3. The name of the author may not be used to endorse or promote | ||
16 | * products derived from this software without specific prior | ||
17 | * written permission. | ||
18 | * | ||
19 | * ALTERNATIVELY, this product may be distributed under the terms of | ||
20 | * the GNU General Public License, in which case the provisions of the GPL are | ||
21 | * required INSTEAD OF the above restrictions. (This clause is | ||
22 | * necessary due to a potential bad interaction between the GPL and | ||
23 | * the restrictions contained in a BSD-style copyright.) | ||
24 | * | ||
25 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
26 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | ||
27 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
28 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, | ||
29 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
30 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
31 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
32 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
33 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
34 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
35 | * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
36 | */ | ||
37 | |||
38 | #include "fp_emu.h" | ||
39 | #include "fp_decode.h" | ||
40 | |||
41 | | set flags for decode macros for fmovem | ||
42 | do_fmovem=1 | ||
43 | |||
44 | .globl fp_fmovem_fp, fp_fmovem_cr | ||
45 | |||
46 | | %d1 contains the mask and count of the register list | ||
47 | | for other register usage see fp_decode.h | ||
48 | |||
49 | fp_fmovem_fp: | ||
50 | printf PDECODE,"fmovem.x " | ||
51 | | get register list and count them | ||
52 | btst #11,%d2 | ||
53 | jne 1f | ||
54 | bfextu %d2{#24,#8},%d0 | static register list | ||
55 | jra 2f | ||
56 | 1: bfextu %d2{#25,#3},%d0 | dynamic register list | ||
57 | jsr fp_get_data_reg | ||
58 | 2: move.l %d0,%d1 | ||
59 | swap %d1 | ||
60 | jra 2f | ||
61 | 1: addq.w #1,%d1 | count the # of registers in | ||
62 | 2: lsr.b #1,%d0 | register list and keep it in %d1 | ||
63 | jcs 1b | ||
64 | jne 2b | ||
65 | printf PDECODE,"#%08x",1,%d1 | ||
66 | #ifdef FPU_EMU_DEBUG | ||
67 | btst #12,%d2 | ||
68 | jne 1f | ||
69 | printf PDECODE,"-" | decremental move | ||
70 | jra 2f | ||
71 | 1: printf PDECODE,"+" | incremental move | ||
72 | 2: btst #13,%d2 | ||
73 | jeq 1f | ||
74 | printf PDECODE,"->" | fpu -> cpu | ||
75 | jra 2f | ||
76 | 1: printf PDECODE,"<-" | fpu <- cpu | ||
77 | 2: | ||
78 | #endif | ||
79 | |||
80 | | decode address mode | ||
81 | fp_decode_addr_mode | ||
82 | |||
83 | .long fp_ill, fp_ill | ||
84 | .long fpr_indirect, fpr_postinc | ||
85 | .long fpr_predecr, fpr_disp16 | ||
86 | .long fpr_extmode0, fpr_extmode1 | ||
87 | |||
88 | | addressing mode: address register indirect | ||
89 | fpr_indirect: | ||
90 | fp_mode_addr_indirect | ||
91 | jra fpr_do_movem | ||
92 | |||
93 | | addressing mode: address register indirect with postincrement | ||
94 | fpr_postinc: | ||
95 | fp_mode_addr_indirect_postinc | ||
96 | jra fpr_do_movem | ||
97 | |||
98 | fpr_predecr: | ||
99 | fp_mode_addr_indirect_predec | ||
100 | jra fpr_do_movem | ||
101 | |||
102 | | addressing mode: address register/programm counter indirect | ||
103 | | with 16bit displacement | ||
104 | fpr_disp16: | ||
105 | fp_mode_addr_indirect_disp16 | ||
106 | jra fpr_do_movem | ||
107 | |||
108 | fpr_extmode0: | ||
109 | fp_mode_addr_indirect_extmode0 | ||
110 | jra fpr_do_movem | ||
111 | |||
112 | fpr_extmode1: | ||
113 | fp_decode_addr_reg | ||
114 | jmp ([0f:w,%pc,%d0*4]) | ||
115 | |||
116 | .align 4 | ||
117 | 0: | ||
118 | .long fpr_absolute_short, fpr_absolute_long | ||
119 | .long fpr_disp16, fpr_extmode0 | ||
120 | .long fp_ill, fp_ill | ||
121 | .long fp_ill, fp_ill | ||
122 | |||
123 | fpr_absolute_short: | ||
124 | fp_mode_abs_short | ||
125 | jra fpr_do_movem | ||
126 | |||
127 | fpr_absolute_long: | ||
128 | fp_mode_abs_long | ||
129 | | jra fpr_do_movem | ||
130 | |||
131 | fpr_do_movem: | ||
132 | swap %d1 | get fpu register list | ||
133 | lea (FPD_FPREG,FPDATA),%a1 | ||
134 | moveq #12,%d0 | ||
135 | btst #12,%d2 | ||
136 | jne 1f | ||
137 | lea (-12,%a1,%d0*8),%a1 | ||
138 | neg.l %d0 | ||
139 | 1: btst #13,%d2 | ||
140 | jne 4f | ||
141 | | move register from memory into fpu | ||
142 | jra 3f | ||
143 | 1: printf PMOVEM,"(%p>%p)",2,%a0,%a1 | ||
144 | getuser.l (%a0)+,%d2,fp_err_ua1,%a0 | ||
145 | lsr.l #8,%d2 | ||
146 | lsr.l #7,%d2 | ||
147 | lsr.w #1,%d2 | ||
148 | move.l %d2,(%a1)+ | ||
149 | getuser.l (%a0)+,%d2,fp_err_ua1,%a0 | ||
150 | move.l %d2,(%a1)+ | ||
151 | getuser.l (%a0),%d2,fp_err_ua1,%a0 | ||
152 | move.l %d2,(%a1) | ||
153 | subq.l #8,%a0 | ||
154 | subq.l #8,%a1 | ||
155 | add.l %d0,%a0 | ||
156 | 2: add.l %d0,%a1 | ||
157 | 3: lsl.b #1,%d1 | ||
158 | jcs 1b | ||
159 | jne 2b | ||
160 | jra 5f | ||
161 | | move register from fpu into memory | ||
162 | 1: printf PMOVEM,"(%p>%p)",2,%a1,%a0 | ||
163 | move.l (%a1)+,%d2 | ||
164 | lsl.w #1,%d2 | ||
165 | lsl.l #7,%d2 | ||
166 | lsl.l #8,%d2 | ||
167 | putuser.l %d2,(%a0)+,fp_err_ua1,%a0 | ||
168 | move.l (%a1)+,%d2 | ||
169 | putuser.l %d2,(%a0)+,fp_err_ua1,%a0 | ||
170 | move.l (%a1),%d2 | ||
171 | putuser.l %d2,(%a0),fp_err_ua1,%a0 | ||
172 | subq.l #8,%a1 | ||
173 | subq.l #8,%a0 | ||
174 | add.l %d0,%a0 | ||
175 | 2: add.l %d0,%a1 | ||
176 | 4: lsl.b #1,%d1 | ||
177 | jcs 1b | ||
178 | jne 2b | ||
179 | 5: | ||
180 | printf PDECODE,"\n" | ||
181 | #if 0 | ||
182 | lea (FPD_FPREG,FPDATA),%a0 | ||
183 | printf PMOVEM,"fp:" | ||
184 | printx PMOVEM,%a0@(0) | ||
185 | printx PMOVEM,%a0@(12) | ||
186 | printf PMOVEM,"\n " | ||
187 | printx PMOVEM,%a0@(24) | ||
188 | printx PMOVEM,%a0@(36) | ||
189 | printf PMOVEM,"\n " | ||
190 | printx PMOVEM,%a0@(48) | ||
191 | printx PMOVEM,%a0@(60) | ||
192 | printf PMOVEM,"\n " | ||
193 | printx PMOVEM,%a0@(72) | ||
194 | printx PMOVEM,%a0@(84) | ||
195 | printf PMOVEM,"\n" | ||
196 | #endif | ||
197 | jra fp_end | ||
198 | |||
199 | | set flags for decode macros for fmovem control register | ||
200 | do_fmovem=1 | ||
201 | do_fmovem_cr=1 | ||
202 | |||
203 | fp_fmovem_cr: | ||
204 | printf PDECODE,"fmovem.cr " | ||
205 | | get register list and count them | ||
206 | bfextu %d2{#19,#3},%d0 | ||
207 | move.l %d0,%d1 | ||
208 | swap %d1 | ||
209 | jra 2f | ||
210 | 1: addq.w #1,%d1 | ||
211 | 2: lsr.l #1,%d0 | ||
212 | jcs 1b | ||
213 | jne 2b | ||
214 | printf PDECODE,"#%08x",1,%d1 | ||
215 | #ifdef FPU_EMU_DEBUG | ||
216 | btst #13,%d2 | ||
217 | jeq 1f | ||
218 | printf PDECODE,"->" | fpu -> cpu | ||
219 | jra 2f | ||
220 | 1: printf PDECODE,"<-" | fpu <- cpu | ||
221 | 2: | ||
222 | #endif | ||
223 | |||
224 | | decode address mode | ||
225 | fp_decode_addr_mode | ||
226 | |||
227 | .long fpc_data, fpc_addr | ||
228 | .long fpc_indirect, fpc_postinc | ||
229 | .long fpc_predecr, fpc_disp16 | ||
230 | .long fpc_extmode0, fpc_extmode1 | ||
231 | |||
232 | fpc_data: | ||
233 | fp_mode_data_direct | ||
234 | move.w %d0,%d1 | ||
235 | bfffo %d2{#19,#3},%d0 | ||
236 | sub.w #19,%d0 | ||
237 | lea (FPD_FPCR,FPDATA,%d0.w*4),%a1 | ||
238 | btst #13,%d2 | ||
239 | jne 1f | ||
240 | move.w %d1,%d0 | ||
241 | jsr fp_get_data_reg | ||
242 | move.l %d0,(%a1) | ||
243 | jra fpc_movem_fin | ||
244 | 1: move.l (%a1),%d0 | ||
245 | jsr fp_put_data_reg | ||
246 | jra fpc_movem_fin | ||
247 | |||
248 | fpc_addr: | ||
249 | fp_decode_addr_reg | ||
250 | printf PDECODE,"a%d",1,%d0 | ||
251 | btst #13,%d2 | ||
252 | jne 1f | ||
253 | jsr fp_get_addr_reg | ||
254 | move.l %a0,(FPD_FPIAR,FPDATA) | ||
255 | jra fpc_movem_fin | ||
256 | 1: move.l (FPD_FPIAR,FPDATA),%a0 | ||
257 | jsr fp_put_addr_reg | ||
258 | jra fpc_movem_fin | ||
259 | |||
260 | fpc_indirect: | ||
261 | fp_mode_addr_indirect | ||
262 | jra fpc_do_movem | ||
263 | |||
264 | fpc_postinc: | ||
265 | fp_mode_addr_indirect_postinc | ||
266 | jra fpc_do_movem | ||
267 | |||
268 | fpc_predecr: | ||
269 | fp_mode_addr_indirect_predec | ||
270 | jra fpc_do_movem | ||
271 | |||
272 | fpc_disp16: | ||
273 | fp_mode_addr_indirect_disp16 | ||
274 | jra fpc_do_movem | ||
275 | |||
276 | fpc_extmode0: | ||
277 | fp_mode_addr_indirect_extmode0 | ||
278 | jra fpc_do_movem | ||
279 | |||
280 | fpc_extmode1: | ||
281 | fp_decode_addr_reg | ||
282 | jmp ([0f:w,%pc,%d0*4]) | ||
283 | |||
284 | .align 4 | ||
285 | 0: | ||
286 | .long fpc_absolute_short, fpc_absolute_long | ||
287 | .long fpc_disp16, fpc_extmode0 | ||
288 | .long fpc_immediate, fp_ill | ||
289 | .long fp_ill, fp_ill | ||
290 | |||
291 | fpc_absolute_short: | ||
292 | fp_mode_abs_short | ||
293 | jra fpc_do_movem | ||
294 | |||
295 | fpc_absolute_long: | ||
296 | fp_mode_abs_long | ||
297 | jra fpc_do_movem | ||
298 | |||
299 | fpc_immediate: | ||
300 | fp_get_pc %a0 | ||
301 | lea (%a0,%d1.w*4),%a1 | ||
302 | fp_put_pc %a1 | ||
303 | printf PDECODE,"#imm" | ||
304 | | jra fpc_do_movem | ||
305 | #if 0 | ||
306 | swap %d1 | ||
307 | lsl.l #5,%d1 | ||
308 | lea (FPD_FPCR,FPDATA),%a0 | ||
309 | jra 3f | ||
310 | 1: move.l %d0,(%a0) | ||
311 | 2: addq.l #4,%a0 | ||
312 | 3: lsl.b #1,%d1 | ||
313 | jcs 1b | ||
314 | jne 2b | ||
315 | jra fpc_movem_fin | ||
316 | #endif | ||
317 | |||
318 | fpc_do_movem: | ||
319 | swap %d1 | get fpu register list | ||
320 | lsl.l #5,%d1 | ||
321 | lea (FPD_FPCR,FPDATA),%a1 | ||
322 | 1: btst #13,%d2 | ||
323 | jne 4f | ||
324 | |||
325 | | move register from memory into fpu | ||
326 | jra 3f | ||
327 | 1: printf PMOVEM,"(%p>%p)",2,%a0,%a1 | ||
328 | getuser.l (%a0)+,%d0,fp_err_ua1,%a0 | ||
329 | move.l %d0,(%a1) | ||
330 | 2: addq.l #4,%a1 | ||
331 | 3: lsl.b #1,%d1 | ||
332 | jcs 1b | ||
333 | jne 2b | ||
334 | jra fpc_movem_fin | ||
335 | |||
336 | | move register from fpu into memory | ||
337 | 1: printf PMOVEM,"(%p>%p)",2,%a1,%a0 | ||
338 | move.l (%a1),%d0 | ||
339 | putuser.l %d0,(%a0)+,fp_err_ua1,%a0 | ||
340 | 2: addq.l #4,%a1 | ||
341 | 4: lsl.b #1,%d1 | ||
342 | jcs 1b | ||
343 | jne 2b | ||
344 | |||
345 | fpc_movem_fin: | ||
346 | and.l #0x0000fff0,(FPD_FPCR,FPDATA) | ||
347 | and.l #0x0ffffff8,(FPD_FPSR,FPDATA) | ||
348 | move.l (FPD_FPCR,FPDATA),%d0 | ||
349 | lsr.l #4,%d0 | ||
350 | moveq #3,%d1 | ||
351 | and.l %d0,%d1 | ||
352 | move.w %d1,(FPD_RND,FPDATA) | ||
353 | lsr.l #2,%d0 | ||
354 | moveq #3,%d1 | ||
355 | and.l %d0,%d1 | ||
356 | move.w %d1,(FPD_PREC,FPDATA) | ||
357 | printf PDECODE,"\n" | ||
358 | #if 0 | ||
359 | printf PMOVEM,"fpcr : %08x\n",1,FPDATA@(FPD_FPCR) | ||
360 | printf PMOVEM,"fpsr : %08x\n",1,FPDATA@(FPD_FPSR) | ||
361 | printf PMOVEM,"fpiar: %08x\n",1,FPDATA@(FPD_FPIAR) | ||
362 | clr.l %d0 | ||
363 | move.w (FPD_PREC,FPDATA),%d0 | ||
364 | printf PMOVEM,"prec : %04x\n",1,%d0 | ||
365 | move.w (FPD_RND,FPDATA),%d0 | ||
366 | printf PMOVEM,"rnd : %04x\n",1,%d0 | ||
367 | #endif | ||
368 | jra fp_end | ||
diff --git a/arch/m68k/math-emu/fp_scan.S b/arch/m68k/math-emu/fp_scan.S new file mode 100644 index 000000000000..e4146ed574db --- /dev/null +++ b/arch/m68k/math-emu/fp_scan.S | |||
@@ -0,0 +1,478 @@ | |||
1 | /* | ||
2 | * fp_scan.S | ||
3 | * | ||
4 | * Copyright Roman Zippel, 1997. All rights reserved. | ||
5 | * | ||
6 | * Redistribution and use in source and binary forms, with or without | ||
7 | * modification, are permitted provided that the following conditions | ||
8 | * are met: | ||
9 | * 1. Redistributions of source code must retain the above copyright | ||
10 | * notice, and the entire permission notice in its entirety, | ||
11 | * including the disclaimer of warranties. | ||
12 | * 2. Redistributions in binary form must reproduce the above copyright | ||
13 | * notice, this list of conditions and the following disclaimer in the | ||
14 | * documentation and/or other materials provided with the distribution. | ||
15 | * 3. The name of the author may not be used to endorse or promote | ||
16 | * products derived from this software without specific prior | ||
17 | * written permission. | ||
18 | * | ||
19 | * ALTERNATIVELY, this product may be distributed under the terms of | ||
20 | * the GNU General Public License, in which case the provisions of the GPL are | ||
21 | * required INSTEAD OF the above restrictions. (This clause is | ||
22 | * necessary due to a potential bad interaction between the GPL and | ||
23 | * the restrictions contained in a BSD-style copyright.) | ||
24 | * | ||
25 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
26 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | ||
27 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
28 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, | ||
29 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
30 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
31 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
32 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
33 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
34 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
35 | * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
36 | */ | ||
37 | |||
38 | #include "fp_emu.h" | ||
39 | #include "fp_decode.h" | ||
40 | |||
41 | .globl fp_scan, fp_datasize | ||
42 | |||
43 | .data | ||
44 | |||
45 | | %d2 - first two instr words | ||
46 | | %d1 - operand size | ||
47 | |||
48 | /* operand formats are: | ||
49 | |||
50 | Long = 0, i.e. fmove.l | ||
51 | Single, i.e. fmove.s | ||
52 | Extended, i.e. fmove.x | ||
53 | Packed-BCD, i.e. fmove.p | ||
54 | Word, i.e. fmove.w | ||
55 | Double, i.e. fmove.d | ||
56 | */ | ||
57 | |||
58 | .text | ||
59 | |||
60 | | On entry: | ||
61 | | FPDATA - base of emulated FPU registers | ||
62 | |||
| fp_scan: main entry point for decoding one coprocessor instruction.
| Reads the byte at the saved PC; anything that is not a cpid-1
| F-line opcode (0xf2xx) goes to fp_nonstd.  Otherwise the first two
| instruction words are loaded into %d2 (where the rest of the decoder
| expects them), the PC is advanced past them, and the instruction is
| dispatched through the jump tables below.
fp_scan:
	| normal fpu instruction? (this excludes fsave/frestore)
	fp_get_pc %a0
	printf	PDECODE,"%08x: ",1,%a0
	getuser.b (%a0),%d0,fp_err_ua1,%a0
#if 1
	cmp.b	#0xf2,%d0		| cpid = 1
#else
	cmp.b	#0xfc,%d0		| cpid = 6
#endif
	jne	fp_nonstd
	| first two instruction words are kept in %d2
	getuser.l (%a0)+,%d2,fp_err_ua1,%a0
	fp_put_pc %a0
fp_decode_cond:	| separate conditional instr
	fp_decode_cond_instr_type

	| jump table consumed by fp_decode_cond_instr_type
	.long	fp_decode_move, fp_fscc
	.long	fp_fbccw, fp_fbccl

fp_decode_move:	| separate move instr
	fp_decode_move_instr_type

	| jump table consumed by fp_decode_move_instr_type
	.long	fp_fgen_fp, fp_ill
	.long	fp_fgen_ea, fp_fmove_fp2mem
	.long	fp_fmovem_cr, fp_fmovem_cr
	.long	fp_fmovem_fp, fp_fmovem_fp
91 | | now all arithmetic instr and a few move instr are left | ||
| fp_fgen_fp: general (arithmetic/move) instruction whose source is an
| FPU register.  The 12-byte source register is copied to FPD_TEMPFP1
| so the operation cannot clobber it; %a0 is left pointing at the copy
| and decoding continues with the destination (fp_getdest).
fp_fgen_fp:	| source is a fpu register
	clr.b	(FPD_FPSR+2,FPDATA)	| clear the exception byte
	fp_decode_sourcespec
	printf	PDECODE,"f<op>.x fp%d",1,%d0
	fp_get_fp_reg
	lea	(FPD_TEMPFP1,FPDATA),%a1 | copy src into a temp location
	move.l	(%a0)+,(%a1)+
	move.l	(%a0)+,(%a1)+
	move.l	(%a0),(%a1)
	lea	(-8,%a1),%a0		| %a0 = start of the temp copy
	jra	fp_getdest
103 | |||
| fp_fgen_ea: general instruction whose source is an <ea>.  Source
| specifier 7 encodes fmovecr; otherwise the specifier is the operand
| format, kept in BOTH halves of %d1 because the low half gets trashed
| while the effective address is decoded.
fp_fgen_ea:	| source is <ea>
	clr.b	(FPD_FPSR+2,FPDATA)	| clear the exception byte
	| sort out fmovecr, keep data size in %d1
	fp_decode_sourcespec
	cmp.w	#7,%d0
	jeq	fp_fmovecr
	move.w	%d0,%d1			| store data size twice in %d1
	swap	%d1			| one can be trashed below
	move.w	%d0,%d1
#ifdef FPU_EMU_DEBUG
	| debug only: print the operand-format suffix letter
	lea	0f,%a0
	clr.l	%d0
	move.b	(%a0,%d1.w),%d0
	printf	PDECODE,"f<op>.%c ",1,%d0

	.data
0:	.byte	'l','s','x','p','w','d','b',0
	.previous
#endif
123 | |||
/*
	fp_getsource, fp_getdest

	basically, we end up with a pointer to the source operand in
	%a1, and a pointer to the destination operand in %a0. both
	are, of course, 96-bit extended floating point numbers.
*/

| Decode the effective address of the source operand.  Register-direct
| modes convert the value immediately; all memory modes compute the
| operand address and join fp_fetchsource.
fp_getsource:
	| decode addressing mode for source
	fp_decode_addr_mode

	| jump table indexed by addressing mode
	.long	fp_data, fp_ill
	.long	fp_indirect, fp_postinc
	.long	fp_predecr, fp_disp16
	.long	fp_extmode0, fp_extmode1

	| addressing mode: data register direct
fp_data:
	fp_mode_data_direct
	jsr	fp_get_data_reg
	lea	(FPD_TEMPFP1,FPDATA),%a0
	| dispatch on operand format; only formats that fit in a data
	| register are legal here
	jmp	([0f:w,%pc,%d1.w*4])

	.align	4
0:
	.long	fp_data_long, fp_data_single
	.long	fp_ill, fp_ill
	.long	fp_data_word, fp_ill
	.long	fp_data_byte, fp_ill

	| data types that fit in an integer data register
fp_data_byte:
	extb.l	%d0			| sign-extend byte to long
	jra	fp_data_long

fp_data_word:
	ext.l	%d0			| sign-extend word to long,
					| then fall through

fp_data_long:
	jsr	fp_conv_long2ext
	jra	fp_getdest

fp_data_single:
	jsr	fp_conv_single2ext
	jra	fp_getdest

	| addressing mode: address register indirect
fp_indirect:
	fp_mode_addr_indirect
	jra	fp_fetchsource

	| addressing mode: address register indirect with postincrement
fp_postinc:
	fp_mode_addr_indirect_postinc
	jra	fp_fetchsource

	| addressing mode: address register indirect with predecrement
fp_predecr:
	fp_mode_addr_indirect_predec
	jra	fp_fetchsource

	| addressing mode: address register/program counter indirect
	| with 16bit displacement
fp_disp16:
	fp_mode_addr_indirect_disp16
	jra	fp_fetchsource

	| all other indirect addressing modes will finally end up here
fp_extmode0:
	fp_mode_addr_indirect_extmode0
	jra	fp_fetchsource

	| all pc relative addressing modes and immediate/absolute modes end up here
	| the first ones are sent to fp_extmode0 or fp_disp16
	| and only the latter are handled here
fp_extmode1:
	fp_decode_addr_reg
	jmp	([0f:w,%pc,%d0*4])

	.align	4
0:
	.long	fp_abs_short, fp_abs_long
	.long	fp_disp16, fp_extmode0
	.long	fp_immediate, fp_ill
	.long	fp_ill, fp_ill

	| addressing mode: absolute short
fp_abs_short:
	fp_mode_abs_short
	jra	fp_fetchsource

	| addressing mode: absolute long
fp_abs_long:
	fp_mode_abs_long
	jra	fp_fetchsource

	| addressing mode: immediate data
	| the operand follows the instruction words; its size is looked
	| up in fp_datasize, rounded up to a word multiple, and the PC
	| is advanced past it
fp_immediate:
	printf	PDECODE,"#"
	fp_get_pc %a0
	move.w	(fp_datasize,%d1.w*2),%d0
	addq.w	#1,%d0
	and.w	#-2,%d0			| round size up to even
#ifdef FPU_EMU_DEBUG
	| debug only: hex-dump the immediate operand bytes
	movem.l	%d0/%d1,-(%sp)
	movel	%a0,%a1
	clr.l	%d1
	jra	2f
1:	getuser.b (%a1)+,%d1,fp_err_ua1,%a1
	printf	PDECODE,"%02x",1,%d1
2:	dbra	%d0,1b
	movem.l	(%sp)+,%d0/%d1
#endif
	lea	(%a0,%d0.w),%a1
	fp_put_pc %a1			| skip the immediate operand
|	jra	fp_fetchsource
241 | |||
| fp_fetchsource: fetch the source operand from the user address in
| %a0 and convert it to the internal extended format at FPD_TEMPFP1.
| The operand format, saved in the upper half of %d1 by fp_fgen_ea,
| selects the conversion routine.
fp_fetchsource:
	move.l	%a0,%a1			| %a1 = user address of operand
	swap	%d1			| recover the operand format
	lea	(FPD_TEMPFP1,FPDATA),%a0
	jmp	([0f:w,%pc,%d1.w*4])

	.align	4
0:	.long	fp_long, fp_single
	.long	fp_ext, fp_pack
	.long	fp_word, fp_double
	.long	fp_byte, fp_ill

fp_long:
	getuser.l (%a1),%d0,fp_err_ua1,%a1
	jsr	fp_conv_long2ext
	jra	fp_getdest

fp_single:
	getuser.l (%a1),%d0,fp_err_ua1,%a1
	jsr	fp_conv_single2ext
	jra	fp_getdest

	| extended format: no value conversion needed, only the 16-bit
	| gap between sign/exponent and mantissa is squeezed out
fp_ext:
	getuser.l (%a1)+,%d0,fp_err_ua1,%a1
	lsr.l	#8,%d0
	lsr.l	#7,%d0
	lsr.w	#1,%d0
	move.l	%d0,(%a0)+
	getuser.l (%a1)+,%d0,fp_err_ua1,%a1
	move.l	%d0,(%a0)+
	getuser.l (%a1),%d0,fp_err_ua1,%a1
	move.l	%d0,(%a0)
	subq.l	#8,%a0			| rewind %a0 to the struct start
	jra	fp_getdest

fp_pack:
	/* not supported yet */
	jra	fp_ill

fp_word:
	getuser.w (%a1),%d0,fp_err_ua1,%a1
	ext.l	%d0			| sign-extend to long
	jsr	fp_conv_long2ext
	jra	fp_getdest

fp_double:
	jsr	fp_conv_double2ext
	jra	fp_getdest

fp_byte:
	getuser.b (%a1),%d0,fp_err_ua1,%a1
	extb.l	%d0			| sign-extend to long
	jsr	fp_conv_long2ext
|	jra	fp_getdest
296 | |||
| fp_getdest: decode the destination FP register, push both operand
| pointers and the common fp_finalrounding return address, then jump
| to the handler selected by the instruction's 7-bit extension opcode.
| Handlers therefore see %a0 = dest, %a1 = source, and "return"
| straight into the rounding code.
fp_getdest:
	move.l	%a0,%a1			| %a1 = source operand
	bfextu	%d2{#22,#3},%d0		| destination register number
	printf	PDECODE,",fp%d\n",1,%d0
	fp_get_fp_reg
	movem.l	%a0/%a1,-(%sp)
	pea	fp_finalrounding	| handlers return through here
	bfextu	%d2{#25,#7},%d0		| extension opcode
	jmp	([0f:w,%pc,%d0*4])

	| handler table, one entry per extension opcode (0x00-0x7f)
	.align	4
0:
	.long	fp_fmove_mem2fp, fp_fint, fp_fsinh, fp_fintrz
	.long	fp_fsqrt, fp_ill, fp_flognp1, fp_ill
	.long	fp_fetoxm1, fp_ftanh, fp_fatan, fp_ill
	.long	fp_fasin, fp_fatanh, fp_fsin, fp_ftan
	.long	fp_fetox, fp_ftwotox, fp_ftentox, fp_ill
	.long	fp_flogn, fp_flog10, fp_flog2, fp_ill
	.long	fp_fabs, fp_fcosh, fp_fneg, fp_ill
	.long	fp_facos, fp_fcos, fp_fgetexp, fp_fgetman
	.long	fp_fdiv, fp_fmod, fp_fadd, fp_fmul
	.long	fpa_fsgldiv, fp_frem, fp_fscale, fpa_fsglmul
	.long	fp_fsub, fp_ill, fp_ill, fp_ill
	.long	fp_ill, fp_ill, fp_ill, fp_ill
	.long	fp_fsincos0, fp_fsincos1, fp_fsincos2, fp_fsincos3
	.long	fp_fsincos4, fp_fsincos5, fp_fsincos6, fp_fsincos7
	.long	fp_fcmp, fp_ill, fp_ftst, fp_ill
	.long	fp_ill, fp_ill, fp_ill, fp_ill
	.long	fp_fsmove, fp_fssqrt, fp_ill, fp_ill
	.long	fp_fdmove, fp_fdsqrt, fp_ill, fp_ill
	.long	fp_ill, fp_ill, fp_ill, fp_ill
	.long	fp_ill, fp_ill, fp_ill, fp_ill
	.long	fp_ill, fp_ill, fp_ill, fp_ill
	.long	fp_ill, fp_ill, fp_ill, fp_ill
	.long	fp_fsabs, fp_ill, fp_fsneg, fp_ill
	.long	fp_fdabs, fp_ill, fp_fdneg, fp_ill
	.long	fp_fsdiv, fp_ill, fp_fsadd, fp_fsmul
	.long	fp_fddiv, fp_ill, fp_fdadd, fp_fdmul
	.long	fp_fssub, fp_ill, fp_ill, fp_ill
	.long	fp_fdsub, fp_ill, fp_ill, fp_ill
	.long	fp_ill, fp_ill, fp_ill, fp_ill
	.long	fp_ill, fp_ill, fp_ill, fp_ill
	.long	fp_ill, fp_ill, fp_ill, fp_ill
	.long	fp_ill, fp_ill, fp_ill, fp_ill
341 | |||
342 | | Instructions follow | ||
343 | |||
| Move an (emulated) ROM constant
| The 5-bit ROM offset selects a 12-byte entry in fp_constants.  The
| 0x801cc0ff bitmask (shifted so the offset's bit lands in the carry)
| marks the offsets whose constants are not exactly representable;
| those raise the INEX2 exception.  The extension opcode in %d2 is
| then cleared so decoding continues as a plain fmove (opcode 0 =
| fp_fmove_mem2fp) of the constant into the destination register.
fp_fmovecr:
	bfextu	%d2{#27,#5},%d0		| ROM offset
	printf	PINSTR,"fp_fmovecr #%d",1,%d0
	move.l	%d0,%d1
	add.l	%d0,%d0
	add.l	%d1,%d0			| %d0 = offset * 3 longwords
	lea	(fp_constants,%d0*4),%a0
	move.l	#0x801cc0ff,%d0		| mask of inexact constants
	addq.l	#1,%d1
	lsl.l	%d1,%d0			| offset's mask bit -> carry
	jcc	1f
	fp_set_sr FPSR_EXC_INEX2	| INEX2 exception
1:	moveq	#-128,%d0		| continue with fmove
	and.l	%d0,%d2			| clear the extension opcode
	jra	fp_getdest

	.data
	.align	4
fp_constants:
	.long	0x00004000,0xc90fdaa2,0x2168c235 | pi
	| ten 12-byte zero fillers; presumably the unused ROM offsets
	| between pi and log10(2) -- TODO confirm against 68881 docs
	.extend	0,0,0,0,0,0,0,0,0,0
	.long	0x00003ffd,0x9a209a84,0xfbcff798 | log10(2)
	.long	0x00004000,0xadf85458,0xa2bb4a9a | e
	.long	0x00003fff,0xb8aa3b29,0x5c17f0bc | log2(e)
	.long	0x00003ffd,0xde5bd8a9,0x37287195 | log10(e)
	.long	0x00000000,0x00000000,0x00000000 | 0.0
	.long	0x00003ffe,0xb17217f7,0xd1cf79ac | ln(2)
	.long	0x00004000,0x935d8ddd,0xaaa8ac17 | ln(10)
	| read this as "1.0 * 2^0" - note the high bit in the mantissa
	.long	0x00003fff,0x80000000,0x00000000 | 10^0
	.long	0x00004002,0xa0000000,0x00000000 | 10^1
	.long	0x00004005,0xc8000000,0x00000000 | 10^2
	.long	0x0000400c,0x9c400000,0x00000000 | 10^4
	.long	0x00004019,0xbebc2000,0x00000000 | 10^8
	.long	0x00004034,0x8e1bc9bf,0x04000000 | 10^16
	.long	0x00004069,0x9dc5ada8,0x2b70b59e | 10^32
	.long	0x000040d3,0xc2781f49,0xffcfa6d5 | 10^64
	.long	0x000041a8,0x93ba47c9,0x80e98ce0 | 10^128
	.long	0x00004351,0xaa7eebfb,0x9df9de8e | 10^256
	.long	0x000046a3,0xe319a0ae,0xa60e91c7 | 10^512
	.long	0x00004d48,0xc9767586,0x81750c17 | 10^1024
	.long	0x00005a92,0x9e8b3b5d,0xc53d5de5 | 10^2048
	.long	0x00007525,0xc4605202,0x8a20979b | 10^4096
	.previous
389 | |||
| fp_fmove_mem2fp: plain fmove — copy the 12-byte source operand at
| %a1 into the destination register at %a0, then rewind %a0 so the
| rounding code finds the start of the destination.
fp_fmove_mem2fp:
	printf	PINSTR,"fmove %p,%p\n",2,%a0,%a1
	move.l	(%a1)+,(%a0)+
	move.l	(%a1)+,(%a0)+
	move.l	(%a1),(%a0)
	subq.l	#8,%a0			| rewind to start of dest
	rts
397 | |||
| fsglmul/fsgldiv: like fmul/fdiv but with a single-precision result;
| the saved fp_finalrounding return address on the stack is replaced
| with the fast single-precision rounding variant before tail-jumping
| to the real routine.
fpa_fsglmul:
	move.l	#fp_finalrounding_single_fast,(%sp)
	jra	fp_fsglmul

fpa_fsgldiv:
	move.l	#fp_finalrounding_single_fast,(%sp)
	jra	fp_fsgldiv
405 | |||
| Wrap an arithmetic routine so its result is rounded to single
| precision: swap the fp_finalrounding return address on the stack
| for fp_finalrounding_single, then tail-jump to the real routine.
.macro fp_dosingleprec instr
	printf	PINSTR,"single "
	move.l	#fp_finalrounding_single,(%sp)
	jra	\instr
.endm

| Same, but round the result to double precision.
.macro fp_dodoubleprec instr
	printf	PINSTR,"double "
	move.l	#fp_finalrounding_double,(%sp)
	jra	\instr
.endm
417 | |||
| Single- and double-rounded variants (fsmove, fssqrt, fsabs, ...) of
| the basic operations, built from the rounding-override macros.
fp_fsmove:
	fp_dosingleprec fp_fmove_mem2fp

fp_fssqrt:
	fp_dosingleprec fp_fsqrt

fp_fdmove:
	fp_dodoubleprec fp_fmove_mem2fp

fp_fdsqrt:
	fp_dodoubleprec fp_fsqrt

fp_fsabs:
	fp_dosingleprec fp_fabs

fp_fsneg:
	fp_dosingleprec fp_fneg

fp_fdabs:
	fp_dodoubleprec fp_fabs

fp_fdneg:
	fp_dodoubleprec fp_fneg

fp_fsdiv:
	fp_dosingleprec fp_fdiv

fp_fsadd:
	fp_dosingleprec fp_fadd

fp_fsmul:
	fp_dosingleprec fp_fmul

fp_fddiv:
	fp_dodoubleprec fp_fdiv

fp_fdadd:
	fp_dodoubleprec fp_fadd

fp_fdmul:
	fp_dodoubleprec fp_fmul

fp_fssub:
	fp_dosingleprec fp_fsub

fp_fdsub:
	fp_dodoubleprec fp_fsub
465 | |||
| fp_nonstd: the opcode at the PC is not a cpid-1 F-line instruction;
| print a diagnostic and return -1 in %d0 to tell the caller the
| instruction was not handled by the emulator.
fp_nonstd:
	fp_get_pc %a0
	getuser.l (%a0),%d0,fp_err_ua1,%a0
	printf	,"nonstd ((%08x)=%08x)\n",2,%a0,%d0
	moveq	#-1,%d0
	rts
472 | |||
	.data
	.align	4

	| data sizes corresponding to the operand formats
	| (long, single, extended, packed, word, double, byte, -)
fp_datasize:
	.word	4, 4, 12, 12, 2, 8, 1, 0
diff --git a/arch/m68k/math-emu/fp_trig.c b/arch/m68k/math-emu/fp_trig.c new file mode 100644 index 000000000000..6361d0784df2 --- /dev/null +++ b/arch/m68k/math-emu/fp_trig.c | |||
@@ -0,0 +1,183 @@ | |||
1 | /* | ||
2 | |||
3 | fp_trig.c: floating-point math routines for the Linux-m68k | ||
4 | floating point emulator. | ||
5 | |||
6 | Copyright (c) 1998-1999 David Huggins-Daines / Roman Zippel. | ||
7 | |||
8 | I hereby give permission, free of charge, to copy, modify, and | ||
9 | redistribute this software, in source or binary form, provided that | ||
10 | the above copyright notice and the following disclaimer are included | ||
11 | in all such copies. | ||
12 | |||
13 | THIS SOFTWARE IS PROVIDED "AS IS", WITH ABSOLUTELY NO WARRANTY, REAL | ||
14 | OR IMPLIED. | ||
15 | |||
16 | */ | ||
17 | |||
18 | #include "fp_emu.h" | ||
19 | #include "fp_trig.h" | ||
20 | |||
/*
 * Monadic trigonometric/hyperbolic instructions.
 *
 * All of these are unimplemented stubs: each logs the instruction
 * name via uprint(), runs the generic monadic operand checks
 * (fp_monadic_check), and returns the destination operand otherwise
 * unchanged -- no actual computation is performed yet.
 */

/* fsin: stub, computes nothing. */
struct fp_ext *
fp_fsin(struct fp_ext *dest, struct fp_ext *src)
{
	uprint("fsin\n");

	fp_monadic_check(dest, src);

	return dest;
}

/* fcos: stub, computes nothing. */
struct fp_ext *
fp_fcos(struct fp_ext *dest, struct fp_ext *src)
{
	uprint("fcos\n");

	fp_monadic_check(dest, src);

	return dest;
}

/* ftan: stub, computes nothing. */
struct fp_ext *
fp_ftan(struct fp_ext *dest, struct fp_ext *src)
{
	uprint("ftan\n");

	fp_monadic_check(dest, src);

	return dest;
}

/* fasin: stub, computes nothing. */
struct fp_ext *
fp_fasin(struct fp_ext *dest, struct fp_ext *src)
{
	uprint("fasin\n");

	fp_monadic_check(dest, src);

	return dest;
}

/* facos: stub, computes nothing. */
struct fp_ext *
fp_facos(struct fp_ext *dest, struct fp_ext *src)
{
	uprint("facos\n");

	fp_monadic_check(dest, src);

	return dest;
}

/* fatan: stub, computes nothing. */
struct fp_ext *
fp_fatan(struct fp_ext *dest, struct fp_ext *src)
{
	uprint("fatan\n");

	fp_monadic_check(dest, src);

	return dest;
}

/* fsinh: stub, computes nothing. */
struct fp_ext *
fp_fsinh(struct fp_ext *dest, struct fp_ext *src)
{
	uprint("fsinh\n");

	fp_monadic_check(dest, src);

	return dest;
}

/* fcosh: stub, computes nothing. */
struct fp_ext *
fp_fcosh(struct fp_ext *dest, struct fp_ext *src)
{
	uprint("fcosh\n");

	fp_monadic_check(dest, src);

	return dest;
}

/* ftanh: stub, computes nothing. */
struct fp_ext *
fp_ftanh(struct fp_ext *dest, struct fp_ext *src)
{
	uprint("ftanh\n");

	fp_monadic_check(dest, src);

	return dest;
}

/* fatanh: stub, computes nothing. */
struct fp_ext *
fp_fatanh(struct fp_ext *dest, struct fp_ext *src)
{
	uprint("fatanh\n");

	fp_monadic_check(dest, src);

	return dest;
}
120 | |||
/*
 * fsincos stubs, one per destination-register encoding (0-7).
 * Unimplemented: each only logs its name via uprint() and returns the
 * destination unchanged.
 *
 * NOTE(review): unlike fp_fsin() etc. these do not call
 * fp_monadic_check() -- presumably harmless while the routines compute
 * nothing, but verify when implementing them.
 */

struct fp_ext *
fp_fsincos0(struct fp_ext *dest, struct fp_ext *src)
{
	uprint("fsincos0\n");

	return dest;
}

struct fp_ext *
fp_fsincos1(struct fp_ext *dest, struct fp_ext *src)
{
	uprint("fsincos1\n");

	return dest;
}

struct fp_ext *
fp_fsincos2(struct fp_ext *dest, struct fp_ext *src)
{
	uprint("fsincos2\n");

	return dest;
}

struct fp_ext *
fp_fsincos3(struct fp_ext *dest, struct fp_ext *src)
{
	uprint("fsincos3\n");

	return dest;
}

struct fp_ext *
fp_fsincos4(struct fp_ext *dest, struct fp_ext *src)
{
	uprint("fsincos4\n");

	return dest;
}

struct fp_ext *
fp_fsincos5(struct fp_ext *dest, struct fp_ext *src)
{
	uprint("fsincos5\n");

	return dest;
}

struct fp_ext *
fp_fsincos6(struct fp_ext *dest, struct fp_ext *src)
{
	uprint("fsincos6\n");

	return dest;
}

struct fp_ext *
fp_fsincos7(struct fp_ext *dest, struct fp_ext *src)
{
	uprint("fsincos7\n");

	return dest;
}
diff --git a/arch/m68k/math-emu/fp_trig.h b/arch/m68k/math-emu/fp_trig.h new file mode 100644 index 000000000000..af8b247e9c98 --- /dev/null +++ b/arch/m68k/math-emu/fp_trig.h | |||
@@ -0,0 +1,32 @@ | |||
1 | /* | ||
2 | |||
3 | fp_trig.h: floating-point math routines for the Linux-m68k | ||
4 | floating point emulator. | ||
5 | |||
6 | Copyright (c) 1998 David Huggins-Daines. | ||
7 | |||
8 | I hereby give permission, free of charge, to copy, modify, and | ||
9 | redistribute this software, in source or binary form, provided that | ||
10 | the above copyright notice and the following disclaimer are included | ||
11 | in all such copies. | ||
12 | |||
13 | THIS SOFTWARE IS PROVIDED "AS IS", WITH ABSOLUTELY NO WARRANTY, REAL | ||
14 | OR IMPLIED. | ||
15 | |||
16 | */ | ||
17 | |||
#ifndef FP_TRIG_H
#define FP_TRIG_H

#include "fp_emu.h"

/* floating point trigonometric instructions:

   the arguments to these are in the "internal" extended format, that
   is, an "exploded" version of the 96-bit extended fp format used by
   the 68881.

   they return a status code, which should end up in %d0, if all goes
   well. */

#endif /* FP_TRIG_H */
diff --git a/arch/m68k/math-emu/fp_util.S b/arch/m68k/math-emu/fp_util.S new file mode 100644 index 000000000000..a9f7f0129067 --- /dev/null +++ b/arch/m68k/math-emu/fp_util.S | |||
@@ -0,0 +1,1455 @@ | |||
1 | /* | ||
2 | * fp_util.S | ||
3 | * | ||
4 | * Copyright Roman Zippel, 1997. All rights reserved. | ||
5 | * | ||
6 | * Redistribution and use in source and binary forms, with or without | ||
7 | * modification, are permitted provided that the following conditions | ||
8 | * are met: | ||
9 | * 1. Redistributions of source code must retain the above copyright | ||
10 | * notice, and the entire permission notice in its entirety, | ||
11 | * including the disclaimer of warranties. | ||
12 | * 2. Redistributions in binary form must reproduce the above copyright | ||
13 | * notice, this list of conditions and the following disclaimer in the | ||
14 | * documentation and/or other materials provided with the distribution. | ||
15 | * 3. The name of the author may not be used to endorse or promote | ||
16 | * products derived from this software without specific prior | ||
17 | * written permission. | ||
18 | * | ||
19 | * ALTERNATIVELY, this product may be distributed under the terms of | ||
20 | * the GNU General Public License, in which case the provisions of the GPL are | ||
21 | * required INSTEAD OF the above restrictions. (This clause is | ||
22 | * necessary due to a potential bad interaction between the GPL and | ||
23 | * the restrictions contained in a BSD-style copyright.) | ||
24 | * | ||
25 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
26 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | ||
27 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
28 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, | ||
29 | * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
30 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
31 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | ||
32 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | ||
33 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
34 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | ||
35 | * OF THE POSSIBILITY OF SUCH DAMAGE. | ||
36 | */ | ||
37 | |||
38 | #include <linux/config.h> | ||
39 | #include "fp_emu.h" | ||
40 | |||
41 | /* | ||
42 | * Here are lots of conversion and normalization functions mainly | ||
43 | * used by fp_scan.S | ||
44 | * Note that these functions are optimized for "normal" numbers, | ||
45 | * these are handled first and exit as fast as possible, this is | ||
46 | * especially important for fp_normalize_ext/fp_conv_ext2ext, as | ||
47 | * it's called very often. | ||
48 | * The register usage is optimized for fp_scan.S and which register | ||
49 | * is currently at that time unused, be careful if you want change | ||
50 | * something here. %d0 and %d1 is always usable, sometimes %d2 (or | ||
51 | * only the lower half) most function have to return the %a0 | ||
52 | * unmodified, so that the caller can immediately reuse it. | ||
53 | */ | ||
54 | |||
.globl fp_ill, fp_end

| exits from fp_scan:
| illegal instruction: print a diagnostic and return to the caller
fp_ill:
	printf	,"fp_illegal\n"
	rts
| completed instruction: consistency-check the three longwords at
| (TASK_MM-8 .. TASK_MM,%a2) -- if any has its high bit set an "oops"
| diagnostic is printed (debug aid; the exact invariant being checked
| is not evident from this file -- TODO confirm against fp_entry.S).
| Always returns 0 in %d0.
fp_end:
	tst.l	(TASK_MM-8,%a2)
	jmi	1f
	tst.l	(TASK_MM-4,%a2)
	jmi	1f
	tst.l	(TASK_MM,%a2)
	jpl	2f
1:	printf	,"oops:%p,%p,%p\n",3,%a2@(TASK_MM-8),%a2@(TASK_MM-4),%a2@(TASK_MM)
2:	clr.l	%d0
	rts
73 | |||
74 | .globl fp_conv_long2ext, fp_conv_single2ext | ||
75 | .globl fp_conv_double2ext, fp_conv_ext2ext | ||
76 | .globl fp_normalize_ext, fp_normalize_double | ||
77 | .globl fp_normalize_single, fp_normalize_single_fast | ||
78 | .globl fp_conv_ext2double, fp_conv_ext2single | ||
79 | .globl fp_conv_ext2long, fp_conv_ext2short | ||
80 | .globl fp_conv_ext2byte | ||
81 | .globl fp_finalrounding_single, fp_finalrounding_single_fast | ||
82 | .globl fp_finalrounding_double | ||
83 | .globl fp_finalrounding, fp_finaltest, fp_final | ||
84 | |||
85 | /* | ||
86 | * First several conversion functions from a source operand | ||
87 | * into the extended format. Note, that only fp_conv_ext2ext | ||
88 | * normalizes the number and is always called after the other | ||
89 | * conversion functions, which only move the information into | ||
90 | * fp_ext structure. | ||
91 | */ | ||
92 | |||
| fp_conv_long2ext:
|
| args:	%d0 = source (32-bit long)
|	%a0 = destination (ptr to struct fp_ext)
|
| Stores sign, a biased exponent of 0x3fff+31 (value = mantissa *
| 2^-31) and the magnitude as the high mantissa lword.  The result is
| not normalized here; per the file header that is left to
| fp_conv_ext2ext.

fp_conv_long2ext:
	printf	PCONV,"l2e: %p -> %p(",2,%d0,%a0
	clr.l	%d1			| sign defaults to zero
	tst.l	%d0
	jeq	fp_l2e_zero		| is source zero?
	jpl	1f			| positive?
	moveq	#1,%d1			| negative: set sign,
	neg.l	%d0			| keep the magnitude
1:	swap	%d1
	move.w	#0x3fff+31,%d1
	move.l	%d1,(%a0)+		| set sign / exp
	move.l	%d0,(%a0)+		| set mantissa
	clr.l	(%a0)
	subq.l	#8,%a0			| restore %a0
	printx	PCONV,%a0@
	printf	PCONV,")\n"
	rts
	| source is zero: emit an all-zero fp_ext
fp_l2e_zero:
	clr.l	(%a0)+
	clr.l	(%a0)+
	clr.l	(%a0)
	subq.l	#8,%a0
	printx	PCONV,%a0@
	printf	PCONV,")\n"
	rts
124 | |||
| fp_conv_single2ext
| args:	%d0 = source (single-precision fp value)
|	%a0 = dest (struct fp_ext *)
|
| Unpacks sign/exponent/mantissa, re-biases the exponent from 127 to
| 16383 and sets the explicit integer bit.  Denormals get a fixed-up
| exponent and are normalized later (by fp_conv_ext2ext); Inf/NaN get
| exponent 0x7fff with the explicit bit cleared.

fp_conv_single2ext:
	printf	PCONV,"s2e: %p -> %p(",2,%d0,%a0
	move.l	%d0,%d1
	lsl.l	#8,%d0			| shift mantissa
	lsr.l	#8,%d1			| exponent / sign
	lsr.l	#7,%d1
	lsr.w	#8,%d1
	jeq	fp_s2e_small		| zero / denormal?
	cmp.w	#0xff,%d1		| NaN / Inf?
	jeq	fp_s2e_large
	bset	#31,%d0			| set explicit bit
	add.w	#0x3fff-0x7f,%d1	| re-bias the exponent.
9:	move.l	%d1,(%a0)+		| fp_ext.sign, fp_ext.exp
	move.l	%d0,(%a0)+		| high lword of fp_ext.mant
	clr.l	(%a0)			| low lword = 0
	subq.l	#8,%a0
	printx	PCONV,%a0@
	printf	PCONV,")\n"
	rts
	| zeros and denormalized
fp_s2e_small:
	| exponent is zero, so explicit bit is already zero too
	tst.l	%d0
	jeq	9b
	move.w	#0x4000-0x7f,%d1	| denormal; normalized later
	jra	9b
	| infinities and NAN
fp_s2e_large:
	bclr	#31,%d0			| clear explicit bit
	move.w	#0x7fff,%d1
	jra	9b
160 | |||
| fp_conv_double2ext:
| args:	%a1 = source (double-precision value, fetched from user space)
|	%a0 = dest (struct fp_ext *)
|
| Like fp_conv_single2ext, but the 52-bit mantissa spans two lwords:
| each half is shifted up 11 bits (lsl #8 + lsl #3) and the top 11
| bits of the low lword are or'ed into the high mantissa lword.

fp_conv_double2ext:
#ifdef FPU_EMU_DEBUG
	getuser.l %a1@(0),%d0,fp_err_ua2,%a1
	getuser.l %a1@(4),%d1,fp_err_ua2,%a1
	printf	PCONV,"d2e: %p%p -> %p(",3,%d0,%d1,%a0
#endif
	getuser.l (%a1)+,%d0,fp_err_ua2,%a1
	move.l	%d0,%d1
	lsl.l	#8,%d0			| shift high mantissa
	lsl.l	#3,%d0			| (11 bits in total)
	lsr.l	#8,%d1			| exponent / sign
	lsr.l	#7,%d1
	lsr.w	#5,%d1
	jeq	fp_d2e_small		| zero / denormal?
	cmp.w	#0x7ff,%d1		| NaN / Inf?
	jeq	fp_d2e_large
	bset	#31,%d0			| set explicit bit
	add.w	#0x3fff-0x3ff,%d1	| re-bias the exponent.
9:	move.l	%d1,(%a0)+		| fp_ext.sign, fp_ext.exp
	move.l	%d0,(%a0)+
	getuser.l (%a1)+,%d0,fp_err_ua2,%a1
	move.l	%d0,%d1
	lsl.l	#8,%d0			| shift low mantissa lword
	lsl.l	#3,%d0
	move.l	%d0,(%a0)
	moveq	#21,%d0
	lsr.l	%d0,%d1			| its top 11 bits belong in
	or.l	%d1,-(%a0)		| the high mantissa lword
	subq.l	#4,%a0			| restore %a0
	printx	PCONV,%a0@
	printf	PCONV,")\n"
	rts
	| zeros and denormalized
fp_d2e_small:
	| exponent is zero, so explicit bit is already zero too
	tst.l	%d0
	jeq	9b
	move.w	#0x4000-0x3ff,%d1	| denormal; normalized later
	jra	9b
	| infinities and NAN
fp_d2e_large:
	bclr	#31,%d0			| clear explicit bit
	move.w	#0x7fff,%d1
	jra	9b
205 | |||
206 | | fp_conv_ext2ext: | ||
207 | | originally used to get longdouble from userspace, now it's | ||
208 | | called before arithmetic operations to make sure the number | ||
209 | | is normalized [maybe rename it?]. | ||
210 | | args: %a0 = dest (struct fp_ext *) | ||
211 | | returns 0 in %d0 for a NaN, otherwise 1 | ||
| NOTE(review): the offsets used below suggest struct fp_ext is laid
| out as +0 extra-precision byte, +1 sign, +2 exponent word, and
| +4/+8 the two mantissa lwords -- confirm against fp_emu.h.
| %d0-%d2 are clobbered; %a0 points at the fp_ext again on every
| return path (post-increments are undone before each rts).
212 | |||
213 | fp_conv_ext2ext: | ||
214 | printf PCONV,"e2e: %p(",1,%a0 | ||
215 | printx PCONV,%a0@ | ||
216 | printf PCONV,"), " | ||
217 | move.l (%a0)+,%d0 | ||
218 | cmp.w #0x7fff,%d0 | Inf / NaN? | ||
219 | jeq fp_e2e_large | ||
220 | move.l (%a0),%d0 | ||
221 | jpl fp_e2e_small | zero / denorm? | ||
222 | | The high bit is set, so normalization is irrelevant. | ||
223 | fp_e2e_checkround: | ||
224 | subq.l #4,%a0 | ||
225 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | ||
226 | move.b (%a0),%d0 | ||
227 | jne fp_e2e_round | ||
228 | #endif | ||
229 | printf PCONV,"%p(",1,%a0 | ||
230 | printx PCONV,%a0@ | ||
231 | printf PCONV,")\n" | ||
232 | moveq #1,%d0 | ||
233 | rts | ||
234 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | ||
235 | fp_e2e_round: | ||
236 | fp_set_sr FPSR_EXC_INEX2 | ||
237 | clr.b (%a0) | ||
238 | move.w (FPD_RND,FPDATA),%d2 | ||
239 | jne fp_e2e_roundother | %d2 == 0, round to nearest | ||
240 | tst.b %d0 | test guard bit | ||
241 | jpl 9f | zero is closer | ||
242 | btst #0,(11,%a0) | test lsb bit | ||
243 | jne fp_e2e_doroundup | round to infinity | ||
244 | lsl.b #1,%d0 | check low bits | ||
245 | jeq 9f | round to zero | ||
| carry ripples from the low mantissa lword up through the high
| lword and, on full mantissa overflow, into the exponent.
246 | fp_e2e_doroundup: | ||
247 | addq.l #1,(8,%a0) | ||
248 | jcc 9f | ||
249 | addq.l #1,(4,%a0) | ||
250 | jcc 9f | ||
251 | move.w #0x8000,(4,%a0) | ||
252 | addq.w #1,(2,%a0) | ||
253 | 9: printf PNORM,"%p(",1,%a0 | ||
254 | printx PNORM,%a0@ | ||
255 | printf PNORM,")\n" | ||
256 | rts | ||
257 | fp_e2e_roundother: | ||
258 | subq.w #2,%d2 | ||
259 | jcs 9b | %d2 < 2, round to zero | ||
260 | jhi 1f | %d2 > 2, round to +infinity | ||
261 | tst.b (1,%a0) | to -inf | ||
262 | jne fp_e2e_doroundup | negative, round to infinity | ||
263 | jra 9b | positive, round to zero | ||
264 | 1: tst.b (1,%a0) | to +inf | ||
265 | jeq fp_e2e_doroundup | positive, round to infinity | ||
266 | jra 9b | negative, round to zero | ||
267 | #endif | ||
268 | | zeros and subnormals: | ||
269 | | try to normalize these anyway. | ||
270 | fp_e2e_small: | ||
271 | jne fp_e2e_small1 | high lword zero? | ||
272 | move.l (4,%a0),%d0 | ||
273 | jne fp_e2e_small2 | ||
274 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | ||
275 | clr.l %d0 | ||
276 | move.b (-4,%a0),%d0 | ||
277 | jne fp_e2e_small3 | ||
278 | #endif | ||
279 | | Genuine zero. | ||
280 | clr.w -(%a0) | ||
281 | subq.l #2,%a0 | ||
282 | printf PNORM,"%p(",1,%a0 | ||
283 | printx PNORM,%a0@ | ||
284 | printf PNORM,")\n" | ||
285 | moveq #1,%d0 | ||
286 | rts | ||
287 | | definitely subnormal, need to shift all 64 bits | ||
288 | fp_e2e_small1: | ||
289 | bfffo %d0{#0,#32},%d1 | ||
290 | move.w -(%a0),%d2 | ||
291 | sub.w %d1,%d2 | ||
292 | jcc 1f | ||
293 | | Pathologically small, denormalize. | ||
294 | add.w %d2,%d1 | ||
295 | clr.w %d2 | ||
296 | 1: move.w %d2,(%a0)+ | ||
297 | move.w %d1,%d2 | ||
298 | jeq fp_e2e_checkround | ||
299 | | fancy 64-bit double-shift begins here | ||
300 | lsl.l %d2,%d0 | ||
301 | move.l %d0,(%a0)+ | ||
302 | move.l (%a0),%d0 | ||
303 | move.l %d0,%d1 | ||
304 | lsl.l %d2,%d0 | ||
305 | move.l %d0,(%a0) | ||
306 | neg.w %d2 | ||
307 | and.w #0x1f,%d2 | ||
308 | lsr.l %d2,%d1 | ||
309 | or.l %d1,-(%a0) | ||
310 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | ||
311 | fp_e2e_extra1: | ||
312 | clr.l %d0 | ||
313 | move.b (-4,%a0),%d0 | ||
314 | neg.w %d2 | ||
315 | add.w #24,%d2 | ||
316 | jcc 1f | ||
317 | clr.b (-4,%a0) | ||
318 | lsl.l %d2,%d0 | ||
319 | or.l %d0,(4,%a0) | ||
320 | jra fp_e2e_checkround | ||
321 | 1: addq.w #8,%d2 | ||
322 | lsl.l %d2,%d0 | ||
323 | move.b %d0,(-4,%a0) | ||
324 | lsr.l #8,%d0 | ||
325 | or.l %d0,(4,%a0) | ||
326 | #endif | ||
327 | jra fp_e2e_checkround | ||
328 | | pathologically small subnormal | ||
329 | fp_e2e_small2: | ||
330 | bfffo %d0{#0,#32},%d1 | ||
331 | add.w #32,%d1 | ||
332 | move.w -(%a0),%d2 | ||
333 | sub.w %d1,%d2 | ||
334 | jcc 1f | ||
335 | | Beyond pathologically small, denormalize. | ||
336 | add.w %d2,%d1 | ||
337 | clr.w %d2 | ||
338 | 1: move.w %d2,(%a0)+ | ||
339 | ext.l %d1 | ||
340 | jeq fp_e2e_checkround | ||
341 | clr.l (4,%a0) | ||
342 | sub.w #32,%d2 | ||
343 | jcs 1f | ||
344 | lsl.l %d1,%d0 | lower lword needs only to be shifted | ||
345 | move.l %d0,(%a0) | into the higher lword | ||
346 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | ||
347 | clr.l %d0 | ||
348 | move.b (-4,%a0),%d0 | ||
349 | clr.b (-4,%a0) | ||
350 | neg.w %d1 | ||
351 | add.w #32,%d1 | ||
352 | bfins %d0,(%a0){%d1,#8} | ||
353 | #endif | ||
354 | jra fp_e2e_checkround | ||
355 | 1: neg.w %d1 | lower lword is split between | ||
356 | bfins %d0,(%a0){%d1,#32} | higher and lower lword | ||
357 | #ifndef CONFIG_M68KFPU_EMU_EXTRAPREC | ||
358 | jra fp_e2e_checkround | ||
359 | #else | ||
360 | move.w %d1,%d2 | ||
361 | jra fp_e2e_extra1 | ||
362 | | These are extremely small numbers, that will mostly end up as zero | ||
363 | | anyway, so this is only important for correct rounding. | ||
364 | fp_e2e_small3: | ||
365 | bfffo %d0{#24,#8},%d1 | ||
366 | add.w #40,%d1 | ||
367 | move.w -(%a0),%d2 | ||
368 | sub.w %d1,%d2 | ||
369 | jcc 1f | ||
370 | | Pathologically small, denormalize. | ||
371 | add.w %d2,%d1 | ||
372 | clr.w %d2 | ||
373 | 1: move.w %d2,(%a0)+ | ||
374 | ext.l %d1 | ||
375 | jeq fp_e2e_checkround | ||
376 | cmp.w #8,%d1 | ||
377 | jcs 2f | ||
378 | 1: clr.b (-4,%a0) | ||
379 | sub.w #64,%d1 | ||
380 | jcs 1f | ||
381 | add.w #24,%d1 | ||
382 | lsl.l %d1,%d0 | ||
383 | move.l %d0,(%a0) | ||
384 | jra fp_e2e_checkround | ||
385 | 1: neg.w %d1 | ||
386 | bfins %d0,(%a0){%d1,#8} | ||
387 | jra fp_e2e_checkround | ||
388 | 2: lsl.l %d1,%d0 | ||
389 | move.b %d0,(-4,%a0) | ||
390 | lsr.l #8,%d0 | ||
391 | move.b %d0,(7,%a0) | ||
392 | jra fp_e2e_checkround | ||
393 | #endif | ||
394 | 1: move.l %d0,%d1 | lower lword is split between | ||
395 | lsl.l %d2,%d0 | higher and lower lword | ||
396 | move.l %d0,(%a0) | ||
397 | move.l %d1,%d0 | ||
398 | neg.w %d2 | ||
399 | add.w #32,%d2 | ||
400 | lsr.l %d2,%d0 | ||
401 | move.l %d0,-(%a0) | ||
402 | jra fp_e2e_checkround | ||
403 | | Infinities and NaNs | ||
404 | fp_e2e_large: | ||
405 | move.l (%a0)+,%d0 | ||
406 | jne 3f | ||
407 | 1: tst.l (%a0) | ||
408 | jne 4f | ||
409 | moveq #1,%d0 | ||
410 | 2: subq.l #8,%a0 | ||
411 | printf PCONV,"%p(",1,%a0 | ||
412 | printx PCONV,%a0@ | ||
413 | printf PCONV,")\n" | ||
414 | rts | ||
415 | | we have maybe a NaN, shift off the highest bit | ||
416 | 3: lsl.l #1,%d0 | ||
417 | jeq 1b | ||
418 | | we have a NaN, clear the return value | ||
419 | 4: clrl %d0 | ||
420 | jra 2b | ||
421 | |||
422 | |||
423 | /* | ||
424 | * Normalization functions. Call these on the output of general | ||
425 | * FP operators, and before any conversion into the destination | ||
426 | * formats. fp_normalize_ext always has to be called first; the | ||
427 | * following conversion functions expect an already normalized | ||
428 | * number. | ||
429 | */ | ||
430 | |||
431 | | fp_normalize_ext: | ||
432 | | normalize an extended in extended (unpacked) format, basically | ||
433 | | it does the same as fp_conv_ext2ext, additionally it also does | ||
434 | | the necessary postprocessing checks. | ||
435 | | args: %a0 (struct fp_ext *) | ||
436 | | NOTE: it does _not_ modify %a0/%a1 and the upper word of %d2 | ||
| NOTE(review): same flow as fp_conv_ext2ext above, but it also
| raises FPSR exception bits: UNFL when a value has to remain
| denormalized, and SNAN when a signaling NaN is quieted by
| setting bit 30 of the mantissa. No NaN flag is returned in %d0.
437 | |||
438 | fp_normalize_ext: | ||
439 | printf PNORM,"ne: %p(",1,%a0 | ||
440 | printx PNORM,%a0@ | ||
441 | printf PNORM,"), " | ||
442 | move.l (%a0)+,%d0 | ||
443 | cmp.w #0x7fff,%d0 | Inf / NaN? | ||
444 | jeq fp_ne_large | ||
445 | move.l (%a0),%d0 | ||
446 | jpl fp_ne_small | zero / denorm? | ||
447 | | The high bit is set, so normalization is irrelevant. | ||
448 | fp_ne_checkround: | ||
449 | subq.l #4,%a0 | ||
450 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | ||
451 | move.b (%a0),%d0 | ||
452 | jne fp_ne_round | ||
453 | #endif | ||
454 | printf PNORM,"%p(",1,%a0 | ||
455 | printx PNORM,%a0@ | ||
456 | printf PNORM,")\n" | ||
457 | rts | ||
458 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | ||
459 | fp_ne_round: | ||
460 | fp_set_sr FPSR_EXC_INEX2 | ||
461 | clr.b (%a0) | ||
462 | move.w (FPD_RND,FPDATA),%d2 | ||
463 | jne fp_ne_roundother | %d2 == 0, round to nearest | ||
464 | tst.b %d0 | test guard bit | ||
465 | jpl 9f | zero is closer | ||
466 | btst #0,(11,%a0) | test lsb bit | ||
467 | jne fp_ne_doroundup | round to infinity | ||
468 | lsl.b #1,%d0 | check low bits | ||
469 | jeq 9f | round to zero | ||
470 | fp_ne_doroundup: | ||
471 | addq.l #1,(8,%a0) | ||
472 | jcc 9f | ||
473 | addq.l #1,(4,%a0) | ||
474 | jcc 9f | ||
475 | addq.w #1,(2,%a0) | ||
476 | move.w #0x8000,(4,%a0) | ||
477 | 9: printf PNORM,"%p(",1,%a0 | ||
478 | printx PNORM,%a0@ | ||
479 | printf PNORM,")\n" | ||
480 | rts | ||
481 | fp_ne_roundother: | ||
482 | subq.w #2,%d2 | ||
483 | jcs 9b | %d2 < 2, round to zero | ||
484 | jhi 1f | %d2 > 2, round to +infinity | ||
485 | tst.b (1,%a0) | to -inf | ||
486 | jne fp_ne_doroundup | negative, round to infinity | ||
487 | jra 9b | positive, round to zero | ||
488 | 1: tst.b (1,%a0) | to +inf | ||
489 | jeq fp_ne_doroundup | positive, round to infinity | ||
490 | jra 9b | negative, round to zero | ||
491 | #endif | ||
492 | | Zeros and subnormal numbers | ||
493 | | These are probably merely subnormal, rather than "denormalized" | ||
494 | | numbers, so we will try to make them normal again. | ||
495 | fp_ne_small: | ||
496 | jne fp_ne_small1 | high lword zero? | ||
497 | move.l (4,%a0),%d0 | ||
498 | jne fp_ne_small2 | ||
499 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | ||
500 | clr.l %d0 | ||
501 | move.b (-4,%a0),%d0 | ||
502 | jne fp_ne_small3 | ||
503 | #endif | ||
504 | | Genuine zero. | ||
505 | clr.w -(%a0) | ||
506 | subq.l #2,%a0 | ||
507 | printf PNORM,"%p(",1,%a0 | ||
508 | printx PNORM,%a0@ | ||
509 | printf PNORM,")\n" | ||
510 | rts | ||
511 | | Subnormal. | ||
512 | fp_ne_small1: | ||
513 | bfffo %d0{#0,#32},%d1 | ||
514 | move.w -(%a0),%d2 | ||
515 | sub.w %d1,%d2 | ||
516 | jcc 1f | ||
517 | | Pathologically small, denormalize. | ||
518 | add.w %d2,%d1 | ||
519 | clr.w %d2 | ||
520 | fp_set_sr FPSR_EXC_UNFL | ||
521 | 1: move.w %d2,(%a0)+ | ||
522 | move.w %d1,%d2 | ||
523 | jeq fp_ne_checkround | ||
524 | | This is exactly the same 64-bit double shift as seen above. | ||
525 | lsl.l %d2,%d0 | ||
526 | move.l %d0,(%a0)+ | ||
527 | move.l (%a0),%d0 | ||
528 | move.l %d0,%d1 | ||
529 | lsl.l %d2,%d0 | ||
530 | move.l %d0,(%a0) | ||
531 | neg.w %d2 | ||
532 | and.w #0x1f,%d2 | ||
533 | lsr.l %d2,%d1 | ||
534 | or.l %d1,-(%a0) | ||
535 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | ||
536 | fp_ne_extra1: | ||
537 | clr.l %d0 | ||
538 | move.b (-4,%a0),%d0 | ||
539 | neg.w %d2 | ||
540 | add.w #24,%d2 | ||
541 | jcc 1f | ||
542 | clr.b (-4,%a0) | ||
543 | lsl.l %d2,%d0 | ||
544 | or.l %d0,(4,%a0) | ||
545 | jra fp_ne_checkround | ||
546 | 1: addq.w #8,%d2 | ||
547 | lsl.l %d2,%d0 | ||
548 | move.b %d0,(-4,%a0) | ||
549 | lsr.l #8,%d0 | ||
550 | or.l %d0,(4,%a0) | ||
551 | #endif | ||
552 | jra fp_ne_checkround | ||
553 | | May or may not be subnormal, if so, only 32 bits to shift. | ||
554 | fp_ne_small2: | ||
555 | bfffo %d0{#0,#32},%d1 | ||
556 | add.w #32,%d1 | ||
557 | move.w -(%a0),%d2 | ||
558 | sub.w %d1,%d2 | ||
559 | jcc 1f | ||
560 | | Beyond pathologically small, denormalize. | ||
561 | add.w %d2,%d1 | ||
562 | clr.w %d2 | ||
563 | fp_set_sr FPSR_EXC_UNFL | ||
564 | 1: move.w %d2,(%a0)+ | ||
565 | ext.l %d1 | ||
566 | jeq fp_ne_checkround | ||
567 | clr.l (4,%a0) | ||
568 | sub.w #32,%d1 | ||
569 | jcs 1f | ||
570 | lsl.l %d1,%d0 | lower lword needs only to be shifted | ||
571 | move.l %d0,(%a0) | into the higher lword | ||
572 | #ifdef CONFIG_M68KFPU_EMU_EXTRAPREC | ||
573 | clr.l %d0 | ||
574 | move.b (-4,%a0),%d0 | ||
575 | clr.b (-4,%a0) | ||
576 | neg.w %d1 | ||
577 | add.w #32,%d1 | ||
578 | bfins %d0,(%a0){%d1,#8} | ||
579 | #endif | ||
580 | jra fp_ne_checkround | ||
581 | 1: neg.w %d1 | lower lword is split between | ||
582 | bfins %d0,(%a0){%d1,#32} | higher and lower lword | ||
583 | #ifndef CONFIG_M68KFPU_EMU_EXTRAPREC | ||
584 | jra fp_ne_checkround | ||
585 | #else | ||
586 | move.w %d1,%d2 | ||
587 | jra fp_ne_extra1 | ||
588 | | These are extremely small numbers, that will mostly end up as zero | ||
589 | | anyway, so this is only important for correct rounding. | ||
590 | fp_ne_small3: | ||
591 | bfffo %d0{#24,#8},%d1 | ||
592 | add.w #40,%d1 | ||
593 | move.w -(%a0),%d2 | ||
594 | sub.w %d1,%d2 | ||
595 | jcc 1f | ||
596 | | Pathologically small, denormalize. | ||
597 | add.w %d2,%d1 | ||
598 | clr.w %d2 | ||
599 | 1: move.w %d2,(%a0)+ | ||
600 | ext.l %d1 | ||
601 | jeq fp_ne_checkround | ||
602 | cmp.w #8,%d1 | ||
603 | jcs 2f | ||
604 | 1: clr.b (-4,%a0) | ||
605 | sub.w #64,%d1 | ||
606 | jcs 1f | ||
607 | add.w #24,%d1 | ||
608 | lsl.l %d1,%d0 | ||
609 | move.l %d0,(%a0) | ||
610 | jra fp_ne_checkround | ||
611 | 1: neg.w %d1 | ||
612 | bfins %d0,(%a0){%d1,#8} | ||
613 | jra fp_ne_checkround | ||
614 | 2: lsl.l %d1,%d0 | ||
615 | move.b %d0,(-4,%a0) | ||
616 | lsr.l #8,%d0 | ||
617 | move.b %d0,(7,%a0) | ||
618 | jra fp_ne_checkround | ||
619 | #endif | ||
620 | | Infinities and NaNs, again, same as above. | ||
621 | fp_ne_large: | ||
622 | move.l (%a0)+,%d0 | ||
623 | jne 3f | ||
624 | 1: tst.l (%a0) | ||
625 | jne 4f | ||
626 | 2: subq.l #8,%a0 | ||
627 | printf PNORM,"%p(",1,%a0 | ||
628 | printx PNORM,%a0@ | ||
629 | printf PNORM,")\n" | ||
630 | rts | ||
631 | | we have maybe a NaN, shift off the highest bit | ||
632 | 3: move.l %d0,%d1 | ||
633 | lsl.l #1,%d1 | ||
634 | jne 4f | ||
635 | clr.l (-4,%a0) | ||
636 | jra 1b | ||
637 | | we have a NaN, test if it is signaling | ||
| bset sets Z from the bit's previous value: jne taken means bit 30
| was already set, i.e. the NaN was already quiet.
638 | 4: bset #30,%d0 | ||
639 | jne 2b | ||
640 | fp_set_sr FPSR_EXC_SNAN | ||
641 | move.l %d0,(-4,%a0) | ||
642 | jra 2b | ||
643 | |||
644 | | these next two do rounding as per the IEEE standard. | ||
645 | | values for the rounding modes appear to be: | ||
646 | | 0: Round to nearest | ||
647 | | 1: Round to zero | ||
648 | | 2: Round to -Infinity | ||
649 | | 3: Round to +Infinity | ||
650 | | both functions expect that fp_normalize was already | ||
651 | | called (and extended argument is already normalized | ||
652 | | as far as possible), these are used if a different | ||
653 | | rounding precision is selected and before converting | ||
654 | | into single/double | ||
655 | |||
656 | | fp_normalize_double: | ||
657 | | normalize an extended with double (52-bit) precision | ||
658 | | args: %a0 (struct fp_ext *) | ||
| NOTE(review): the result stays in fp_ext format; the mantissa is
| rounded so its low 11 bits become zero (53 significant bits) and
| the exponent is checked against the double range, raising
| INEX2/UNFL/OVFL in the FPSR as appropriate.
659 | |||
660 | fp_normalize_double: | ||
661 | printf PNORM,"nd: %p(",1,%a0 | ||
662 | printx PNORM,%a0@ | ||
663 | printf PNORM,"), " | ||
664 | move.l (%a0)+,%d2 | ||
665 | tst.w %d2 | ||
666 | jeq fp_nd_zero | zero / denormalized | ||
667 | cmp.w #0x7fff,%d2 | ||
668 | jeq fp_nd_huge | NaN / infinity. | ||
669 | sub.w #0x4000-0x3ff,%d2 | will the exponent fit? | ||
670 | jcs fp_nd_small | too small. | ||
671 | cmp.w #0x7fe,%d2 | ||
672 | jcc fp_nd_large | too big. | ||
673 | addq.l #4,%a0 | ||
674 | move.l (%a0),%d0 | low lword of mantissa | ||
675 | | now, round off the low 11 bits. | ||
676 | fp_nd_round: | ||
677 | moveq #21,%d1 | ||
678 | lsl.l %d1,%d0 | keep 11 low bits. | ||
679 | jne fp_nd_checkround | Are they non-zero? | ||
680 | | nothing to do here | ||
681 | 9: subq.l #8,%a0 | ||
682 | printf PNORM,"%p(",1,%a0 | ||
683 | printx PNORM,%a0@ | ||
684 | printf PNORM,")\n" | ||
685 | rts | ||
686 | | Be careful with the X bit! It contains the lsb | ||
687 | | from the shift above, it is needed for round to nearest. | ||
688 | fp_nd_checkround: | ||
689 | fp_set_sr FPSR_EXC_INEX2 | INEX2 bit | ||
690 | and.w #0xf800,(2,%a0) | clear bits 0-10 | ||
691 | move.w (FPD_RND,FPDATA),%d2 | rounding mode | ||
692 | jne 2f | %d2 == 0, round to nearest | ||
693 | tst.l %d0 | test guard bit | ||
694 | jpl 9b | zero is closer | ||
695 | | here we test the X bit by adding it to %d2 | ||
696 | clr.w %d2 | first set z bit, addx only clears it | ||
697 | addx.w %d2,%d2 | test lsb bit | ||
698 | | IEEE754-specified "round to even" behaviour. If the guard | ||
699 | | bit is set, then the number is odd, so rounding works like | ||
700 | | in grade-school arithmetic (i.e. 1.5 rounds to 2.0) | ||
701 | | Otherwise, an equal distance rounds towards zero, so as not | ||
702 | | to produce an odd number. This is strange, but it is what | ||
703 | | the standard says. | ||
704 | jne fp_nd_doroundup | round to infinity | ||
705 | lsl.l #1,%d0 | check low bits | ||
706 | jeq 9b | round to zero | ||
707 | fp_nd_doroundup: | ||
708 | | round (the mantissa, that is) towards infinity | ||
709 | add.l #0x800,(%a0) | ||
710 | jcc 9b | no overflow, good. | ||
711 | addq.l #1,-(%a0) | extend to high lword | ||
712 | jcc 1f | no overflow, good. | ||
713 | | Yow! we have managed to overflow the mantissa. Since this | ||
714 | | only happens when %d1 was 0xfffff800, it is now zero, so | ||
715 | | reset the high bit, and increment the exponent. | ||
716 | move.w #0x8000,(%a0) | ||
717 | addq.w #1,-(%a0) | ||
718 | cmp.w #0x43ff,(%a0)+ | exponent now overflown? | ||
719 | jeq fp_nd_large | yes, so make it infinity. | ||
720 | 1: subq.l #4,%a0 | ||
721 | printf PNORM,"%p(",1,%a0 | ||
722 | printx PNORM,%a0@ | ||
723 | printf PNORM,")\n" | ||
724 | rts | ||
725 | 2: subq.w #2,%d2 | ||
726 | jcs 9b | %d2 < 2, round to zero | ||
727 | jhi 3f | %d2 > 2, round to +infinity | ||
728 | | Round to +Inf or -Inf. High word of %d2 contains the | ||
729 | | sign of the number, by the way. | ||
730 | swap %d2 | to -inf | ||
731 | tst.b %d2 | ||
732 | jne fp_nd_doroundup | negative, round to infinity | ||
733 | jra 9b | positive, round to zero | ||
734 | 3: swap %d2 | to +inf | ||
735 | tst.b %d2 | ||
736 | jeq fp_nd_doroundup | positive, round to infinity | ||
737 | jra 9b | negative, round to zero | ||
738 | | Exponent underflow. Try to make a denormal, and set it to | ||
739 | | the smallest possible fraction if this fails. | ||
740 | fp_nd_small: | ||
741 | fp_set_sr FPSR_EXC_UNFL | set UNFL bit | ||
742 | move.w #0x3c01,(-2,%a0) | 2**-1022 | ||
743 | neg.w %d2 | degree of underflow | ||
744 | cmp.w #32,%d2 | single or double shift? | ||
745 | jcc 1f | ||
746 | | Again, another 64-bit double shift. | ||
747 | move.l (%a0),%d0 | ||
748 | move.l %d0,%d1 | ||
749 | lsr.l %d2,%d0 | ||
750 | move.l %d0,(%a0)+ | ||
751 | move.l (%a0),%d0 | ||
752 | lsr.l %d2,%d0 | ||
753 | neg.w %d2 | ||
754 | add.w #32,%d2 | ||
755 | lsl.l %d2,%d1 | ||
756 | or.l %d1,%d0 | ||
757 | move.l (%a0),%d1 | ||
758 | move.l %d0,(%a0) | ||
759 | | Check to see if we shifted off any significant bits | ||
760 | lsl.l %d2,%d1 | ||
761 | jeq fp_nd_round | Nope, round. | ||
762 | bset #0,%d0 | Yes, so set the "sticky bit". | ||
763 | jra fp_nd_round | Now, round. | ||
764 | | Another 64-bit single shift and store | ||
765 | 1: sub.w #32,%d2 | ||
766 | cmp.w #32,%d2 | Do we really need to shift? | ||
767 | jcc 2f | No, the number is too small. | ||
768 | move.l (%a0),%d0 | ||
769 | clr.l (%a0)+ | ||
770 | move.l %d0,%d1 | ||
771 | lsr.l %d2,%d0 | ||
772 | neg.w %d2 | ||
773 | add.w #32,%d2 | ||
774 | | Again, check to see if we shifted off any significant bits. | ||
775 | tst.l (%a0) | ||
776 | jeq 1f | ||
777 | bset #0,%d0 | Sticky bit. | ||
778 | 1: move.l %d0,(%a0) | ||
779 | lsl.l %d2,%d1 | ||
780 | jeq fp_nd_round | ||
781 | bset #0,%d0 | ||
782 | jra fp_nd_round | ||
783 | | Sorry, the number is just too small. | ||
784 | 2: clr.l (%a0)+ | ||
785 | clr.l (%a0) | ||
786 | moveq #1,%d0 | Smallest possible fraction, | ||
787 | jra fp_nd_round | round as desired. | ||
788 | | zero and denormalized | ||
789 | fp_nd_zero: | ||
790 | tst.l (%a0)+ | ||
791 | jne 1f | ||
792 | tst.l (%a0) | ||
793 | jne 1f | ||
794 | subq.l #8,%a0 | ||
795 | printf PNORM,"%p(",1,%a0 | ||
796 | printx PNORM,%a0@ | ||
797 | printf PNORM,")\n" | ||
798 | rts | zero. nothing to do. | ||
799 | | These are not merely subnormal numbers, but true denormals, | ||
800 | | i.e. pathologically small (exponent is 2**-16383) numbers. | ||
801 | | It is clearly impossible for even a normal extended number | ||
802 | | with that exponent to fit into double precision, so just | ||
803 | | write these ones off as "too darn small". | ||
804 | 1: fp_set_sr FPSR_EXC_UNFL | Set UNFL bit | ||
805 | clr.l (%a0) | ||
806 | clr.l -(%a0) | ||
807 | move.w #0x3c01,-(%a0) | i.e. 2**-1022 | ||
808 | addq.l #6,%a0 | ||
809 | moveq #1,%d0 | ||
810 | jra fp_nd_round | round. | ||
811 | | Exponent overflow. Just call it infinity. | ||
812 | fp_nd_large: | ||
813 | move.w #0x7ff,%d0 | ||
814 | and.w (6,%a0),%d0 | ||
815 | jeq 1f | ||
816 | fp_set_sr FPSR_EXC_INEX2 | ||
817 | 1: fp_set_sr FPSR_EXC_OVFL | ||
818 | move.w (FPD_RND,FPDATA),%d2 | ||
819 | jne 3f | %d2 = 0 round to nearest | ||
820 | 1: move.w #0x7fff,(-2,%a0) | ||
821 | clr.l (%a0)+ | ||
822 | clr.l (%a0) | ||
823 | 2: subq.l #8,%a0 | ||
824 | printf PNORM,"%p(",1,%a0 | ||
825 | printx PNORM,%a0@ | ||
826 | printf PNORM,")\n" | ||
827 | rts | ||
828 | 3: subq.w #2,%d2 | ||
829 | jcs 5f | %d2 < 2, round to zero | ||
830 | jhi 4f | %d2 > 2, round to +infinity | ||
831 | tst.b (-3,%a0) | to -inf | ||
832 | jne 1b | ||
833 | jra 5f | ||
| largest finite double in fp_ext form: exponent 0x43fe, mantissa
| 0xffffffff:fffff800 (word move into %d0 keeps 0xffff on top).
834 | 4: tst.b (-3,%a0) | to +inf | ||
835 | jeq 1b | ||
836 | 5: move.w #0x43fe,(-2,%a0) | ||
837 | moveq #-1,%d0 | ||
838 | move.l %d0,(%a0)+ | ||
839 | move.w #0xf800,%d0 | ||
840 | move.l %d0,(%a0) | ||
841 | jra 2b | ||
842 | | Infinities or NaNs | ||
843 | fp_nd_huge: | ||
844 | subq.l #4,%a0 | ||
845 | printf PNORM,"%p(",1,%a0 | ||
846 | printx PNORM,%a0@ | ||
847 | printf PNORM,")\n" | ||
848 | rts | ||
849 | |||
850 | | fp_normalize_single: | ||
851 | | normalize an extended with single (23-bit) precision | ||
852 | | args: %a0 (struct fp_ext *) | ||
| NOTE(review): rounds the mantissa to 24 significant bits (the low
| lword is folded into a sticky bit and zeroed, then the low byte of
| the high lword is rounded off) and range-checks the exponent for
| single precision, raising INEX2/UNFL/OVFL in the FPSR.
853 | |||
854 | fp_normalize_single: | ||
855 | printf PNORM,"ns: %p(",1,%a0 | ||
856 | printx PNORM,%a0@ | ||
857 | printf PNORM,") " | ||
858 | addq.l #2,%a0 | ||
859 | move.w (%a0)+,%d2 | ||
860 | jeq fp_ns_zero | zero / denormalized | ||
861 | cmp.w #0x7fff,%d2 | ||
862 | jeq fp_ns_huge | NaN / infinity. | ||
863 | sub.w #0x4000-0x7f,%d2 | will the exponent fit? | ||
864 | jcs fp_ns_small | too small. | ||
865 | cmp.w #0xfe,%d2 | ||
866 | jcc fp_ns_large | too big. | ||
867 | move.l (%a0)+,%d0 | get high lword of mantissa | ||
868 | fp_ns_round: | ||
869 | tst.l (%a0) | check the low lword | ||
870 | jeq 1f | ||
871 | | Set a sticky bit if it is non-zero. This should only | ||
872 | | affect the rounding in what would otherwise be equal- | ||
873 | | distance situations, which is what we want it to do. | ||
874 | bset #0,%d0 | ||
875 | 1: clr.l (%a0) | zap it from memory. | ||
876 | | now, round off the low 8 bits of the hi lword. | ||
877 | tst.b %d0 | 8 low bits. | ||
878 | jne fp_ns_checkround | Are they non-zero? | ||
879 | | nothing to do here | ||
880 | subq.l #8,%a0 | ||
881 | printf PNORM,"%p(",1,%a0 | ||
882 | printx PNORM,%a0@ | ||
883 | printf PNORM,")\n" | ||
884 | rts | ||
885 | fp_ns_checkround: | ||
886 | fp_set_sr FPSR_EXC_INEX2 | INEX2 bit | ||
887 | clr.b -(%a0) | clear low byte of high lword | ||
888 | subq.l #3,%a0 | ||
889 | move.w (FPD_RND,FPDATA),%d2 | rounding mode | ||
890 | jne 2f | %d2 == 0, round to nearest | ||
891 | tst.b %d0 | test guard bit | ||
892 | jpl 9f | zero is closer | ||
893 | btst #8,%d0 | test lsb bit | ||
894 | | round to even behaviour, see above. | ||
895 | jne fp_ns_doroundup | round to infinity | ||
896 | lsl.b #1,%d0 | check low bits | ||
897 | jeq 9f | round to zero | ||
898 | fp_ns_doroundup: | ||
899 | | round (the mantissa, that is) towards infinity | ||
900 | add.l #0x100,(%a0) | ||
901 | jcc 9f | no overflow, good. | ||
902 | | Overflow. This means that the %d1 was 0xffffff00, so it | ||
903 | | is now zero. We will set the mantissa to reflect this, and | ||
904 | | increment the exponent (checking for overflow there too) | ||
905 | move.w #0x8000,(%a0) | ||
906 | addq.w #1,-(%a0) | ||
907 | cmp.w #0x407f,(%a0)+ | exponent now overflown? | ||
908 | jeq fp_ns_large | yes, so make it infinity. | ||
909 | 9: subq.l #4,%a0 | ||
910 | printf PNORM,"%p(",1,%a0 | ||
911 | printx PNORM,%a0@ | ||
912 | printf PNORM,")\n" | ||
913 | rts | ||
914 | | check nondefault rounding modes | ||
915 | 2: subq.w #2,%d2 | ||
916 | jcs 9b | %d2 < 2, round to zero | ||
917 | jhi 3f | %d2 > 2, round to +infinity | ||
918 | tst.b (-3,%a0) | to -inf | ||
919 | jne fp_ns_doroundup | negative, round to infinity | ||
920 | jra 9b | positive, round to zero | ||
921 | 3: tst.b (-3,%a0) | to +inf | ||
922 | jeq fp_ns_doroundup | positive, round to infinity | ||
923 | jra 9b | negative, round to zero | ||
924 | | Exponent underflow. Try to make a denormal, and set it to | ||
925 | | the smallest possible fraction if this fails. | ||
926 | fp_ns_small: | ||
927 | fp_set_sr FPSR_EXC_UNFL | set UNFL bit | ||
928 | move.w #0x3f81,(-2,%a0) | 2**-126 | ||
929 | neg.w %d2 | degree of underflow | ||
930 | cmp.w #32,%d2 | single or double shift? | ||
931 | jcc 2f | ||
932 | | a 32-bit shift. | ||
933 | move.l (%a0),%d0 | ||
934 | move.l %d0,%d1 | ||
935 | lsr.l %d2,%d0 | ||
936 | move.l %d0,(%a0)+ | ||
937 | | Check to see if we shifted off any significant bits. | ||
938 | neg.w %d2 | ||
939 | add.w #32,%d2 | ||
940 | lsl.l %d2,%d1 | ||
941 | jeq 1f | ||
942 | bset #0,%d0 | Sticky bit. | ||
943 | | Check the lower lword | ||
944 | 1: tst.l (%a0) | ||
945 | jeq fp_ns_round | ||
946 | clr (%a0) | ||
947 | bset #0,%d0 | Sticky bit. | ||
948 | jra fp_ns_round | ||
949 | | Sorry, the number is just too small. | ||
950 | 2: clr.l (%a0)+ | ||
951 | clr.l (%a0) | ||
952 | moveq #1,%d0 | Smallest possible fraction, | ||
953 | jra fp_ns_round | round as desired. | ||
954 | | Exponent overflow. Just call it infinity. | ||
955 | fp_ns_large: | ||
956 | tst.b (3,%a0) | ||
957 | jeq 1f | ||
958 | fp_set_sr FPSR_EXC_INEX2 | ||
959 | 1: fp_set_sr FPSR_EXC_OVFL | ||
960 | move.w (FPD_RND,FPDATA),%d2 | ||
961 | jne 3f | %d2 = 0 round to nearest | ||
962 | 1: move.w #0x7fff,(-2,%a0) | ||
963 | clr.l (%a0)+ | ||
964 | clr.l (%a0) | ||
965 | 2: subq.l #8,%a0 | ||
966 | printf PNORM,"%p(",1,%a0 | ||
967 | printx PNORM,%a0@ | ||
968 | printf PNORM,")\n" | ||
969 | rts | ||
970 | 3: subq.w #2,%d2 | ||
971 | jcs 5f | %d2 < 2, round to zero | ||
972 | jhi 4f | %d2 > 2, round to +infinity | ||
973 | tst.b (-3,%a0) | to -inf | ||
974 | jne 1b | ||
975 | jra 5f | ||
| largest finite single in fp_ext form: exponent 0x407e,
| mantissa 0xffffff00:00000000.
976 | 4: tst.b (-3,%a0) | to +inf | ||
977 | jeq 1b | ||
978 | 5: move.w #0x407e,(-2,%a0) | ||
979 | move.l #0xffffff00,(%a0)+ | ||
980 | clr.l (%a0) | ||
981 | jra 2b | ||
982 | | zero and denormalized | ||
983 | fp_ns_zero: | ||
984 | tst.l (%a0)+ | ||
985 | jne 1f | ||
986 | tst.l (%a0) | ||
987 | jne 1f | ||
988 | subq.l #8,%a0 | ||
989 | printf PNORM,"%p(",1,%a0 | ||
990 | printx PNORM,%a0@ | ||
991 | printf PNORM,")\n" | ||
992 | rts | zero. nothing to do. | ||
993 | | These are not merely subnormal numbers, but true denormals, | ||
994 | | i.e. pathologically small (exponent is 2**-16383) numbers. | ||
995 | | It is clearly impossible for even a normal extended number | ||
996 | | with that exponent to fit into single precision, so just | ||
997 | | write these ones off as "too darn small". | ||
998 | 1: fp_set_sr FPSR_EXC_UNFL | Set UNFL bit | ||
999 | clr.l (%a0) | ||
1000 | clr.l -(%a0) | ||
1001 | move.w #0x3f81,-(%a0) | i.e. 2**-126 | ||
1002 | addq.l #6,%a0 | ||
1003 | moveq #1,%d0 | ||
1004 | jra fp_ns_round | round. | ||
1005 | | Infinities or NaNs | ||
1006 | fp_ns_huge: | ||
1007 | subq.l #4,%a0 | ||
1008 | printf PNORM,"%p(",1,%a0 | ||
1009 | printx PNORM,%a0@ | ||
1010 | printf PNORM,")\n" | ||
1011 | rts | ||
1012 | |||
1013 | | fp_normalize_single_fast: | ||
1014 | | normalize an extended with single (23-bit) precision | ||
1015 | | this is only used by fsgldiv/fsglmul, where the | ||
1016 | | operand is not completely normalized. | ||
1017 | | args: %a0 (struct fp_ext *) | ||
| NOTE(review): same rounding as fp_normalize_single, but only
| Inf/NaN is special-cased on entry -- no zero/denormal test and no
| exponent range check is made before rounding.
1018 | |||
1019 | fp_normalize_single_fast: | ||
1020 | printf PNORM,"nsf: %p(",1,%a0 | ||
1021 | printx PNORM,%a0@ | ||
1022 | printf PNORM,") " | ||
1023 | addq.l #2,%a0 | ||
1024 | move.w (%a0)+,%d2 | ||
1025 | cmp.w #0x7fff,%d2 | ||
1026 | jeq fp_nsf_huge | NaN / infinity. | ||
1027 | move.l (%a0)+,%d0 | get high lword of mantissa | ||
1028 | fp_nsf_round: | ||
1029 | tst.l (%a0) | check the low lword | ||
1030 | jeq 1f | ||
1031 | | Set a sticky bit if it is non-zero. This should only | ||
1032 | | affect the rounding in what would otherwise be equal- | ||
1033 | | distance situations, which is what we want it to do. | ||
1034 | bset #0,%d0 | ||
1035 | 1: clr.l (%a0) | zap it from memory. | ||
1036 | | now, round off the low 8 bits of the hi lword. | ||
1037 | tst.b %d0 | 8 low bits. | ||
1038 | jne fp_nsf_checkround | Are they non-zero? | ||
1039 | | nothing to do here | ||
1040 | subq.l #8,%a0 | ||
1041 | printf PNORM,"%p(",1,%a0 | ||
1042 | printx PNORM,%a0@ | ||
1043 | printf PNORM,")\n" | ||
1044 | rts | ||
1045 | fp_nsf_checkround: | ||
1046 | fp_set_sr FPSR_EXC_INEX2 | INEX2 bit | ||
1047 | clr.b -(%a0) | clear low byte of high lword | ||
1048 | subq.l #3,%a0 | ||
1049 | move.w (FPD_RND,FPDATA),%d2 | rounding mode | ||
1050 | jne 2f | %d2 == 0, round to nearest | ||
1051 | tst.b %d0 | test guard bit | ||
1052 | jpl 9f | zero is closer | ||
1053 | btst #8,%d0 | test lsb bit | ||
1054 | | round to even behaviour, see above. | ||
1055 | jne fp_nsf_doroundup | round to infinity | ||
1056 | lsl.b #1,%d0 | check low bits | ||
1057 | jeq 9f | round to zero | ||
1058 | fp_nsf_doroundup: | ||
1059 | | round (the mantissa, that is) towards infinity | ||
1060 | add.l #0x100,(%a0) | ||
1061 | jcc 9f | no overflow, good. | ||
1062 | | Overflow. This means that the %d1 was 0xffffff00, so it | ||
1063 | | is now zero. We will set the mantissa to reflect this, and | ||
1064 | | increment the exponent (checking for overflow there too) | ||
1065 | move.w #0x8000,(%a0) | ||
1066 | addq.w #1,-(%a0) | ||
1067 | cmp.w #0x407f,(%a0)+ | exponent now overflown? | ||
1068 | jeq fp_nsf_large | yes, so make it infinity. | ||
1069 | 9: subq.l #4,%a0 | ||
1070 | printf PNORM,"%p(",1,%a0 | ||
1071 | printx PNORM,%a0@ | ||
1072 | printf PNORM,")\n" | ||
1073 | rts | ||
1074 | | check nondefault rounding modes | ||
1075 | 2: subq.w #2,%d2 | ||
1076 | jcs 9b | %d2 < 2, round to zero | ||
1077 | jhi 3f | %d2 > 2, round to +infinity | ||
1078 | tst.b (-3,%a0) | to -inf | ||
1079 | jne fp_nsf_doroundup | negative, round to infinity | ||
1080 | jra 9b | positive, round to zero | ||
1081 | 3: tst.b (-3,%a0) | to +inf | ||
1082 | jeq fp_nsf_doroundup | positive, round to infinity | ||
1083 | jra 9b | negative, round to zero | ||
1084 | | Exponent overflow. Just call it infinity. | ||
1085 | fp_nsf_large: | ||
1086 | tst.b (3,%a0) | ||
1087 | jeq 1f | ||
1088 | fp_set_sr FPSR_EXC_INEX2 | ||
1089 | 1: fp_set_sr FPSR_EXC_OVFL | ||
1090 | move.w (FPD_RND,FPDATA),%d2 | ||
1091 | jne 3f | %d2 = 0 round to nearest | ||
1092 | 1: move.w #0x7fff,(-2,%a0) | ||
1093 | clr.l (%a0)+ | ||
1094 | clr.l (%a0) | ||
1095 | 2: subq.l #8,%a0 | ||
1096 | printf PNORM,"%p(",1,%a0 | ||
1097 | printx PNORM,%a0@ | ||
1098 | printf PNORM,")\n" | ||
1099 | rts | ||
1100 | 3: subq.w #2,%d2 | ||
1101 | jcs 5f | %d2 < 2, round to zero | ||
1102 | jhi 4f | %d2 > 2, round to +infinity | ||
1103 | tst.b (-3,%a0) | to -inf | ||
1104 | jne 1b | ||
1105 | jra 5f | ||
1106 | 4: tst.b (-3,%a0) | to +inf | ||
1107 | jeq 1b | ||
1108 | 5: move.w #0x407e,(-2,%a0) | ||
1109 | move.l #0xffffff00,(%a0)+ | ||
1110 | clr.l (%a0) | ||
1111 | jra 2b | ||
1112 | | Infinities or NaNs | ||
1113 | fp_nsf_huge: | ||
1114 | subq.l #4,%a0 | ||
1115 | printf PNORM,"%p(",1,%a0 | ||
1116 | printx PNORM,%a0@ | ||
1117 | printf PNORM,")\n" | ||
1118 | rts | ||
1119 | |||
| conv_ext2int (macro):
| Generates a subroutine that converts an extended value to an
| integer of a given size, again, with the appropriate type of
| rounding.

| Macro arguments:
| s: size, as given in an assembly instruction.
| b: number of bits in that size.

| Subroutine arguments:
| %a0: source (struct fp_ext *)

| Returns the integer in %d0 (like it should)

.macro conv_ext2int s,b
.set inf,(1<<(\b-1))-1		| i.e. MAXINT
	printf	PCONV,"e2i%d: %p(",2,#\b,%a0
	printx	PCONV,%a0@
	printf	PCONV,") "
	addq.l	#2,%a0
	move.w	(%a0)+,%d2	| exponent
	jeq	fp_e2i_zero\b	| zero / denorm (== 0, here)
	cmp.w	#0x7fff,%d2
	jeq	fp_e2i_huge\b	| Inf / NaN
	sub.w	#0x3ffe,%d2
	jcs	fp_e2i_small\b
	cmp.w	#\b,%d2
	jhi	fp_e2i_large\b
	move.l	(%a0),%d0
	move.l	%d0,%d1
	lsl.l	%d2,%d1		| any fraction bits below the integer part?
	jne	fp_e2i_round\b
	tst.l	(4,%a0)
	jne	fp_e2i_round\b
	neg.w	%d2
	add.w	#32,%d2
	lsr.l	%d2,%d0		| exact: shift the integer part down
9:	tst.w	(-4,%a0)	| sign word
	jne	1f
	tst.\s	%d0
	jmi	fp_e2i_large\b	| positive result overflows the size
	printf	PCONV,"-> %p\n",1,%d0
	rts
1:	neg.\s	%d0
	jeq	1f
	jpl	fp_e2i_large\b	| negative result overflows the size
1:	printf	PCONV,"-> %p\n",1,%d0
	rts
fp_e2i_round\b:
	fp_set_sr FPSR_EXC_INEX2	| INEX2 bit
	neg.w	%d2
	add.w	#32,%d2
	.if \b>16
	jeq	5f
	.endif
	lsr.l	%d2,%d0
	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
	jne	2f			| %d2 == 0, round to nearest
	tst.l	%d1			| test guard bit
	jpl	9b			| zero is closer
	btst	%d2,%d0			| test lsb bit (%d2 still 0)
	jne	fp_e2i_doroundup\b
	lsl.l	#1,%d1			| check low bits
	jne	fp_e2i_doroundup\b
	tst.l	(4,%a0)
	jeq	9b
fp_e2i_doroundup\b:
	addq.l	#1,%d0
	jra	9b
	| check nondefault rounding modes
2:	subq.w	#2,%d2
	jcs	9b			| %d2 < 2, round to zero
	jhi	3f			| %d2 > 2, round to +infinity
	tst.w	(-4,%a0)		| to -inf
	jne	fp_e2i_doroundup\b	| negative, round to infinity
	jra	9b			| positive, round to zero
3:	tst.w	(-4,%a0)		| to +inf
	jeq	fp_e2i_doroundup\b	| positive, round to infinity
	jra	9b			| negative, round to zero
	| we are only want -2**127 get correctly rounded here,
	| since the guard bit is in the lower lword.
	| everything else ends up anyway as overflow.
	.if \b>16
5:	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
	jne	2b			| %d2 == 0, round to nearest
	move.l	(4,%a0),%d1		| test guard bit
	jpl	9b			| zero is closer
	lsl.l	#1,%d1			| check low bits
	jne	fp_e2i_doroundup\b
	jra	9b
	.endif
fp_e2i_zero\b:
	clr.l	%d0
	tst.l	(%a0)+
	jne	1f
	tst.l	(%a0)
	jeq	3f
1:	subq.l	#4,%a0
	fp_clr_sr FPSR_EXC_UNFL		| fp_normalize_ext has set this bit
fp_e2i_small\b:
	fp_set_sr FPSR_EXC_INEX2
	clr.l	%d0
	move.w	(FPD_RND,FPDATA),%d2	| rounding mode
	subq.w	#2,%d2
	jcs	3f			| %d2 < 2, round to nearest/zero
	jhi	2f			| %d2 > 2, round to +infinity
	tst.w	(-4,%a0)		| to -inf
	jeq	3f
	subq.\s	#1,%d0
	jra	3f
2:	tst.w	(-4,%a0)		| to +inf
	jne	3f
	addq.\s	#1,%d0
3:	printf	PCONV,"-> %p\n",1,%d0
	rts
fp_e2i_large\b:
	fp_set_sr FPSR_EXC_OPERR
	move.\s	#inf,%d0
	tst.w	(-4,%a0)
	jeq	1f
	addq.\s	#1,%d0
1:	printf	PCONV,"-> %p\n",1,%d0
	rts
fp_e2i_huge\b:
	move.\s	(%a0),%d0
	tst.l	(%a0)
	jne	1f
	tst.l	(4,%a0)		| FIX: was a second tst.l (%a0); a NaN with
				| payload only in the low lword was
				| misclassified as infinity
	jeq	fp_e2i_large\b
	| fp_normalize_ext has set this bit already
	| and made the number nonsignaling
1:	fp_tst_sr FPSR_EXC_SNAN
	jne	1f
	fp_set_sr FPSR_EXC_OPERR
1:	printf	PCONV,"-> %p\n",1,%d0
	rts
.endm
1257 | |||
| Instantiations of conv_ext2int: convert the extended value at (%a0)
| to a signed 32/16/8-bit integer, returned in %d0.

fp_conv_ext2long:
	conv_ext2int l,32

fp_conv_ext2short:
	conv_ext2int w,16

fp_conv_ext2byte:
	conv_ext2int b,8
1266 | |||
| fp_conv_ext2double:
| Convert the extended value at (%a0) to an IEEE double and store it
| via putuser at the user address in %a1 (faults go to fp_err_ua2).
| fp_normalize_double has already rounded/denormalized the value into
| double range; here only the bit fields are repacked.

fp_conv_ext2double:
	jsr	fp_normalize_double
	printf	PCONV,"e2d: %p(",1,%a0
	printx	PCONV,%a0@
	printf	PCONV,"), "
	move.l	(%a0)+,%d2		| sign/exponent longword
	cmp.w	#0x7fff,%d2
	jne	1f
	move.w	#0x7ff,%d2		| Inf/NaN: maximum double exponent
	move.l	(%a0)+,%d0
	jra	2f
1:	sub.w	#0x3fff-0x3ff,%d2	| re-bias exponent (0x3fff -> 0x3ff)
	move.l	(%a0)+,%d0
	jmi	2f			| explicit msb set -> normalized
	clr.w	%d2			| denormal: exponent field is zero
2:	lsl.w	#5,%d2			| move the 11 exponent bits ...
	lsl.l	#7,%d2
	lsl.l	#8,%d2			| ... up into bits 62..52
	move.l	%d0,%d1
	lsl.l	#1,%d0			| drop the explicit integer bit
	lsr.l	#4,%d0
	lsr.l	#8,%d0			| upper 20 mantissa bits
	or.l	%d2,%d0
	putuser.l %d0,(%a1)+,fp_err_ua2,%a1
	moveq	#21,%d0
	lsl.l	%d0,%d1			| bits carried into the low lword
	move.l	(%a0),%d0
	lsr.l	#4,%d0
	lsr.l	#7,%d0
	or.l	%d1,%d0			| lower 32 mantissa bits
	putuser.l %d0,(%a1),fp_err_ua2,%a1
#ifdef FPU_EMU_DEBUG
	getuser.l %a1@(-4),%d0,fp_err_ua2,%a1
	getuser.l %a1@(0),%d1,fp_err_ua2,%a1
	printf	PCONV,"%p(%08x%08x)\n",3,%a1,%d0,%d1
#endif
	rts
1304 | |||
| fp_conv_ext2single:
| Convert the extended value at (%a0) to IEEE single format; unlike
| the double variant, the packed result is left in %d0 (no store).

fp_conv_ext2single:
	jsr	fp_normalize_single
	printf	PCONV,"e2s: %p(",1,%a0
	printx	PCONV,%a0@
	printf	PCONV,"), "
	move.l	(%a0)+,%d1		| sign/exponent longword
	cmp.w	#0x7fff,%d1
	jne	1f
	move.w	#0xff,%d1		| Inf/NaN: maximum single exponent
	move.l	(%a0)+,%d0
	jra	2f
1:	sub.w	#0x3fff-0x7f,%d1	| re-bias exponent (0x3fff -> 0x7f)
	move.l	(%a0)+,%d0
	jmi	2f			| explicit msb set -> normalized
	clr.w	%d1			| denormal: exponent field is zero
2:	lsl.w	#8,%d1			| move the 8 exponent bits ...
	lsl.l	#7,%d1
	lsl.l	#8,%d1			| ... up into bits 30..23
	bclr	#31,%d0			| drop the explicit integer bit
	lsr.l	#8,%d0			| 23 mantissa bits
	or.l	%d1,%d0
	printf	PCONV,"%08x\n",1,%d0
	rts
1328 | |||
| special return addresses for instr that
| encode the rounding precision in the opcode
| (e.g. fsmove,fdmove)

| Each variant discards 8 bytes of saved state from the stack, then
| renormalizes the extended result and rounds it to the precision
| implied by the opcode before joining the common fp_finaltest path.

fp_finalrounding_single:
	addq.l	#8,%sp
	jsr	fp_normalize_ext
	jsr	fp_normalize_single
	jra	fp_finaltest

fp_finalrounding_single_fast:
	addq.l	#8,%sp
	jsr	fp_normalize_ext
	jsr	fp_normalize_single_fast
	jra	fp_finaltest

fp_finalrounding_double:
	addq.l	#8,%sp
	jsr	fp_normalize_ext
	jsr	fp_normalize_double
	jra	fp_finaltest
1350 | |||
| fp_finaltest:
| set the emulated status register based on the outcome of an
| emulated instruction.

| fp_finalrounding: common exit path; renormalize, then round to the
| precision from FPD_PREC (0: stay extended, 1: single, else double)
| and fall through into fp_finaltest.
fp_finalrounding:
	addq.l	#8,%sp
|	printf	,"f: %p\n",1,%a0
	jsr	fp_normalize_ext
	move.w	(FPD_PREC,FPDATA),%d0
	subq.w	#1,%d0
	jcs	fp_finaltest		| PREC == 0: no further rounding
	jne	1f
	jsr	fp_normalize_single
	jra	2f
1:	jsr	fp_normalize_double
2:|	printf	,"f: %p\n",1,%a0
fp_finaltest:
	| First, we do some of the obvious tests for the exception
	| status byte and condition code bytes of fp_sr here, so that
	| they do not have to be handled individually by every
	| emulated instruction.
	clr.l	%d0
	addq.l	#1,%a0
	tst.b	(%a0)+			| sign
	jeq	1f
	bset	#FPSR_CC_NEG-24,%d0	| N bit
1:	cmp.w	#0x7fff,(%a0)+		| exponent
	jeq	2f
	| test for zero
	moveq	#FPSR_CC_Z-24,%d1
	tst.l	(%a0)+
	jne	9f
	tst.l	(%a0)
	jne	9f
	jra	8f
	| infinitiv and NAN
2:	moveq	#FPSR_CC_NAN-24,%d1
	move.l	(%a0)+,%d2
	lsl.l	#1,%d2			| ignore high bit
	jne	8f
	tst.l	(%a0)
	jne	8f
	moveq	#FPSR_CC_INF-24,%d1	| whole mantissa zero -> infinity
8:	bset	%d1,%d0
9:	move.b	%d0,(FPD_FPSR+0,FPDATA)	| set condition test result
	| move instructions enter here
	| Here, we test things in the exception status byte, and set
	| other things in the accrued exception byte accordingly.
	| Emulated instructions can set various things in the former,
	| as defined in fp_emu.h.
fp_final:
	move.l	(FPD_FPSR,FPDATA),%d0
#if 0
	btst	#FPSR_EXC_SNAN,%d0	| EXC_SNAN
	jne	1f
	btst	#FPSR_EXC_OPERR,%d0	| EXC_OPERR
	jeq	2f
1:	bset	#FPSR_AEXC_IOP,%d0	| set IOP bit
2:	btst	#FPSR_EXC_OVFL,%d0	| EXC_OVFL
	jeq	1f
	bset	#FPSR_AEXC_OVFL,%d0	| set OVFL bit
1:	btst	#FPSR_EXC_UNFL,%d0	| EXC_UNFL
	jeq	1f
	btst	#FPSR_EXC_INEX2,%d0	| EXC_INEX2
	jeq	1f
	bset	#FPSR_AEXC_UNFL,%d0	| set UNFL bit
1:	btst	#FPSR_EXC_DZ,%d0	| EXC_INEX1
	jeq	1f
	bset	#FPSR_AEXC_DZ,%d0	| set DZ bit
1:	btst	#FPSR_EXC_OVFL,%d0	| EXC_OVFL
	jne	1f
	btst	#FPSR_EXC_INEX2,%d0	| EXC_INEX2
	jne	1f
	btst	#FPSR_EXC_INEX1,%d0	| EXC_INEX1
	jeq	2f
1:	bset	#FPSR_AEXC_INEX,%d0	| set INEX bit
2:	move.l	%d0,(FPD_FPSR,FPDATA)
#else
	| same as above, greatly optimized, but untested (yet)
	move.l	%d0,%d2
	lsr.l	#5,%d0
	move.l	%d0,%d1
	lsr.l	#4,%d1
	or.l	%d0,%d1
	and.b	#0x08,%d1
	move.l	%d2,%d0
	lsr.l	#6,%d0
	or.l	%d1,%d0
	move.l	%d2,%d1
	lsr.l	#4,%d1
	or.b	#0xdf,%d1
	and.b	%d1,%d0
	move.l	%d2,%d1
	lsr.l	#7,%d1
	and.b	#0x80,%d1
	or.b	%d1,%d0
	and.b	#0xf8,%d0
	or.b	%d0,%d2
	move.l	%d2,(FPD_FPSR,FPDATA)
#endif
	move.b	(FPD_FPSR+2,FPDATA),%d0
	and.b	(FPD_FPCR+2,FPDATA),%d0	| any enabled exception raised?
	jeq	1f
	printf	,"send signal!!!\n"
1:	jra	fp_end
diff --git a/arch/m68k/math-emu/multi_arith.h b/arch/m68k/math-emu/multi_arith.h new file mode 100644 index 000000000000..02251e5afd89 --- /dev/null +++ b/arch/m68k/math-emu/multi_arith.h | |||
@@ -0,0 +1,819 @@ | |||
1 | /* multi_arith.h: multi-precision integer arithmetic functions, needed | ||
2 | to do extended-precision floating point. | ||
3 | |||
4 | (c) 1998 David Huggins-Daines. | ||
5 | |||
6 | Somewhat based on arch/alpha/math-emu/ieee-math.c, which is (c) | ||
7 | David Mosberger-Tang. | ||
8 | |||
9 | You may copy, modify, and redistribute this file under the terms of | ||
10 | the GNU General Public License, version 2, or any later version, at | ||
11 | your convenience. */ | ||
12 | |||
13 | /* Note: | ||
14 | |||
15 | These are not general multi-precision math routines. Rather, they | ||
16 | implement the subset of integer arithmetic that we need in order to | ||
17 | multiply, divide, and normalize 128-bit unsigned mantissae. */ | ||
18 | |||
19 | #ifndef MULTI_ARITH_H | ||
20 | #define MULTI_ARITH_H | ||
21 | |||
22 | #if 0 /* old code... */ | ||
23 | |||
24 | /* Unsigned only, because we don't need signs to multiply and divide. */ | ||
25 | typedef unsigned int int128[4]; | ||
26 | |||
27 | /* Word order */ | ||
28 | enum { | ||
29 | MSW128, | ||
30 | NMSW128, | ||
31 | NLSW128, | ||
32 | LSW128 | ||
33 | }; | ||
34 | |||
35 | /* big-endian */ | ||
36 | #define LO_WORD(ll) (((unsigned int *) &ll)[1]) | ||
37 | #define HI_WORD(ll) (((unsigned int *) &ll)[0]) | ||
38 | |||
39 | /* Convenience functions to stuff various integer values into int128s */ | ||
40 | |||
41 | static inline void zero128(int128 a) | ||
42 | { | ||
43 | a[LSW128] = a[NLSW128] = a[NMSW128] = a[MSW128] = 0; | ||
44 | } | ||
45 | |||
46 | /* Human-readable word order in the arguments */ | ||
47 | static inline void set128(unsigned int i3, unsigned int i2, unsigned int i1, | ||
48 | unsigned int i0, int128 a) | ||
49 | { | ||
50 | a[LSW128] = i0; | ||
51 | a[NLSW128] = i1; | ||
52 | a[NMSW128] = i2; | ||
53 | a[MSW128] = i3; | ||
54 | } | ||
55 | |||
56 | /* Convenience functions (for testing as well) */ | ||
57 | static inline void int64_to_128(unsigned long long src, int128 dest) | ||
58 | { | ||
59 | dest[LSW128] = (unsigned int) src; | ||
60 | dest[NLSW128] = src >> 32; | ||
61 | dest[NMSW128] = dest[MSW128] = 0; | ||
62 | } | ||
63 | |||
64 | static inline void int128_to_64(const int128 src, unsigned long long *dest) | ||
65 | { | ||
66 | *dest = src[LSW128] | (long long) src[NLSW128] << 32; | ||
67 | } | ||
68 | |||
/* Debug helper: print a 128-bit value, most-significant word first. */
static inline void put_i128(const int128 a)
{
	printk("%08x %08x %08x %08x\n", a[MSW128], a[NMSW128],
	       a[NLSW128], a[LSW128]);
}
74 | |||
75 | /* Internal shifters: | ||
76 | |||
77 | Note that these are only good for 0 < count < 32. | ||
78 | */ | ||
79 | |||
80 | static inline void _lsl128(unsigned int count, int128 a) | ||
81 | { | ||
82 | a[MSW128] = (a[MSW128] << count) | (a[NMSW128] >> (32 - count)); | ||
83 | a[NMSW128] = (a[NMSW128] << count) | (a[NLSW128] >> (32 - count)); | ||
84 | a[NLSW128] = (a[NLSW128] << count) | (a[LSW128] >> (32 - count)); | ||
85 | a[LSW128] <<= count; | ||
86 | } | ||
87 | |||
88 | static inline void _lsr128(unsigned int count, int128 a) | ||
89 | { | ||
90 | a[LSW128] = (a[LSW128] >> count) | (a[NLSW128] << (32 - count)); | ||
91 | a[NLSW128] = (a[NLSW128] >> count) | (a[NMSW128] << (32 - count)); | ||
92 | a[NMSW128] = (a[NMSW128] >> count) | (a[MSW128] << (32 - count)); | ||
93 | a[MSW128] >>= count; | ||
94 | } | ||
95 | |||
/* Should be faster, one would hope */

/* Shift a 128-bit value left one bit using the m68k X (extend) flag:
   lsl starts the chain at the low word, roxl rotates each higher word
   through X so the carried-out bit propagates upward. */
static inline void lslone128(int128 a)
{
	asm volatile ("lsl.l #1,%0\n"
		      "roxl.l #1,%1\n"
		      "roxl.l #1,%2\n"
		      "roxl.l #1,%3\n"
		      :
		      "=d" (a[LSW128]),
		      "=d"(a[NLSW128]),
		      "=d"(a[NMSW128]),
		      "=d"(a[MSW128])
		      :
		      "0"(a[LSW128]),
		      "1"(a[NLSW128]),
		      "2"(a[NMSW128]),
		      "3"(a[MSW128]));
}
115 | |||
/* Shift a 128-bit value right one bit: mirror image of lslone128(),
   starting at the high word and rotating the extend flag downward. */
static inline void lsrone128(int128 a)
{
	asm volatile ("lsr.l #1,%0\n"
		      "roxr.l #1,%1\n"
		      "roxr.l #1,%2\n"
		      "roxr.l #1,%3\n"
		      :
		      "=d" (a[MSW128]),
		      "=d"(a[NMSW128]),
		      "=d"(a[NLSW128]),
		      "=d"(a[LSW128])
		      :
		      "0"(a[MSW128]),
		      "1"(a[NMSW128]),
		      "2"(a[NLSW128]),
		      "3"(a[LSW128]));
}
133 | |||
134 | /* Generalized 128-bit shifters: | ||
135 | |||
136 | These bit-shift to a multiple of 32, then move whole longwords. */ | ||
137 | |||
138 | static inline void lsl128(unsigned int count, int128 a) | ||
139 | { | ||
140 | int wordcount, i; | ||
141 | |||
142 | if (count % 32) | ||
143 | _lsl128(count % 32, a); | ||
144 | |||
145 | if (0 == (wordcount = count / 32)) | ||
146 | return; | ||
147 | |||
148 | /* argh, gak, endian-sensitive */ | ||
149 | for (i = 0; i < 4 - wordcount; i++) { | ||
150 | a[i] = a[i + wordcount]; | ||
151 | } | ||
152 | for (i = 3; i >= 4 - wordcount; --i) { | ||
153 | a[i] = 0; | ||
154 | } | ||
155 | } | ||
156 | |||
157 | static inline void lsr128(unsigned int count, int128 a) | ||
158 | { | ||
159 | int wordcount, i; | ||
160 | |||
161 | if (count % 32) | ||
162 | _lsr128(count % 32, a); | ||
163 | |||
164 | if (0 == (wordcount = count / 32)) | ||
165 | return; | ||
166 | |||
167 | for (i = 3; i >= wordcount; --i) { | ||
168 | a[i] = a[i - wordcount]; | ||
169 | } | ||
170 | for (i = 0; i < wordcount; i++) { | ||
171 | a[i] = 0; | ||
172 | } | ||
173 | } | ||
174 | |||
175 | static inline int orl128(int a, int128 b) | ||
176 | { | ||
177 | b[LSW128] |= a; | ||
178 | } | ||
179 | |||
180 | static inline int btsthi128(const int128 a) | ||
181 | { | ||
182 | return a[MSW128] & 0x80000000; | ||
183 | } | ||
184 | |||
185 | /* test bits (numbered from 0 = LSB) up to and including "top" */ | ||
186 | static inline int bftestlo128(int top, const int128 a) | ||
187 | { | ||
188 | int r = 0; | ||
189 | |||
190 | if (top > 31) | ||
191 | r |= a[LSW128]; | ||
192 | if (top > 63) | ||
193 | r |= a[NLSW128]; | ||
194 | if (top > 95) | ||
195 | r |= a[NMSW128]; | ||
196 | |||
197 | r |= a[3 - (top / 32)] & ((1 << (top % 32 + 1)) - 1); | ||
198 | |||
199 | return (r != 0); | ||
200 | } | ||
201 | |||
202 | /* Aargh. We need these because GCC is broken */ | ||
203 | /* FIXME: do them in assembly, for goodness' sake! */ | ||
204 | static inline void mask64(int pos, unsigned long long *mask) | ||
205 | { | ||
206 | *mask = 0; | ||
207 | |||
208 | if (pos < 32) { | ||
209 | LO_WORD(*mask) = (1 << pos) - 1; | ||
210 | return; | ||
211 | } | ||
212 | LO_WORD(*mask) = -1; | ||
213 | HI_WORD(*mask) = (1 << (pos - 32)) - 1; | ||
214 | } | ||
215 | |||
/* Set bit "pos" (0 = LSB) of a 64-bit value using the m68k bset
   instruction on the appropriate 32-bit half. */
static inline void bset64(int pos, unsigned long long *dest)
{
	/* This conditional will be optimized away.  Thanks, GCC! */
	if (pos < 32)
		asm volatile ("bset %1,%0":"=m"
			      (LO_WORD(*dest)):"id"(pos));
	else
		asm volatile ("bset %1,%0":"=m"
			      (HI_WORD(*dest)):"id"(pos - 32));
}
226 | |||
/* Test bit "pos" (0 = LSB) of a 64-bit value; returns 0 or 1.
 * Bug fix: shift an unsigned 1u -- (1 << 31) is signed overflow,
 * undefined behaviour (C11 6.5.7). */
static inline int btst64(int pos, unsigned long long dest)
{
	if (pos < 32)
		return (0 != (LO_WORD(dest) & (1u << pos)));
	else
		return (0 != (HI_WORD(dest) & (1u << (pos - 32))));
}
234 | |||
235 | static inline void lsl64(int count, unsigned long long *dest) | ||
236 | { | ||
237 | if (count < 32) { | ||
238 | HI_WORD(*dest) = (HI_WORD(*dest) << count) | ||
239 | | (LO_WORD(*dest) >> count); | ||
240 | LO_WORD(*dest) <<= count; | ||
241 | return; | ||
242 | } | ||
243 | count -= 32; | ||
244 | HI_WORD(*dest) = LO_WORD(*dest) << count; | ||
245 | LO_WORD(*dest) = 0; | ||
246 | } | ||
247 | |||
248 | static inline void lsr64(int count, unsigned long long *dest) | ||
249 | { | ||
250 | if (count < 32) { | ||
251 | LO_WORD(*dest) = (LO_WORD(*dest) >> count) | ||
252 | | (HI_WORD(*dest) << (32 - count)); | ||
253 | HI_WORD(*dest) >>= count; | ||
254 | return; | ||
255 | } | ||
256 | count -= 32; | ||
257 | LO_WORD(*dest) = HI_WORD(*dest) >> count; | ||
258 | HI_WORD(*dest) = 0; | ||
259 | } | ||
260 | #endif | ||
261 | |||
/*
 * fp_denormalize(): shift the 64-bit mantissa of *reg right by cnt
 * bits, raising the exponent by cnt to compensate.  reg->lowmant
 * collects the eight guard bits just below the mantissa; any bit
 * shifted out below them is OR-ed into lowmant's lsb as a sticky bit
 * for later rounding.
 * NOTE(review): the 0...8 arm computes m32[0] << (32 - cnt), which is
 * a full-width shift when cnt == 0 -- presumably callers never pass
 * cnt == 0; TODO confirm.
 */
static inline void fp_denormalize(struct fp_ext *reg, unsigned int cnt)
{
	reg->exp += cnt;

	switch (cnt) {
	case 0 ... 8:
		/* all shifted-out bits fit into lowmant */
		reg->lowmant = reg->mant.m32[1] << (8 - cnt);
		reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
				   (reg->mant.m32[0] << (32 - cnt));
		reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
		break;
	case 9 ... 32:
		/* guard bits come from the low lword; set sticky if
		   anything below them is nonzero */
		reg->lowmant = reg->mant.m32[1] >> (cnt - 8);
		if (reg->mant.m32[1] << (40 - cnt))
			reg->lowmant |= 1;
		reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
				   (reg->mant.m32[0] << (32 - cnt));
		reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
		break;
	case 33 ... 39:
		/* guard bits straddle the lword boundary: extract them
		   from the high lword with bfextu */
		asm volatile ("bfextu %1{%2,#8},%0" : "=d" (reg->lowmant)
			: "m" (reg->mant.m32[0]), "d" (64 - cnt));
		if (reg->mant.m32[1] << (40 - cnt))
			reg->lowmant |= 1;
		reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
		reg->mant.m32[0] = 0;
		break;
	case 40 ... 71:
		/* guard bits come from the high lword; everything below
		   (including the whole low lword) feeds the sticky bit */
		reg->lowmant = reg->mant.m32[0] >> (cnt - 40);
		if ((reg->mant.m32[0] << (72 - cnt)) || reg->mant.m32[1])
			reg->lowmant |= 1;
		reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
		reg->mant.m32[0] = 0;
		break;
	default:
		/* shifted out entirely: only the sticky bit survives */
		reg->lowmant = reg->mant.m32[0] || reg->mant.m32[1];
		reg->mant.m32[0] = 0;
		reg->mant.m32[1] = 0;
		break;
	}
}
303 | |||
/*
 * fp_overnormalize(): shift the mantissa left until its msb is set,
 * using bfffo to locate the first set bit; returns the number of bit
 * positions shifted (the caller presumably adjusts the exponent --
 * TODO confirm against callers).
 */
static inline int fp_overnormalize(struct fp_ext *reg)
{
	int shift;

	if (reg->mant.m32[0]) {
		/* first set bit is in the high lword */
		asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[0]));
		reg->mant.m32[0] = (reg->mant.m32[0] << shift) | (reg->mant.m32[1] >> (32 - shift));
		reg->mant.m32[1] = (reg->mant.m32[1] << shift);
	} else {
		/* high lword empty: low lword becomes the high one */
		asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[1]));
		reg->mant.m32[0] = (reg->mant.m32[1] << shift);
		reg->mant.m32[1] = 0;
		shift += 32;
	}

	return shift;
}
321 | |||
/*
 * fp_addmant(): 72-bit mantissa addition, dest += src, chaining the
 * m68k extend flag through lowmant and both mantissa lwords; returns
 * the final carry (0 or 1).
 */
static inline int fp_addmant(struct fp_ext *dest, struct fp_ext *src)
{
	int carry;

	/* we assume here, gcc only insert move and a clr instr */
	asm volatile ("add.b %1,%0" : "=d,g" (dest->lowmant)
		: "g,d" (src->lowmant), "0,0" (dest->lowmant));
	asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[1])
		: "d" (src->mant.m32[1]), "0" (dest->mant.m32[1]))
	asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[0])
		: "d" (src->mant.m32[0]), "0" (dest->mant.m32[0]));
	/* addx with a zeroed register captures the carry-out */
	asm volatile ("addx.l %0,%0" : "=d" (carry) : "0" (0));

	return carry;
}
337 | |||
/*
 * fp_addcarry(): absorb a carry out of the mantissa by bumping the
 * exponent and shifting the mantissa right one bit (reinserting the
 * implicit leading 1).  On exponent overflow, sets OVFL (and INEX2 if
 * bits were lost), clears the mantissa, and returns 0; otherwise
 * returns 1.
 */
static inline int fp_addcarry(struct fp_ext *reg)
{
	if (++reg->exp == 0x7fff) {
		if (reg->mant.m64)
			fp_set_sr(FPSR_EXC_INEX2);
		reg->mant.m64 = 0;
		fp_set_sr(FPSR_EXC_OVFL);
		return 0;
	}
	/* keep the old sticky state in lowmant's lsb */
	reg->lowmant = (reg->mant.m32[1] << 7) | (reg->lowmant ? 1 : 0);
	reg->mant.m32[1] = (reg->mant.m32[1] >> 1) |
			   (reg->mant.m32[0] << 31);
	reg->mant.m32[0] = (reg->mant.m32[0] >> 1) | 0x80000000;

	return 1;
}
354 | |||
/*
 * fp_submant(): 72-bit mantissa subtraction, dest = src1 - src2,
 * chaining the m68k extend flag through lowmant and both lwords.
 */
static inline void fp_submant(struct fp_ext *dest, struct fp_ext *src1,
			      struct fp_ext *src2)
{
	/* we assume here, gcc only insert move and a clr instr */
	asm volatile ("sub.b %1,%0" : "=d,g" (dest->lowmant)
		: "g,d" (src2->lowmant), "0,0" (src1->lowmant));
	asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[1])
		: "d" (src2->mant.m32[1]), "0" (src1->mant.m32[1]));
	asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[0])
		: "d" (src2->mant.m32[0]), "0" (src1->mant.m32[0]));
}
366 | |||
/* 64-bit primitives built on 68020+ long multiply/divide and the
   extend flag.  All operate on 32-bit halves. */

/* desth:destl = src1 * src2 (full 64-bit product via mulu.l) */
#define fp_mul64(desth, destl, src1, src2) ({				\
	asm ("mulu.l %2,%1:%0" : "=d" (destl), "=d" (desth)		\
		: "g" (src1), "0" (src2));				\
})
/* quot = srch:srcl / div, rem = srch:srcl % div (divu.l) */
#define fp_div64(quot, rem, srch, srcl, div)				\
	asm ("divu.l %2,%1:%0" : "=d" (quot), "=d" (rem)		\
		: "dm" (div), "1" (srch), "0" (srcl))
/* 64-bit add with carry chained from dest2 into dest1 */
#define fp_add64(dest1, dest2, src1, src2) ({				\
	asm ("add.l %1,%0" : "=d,dm" (dest2)				\
		: "dm,d" (src2), "0,0" (dest2));			\
	asm ("addx.l %1,%0" : "=d" (dest1)				\
		: "d" (src1), "0" (dest1));				\
})
/* Add a 64-bit value into the low two lwords of a 96-bit dest,
   propagating the carry into the top lword.
   NOTE(review): the src parameter is unused -- the body hardwires a
   local named "temp", so callers must pass a variable of that name
   (fp_multiplymant does); macro-hygiene hazard to be aware of. */
#define fp_addx96(dest, src) ({					\
	/* we assume here, gcc only insert move and a clr instr */	\
	asm volatile ("add.l %1,%0" : "=d,g" (dest->m32[2])		\
		: "g,d" (temp.m32[1]), "0,0" (dest->m32[2]));		\
	asm volatile ("addx.l %1,%0" : "=d" (dest->m32[1])		\
		: "d" (temp.m32[0]), "0" (dest->m32[1]));		\
	asm volatile ("addx.l %1,%0" : "=d" (dest->m32[0])		\
		: "d" (0), "0" (dest->m32[0]));			\
})
/* 64-bit subtract with borrow: dest -= src */
#define fp_sub64(dest, src) ({					\
	asm ("sub.l %1,%0" : "=d,dm" (dest.m32[1])			\
		: "dm,d" (src.m32[1]), "0,0" (dest.m32[1]));		\
	asm ("subx.l %1,%0" : "=d" (dest.m32[0])			\
		: "d" (src.m32[0]), "0" (dest.m32[0]));		\
})
/* 96-bit subtract of srch:srcm:srcl from dest; evaluates to the final
   borrow (via scs), i.e. nonzero if the subtraction underflowed */
#define fp_sub96c(dest, srch, srcm, srcl) ({				\
	char carry;							\
	asm ("sub.l %1,%0" : "=d,dm" (dest.m32[2])			\
		: "dm,d" (srcl), "0,0" (dest.m32[2]));			\
	asm ("subx.l %1,%0" : "=d" (dest.m32[1])			\
		: "d" (srcm), "0" (dest.m32[1]));			\
	asm ("subx.l %2,%1; scs %0" : "=d" (carry), "=d" (dest.m32[0])	\
		: "d" (srch), "1" (dest.m32[0]));			\
	carry;								\
})
405 | |||
/*
 * fp_multiplymant(): 64x64 -> 128-bit mantissa multiply, computed as
 * four 32x32 -> 64-bit partial products; the two cross terms are
 * accumulated into the upper 96 bits with fp_addx96 (which reads the
 * local "temp" by name).
 */
static inline void fp_multiplymant(union fp_mant128 *dest, struct fp_ext *src1,
				   struct fp_ext *src2)
{
	union fp_mant64 temp;

	fp_mul64(dest->m32[0], dest->m32[1], src1->mant.m32[0], src2->mant.m32[0]);
	fp_mul64(dest->m32[2], dest->m32[3], src1->mant.m32[1], src2->mant.m32[1]);

	fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[0], src2->mant.m32[1]);
	fp_addx96(dest, temp);

	fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[1], src2->mant.m32[0]);
	fp_addx96(dest, temp);
}
420 | |||
/*
 * fp_dividemant(): divide the 64-bit mantissa of *src by that of
 * *div, producing one quotient lword per iteration into dest->m32[]
 * (an integer bit plus three 32-bit quotient digits).  src is
 * clobbered: it carries the running remainder between iterations.
 */
static inline void fp_dividemant(union fp_mant128 *dest, struct fp_ext *src,
				 struct fp_ext *div)
{
	union fp_mant128 tmp;
	union fp_mant64 tmp64;
	unsigned long *mantp = dest->m32;
	unsigned long fix, rem, first, dummy;
	int i;

	/* the algorithm below requires dest to be smaller than div,
	   but both have the high bit set */
	if (src->mant.m64 >= div->mant.m64) {
		fp_sub64(src->mant, div->mant);
		*mantp = 1;
	} else
		*mantp = 0;
	mantp++;

	/* basic idea behind this algorithm: we can't divide two 64bit numbers
	   (AB/CD) directly, but we can calculate AB/C0, but this means this
	   quotient is off by C0/CD, so we have to multiply the first result
	   to fix the result, after that we have nearly the correct result
	   and only a few corrections are needed. */

	/* C0/CD can be precalculated, but it's an 64bit division again, but
	   we can make it a bit easier, by dividing first through C so we get
	   10/1D and now only a single shift and the value fits into 32bit. */
	fix = 0x80000000;
	dummy = div->mant.m32[1] / div->mant.m32[0] + 1;
	dummy = (dummy >> 1) | fix;
	fp_div64(fix, dummy, fix, 0, dummy);
	fix--;

	for (i = 0; i < 3; i++, mantp++) {
		if (src->mant.m32[0] == div->mant.m32[0]) {
			/* AB/C0 would overflow the 32-bit quotient; use
			   the remainder-only division instead */
			fp_div64(first, rem, 0, src->mant.m32[1], div->mant.m32[0]);

			fp_mul64(*mantp, dummy, first, fix);
			*mantp += fix;
		} else {
			fp_div64(first, rem, src->mant.m32[0], src->mant.m32[1], div->mant.m32[0]);

			fp_mul64(*mantp, dummy, first, fix);
		}

		/* reconstruct the 96-bit remainder for this digit */
		fp_mul64(tmp.m32[0], tmp.m32[1], div->mant.m32[0], first - *mantp);
		fp_add64(tmp.m32[0], tmp.m32[1], 0, rem);
		tmp.m32[2] = 0;

		fp_mul64(tmp64.m32[0], tmp64.m32[1], *mantp, div->mant.m32[1]);
		fp_sub96c(tmp, 0, tmp64.m32[0], tmp64.m32[1]);

		src->mant.m32[0] = tmp.m32[1];
		src->mant.m32[1] = tmp.m32[2];

		/* correction loop: while the remainder still holds at
		   least one divisor, take it out and bump the digit */
		while (!fp_sub96c(tmp, 0, div->mant.m32[0], div->mant.m32[1])) {
			src->mant.m32[0] = tmp.m32[1];
			src->mant.m32[1] = tmp.m32[2];
			*mantp += 1;
		}
	}
}
483 | |||
484 | #if 0 | ||
/* Count leading zero bits of a 128-bit mantissa (bfffo numbers bits
   from the msb); returns 128 if the value is zero.  (Dead code: inside
   an #if 0 block.) */
static inline unsigned int fp_fls128(union fp_mant128 *src)
{
	unsigned long data;
	unsigned int res, off;

	if ((data = src->m32[0]))
		off = 0;
	else if ((data = src->m32[1]))
		off = 32;
	else if ((data = src->m32[2]))
		off = 64;
	else if ((data = src->m32[3]))
		off = 96;
	else
		return 128;

	asm ("bfffo %1{#0,#32},%0" : "=d" (res) : "dm" (data));
	return res + off;
}
504 | |||
/* Shift a 128-bit mantissa left (shift > 0) or right (shift < 0).
   Right shifts keep a sticky bit in the lsb for any bits shifted out.
   (Dead code: inside an #if 0 block.)
   NOTE(review): the negative case ranges compute (32 - shift) shifts
   that hit the full type width at the range boundaries (e.g. shift
   == -32 gives m32[2] >> 0 but m32[2] << 32) -- presumably those exact
   counts never occur; TODO confirm before resurrecting this code. */
static inline void fp_shiftmant128(union fp_mant128 *src, int shift)
{
	unsigned long sticky;

	switch (shift) {
	case 0:
		return;
	case 1:
		/* single-bit left shift via the m68k extend flag */
		asm volatile ("lsl.l #1,%0"
			: "=d" (src->m32[3]) : "0" (src->m32[3]));
		asm volatile ("roxl.l #1,%0"
			: "=d" (src->m32[2]) : "0" (src->m32[2]));
		asm volatile ("roxl.l #1,%0"
			: "=d" (src->m32[1]) : "0" (src->m32[1]));
		asm volatile ("roxl.l #1,%0"
			: "=d" (src->m32[0]) : "0" (src->m32[0]));
		return;
	case 2 ... 31:
		src->m32[0] = (src->m32[0] << shift) | (src->m32[1] >> (32 - shift));
		src->m32[1] = (src->m32[1] << shift) | (src->m32[2] >> (32 - shift));
		src->m32[2] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
		src->m32[3] = (src->m32[3] << shift);
		return;
	case 32 ... 63:
		shift -= 32;
		src->m32[0] = (src->m32[1] << shift) | (src->m32[2] >> (32 - shift));
		src->m32[1] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
		src->m32[2] = (src->m32[3] << shift);
		src->m32[3] = 0;
		return;
	case 64 ... 95:
		shift -= 64;
		src->m32[0] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
		src->m32[1] = (src->m32[3] << shift);
		src->m32[2] = src->m32[3] = 0;
		return;
	case 96 ... 127:
		shift -= 96;
		src->m32[0] = (src->m32[3] << shift);
		src->m32[1] = src->m32[2] = src->m32[3] = 0;
		return;
	case -31 ... -1:
		shift = -shift;
		sticky = 0;
		if (src->m32[3] << (32 - shift))
			sticky = 1;
		src->m32[3] = (src->m32[3] >> shift) | (src->m32[2] << (32 - shift)) | sticky;
		src->m32[2] = (src->m32[2] >> shift) | (src->m32[1] << (32 - shift));
		src->m32[1] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift));
		src->m32[0] = (src->m32[0] >> shift);
		return;
	case -63 ... -32:
		shift = -shift - 32;
		sticky = 0;
		if ((src->m32[2] << (32 - shift)) || src->m32[3])
			sticky = 1;
		src->m32[3] = (src->m32[2] >> shift) | (src->m32[1] << (32 - shift)) | sticky;
		src->m32[2] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift));
		src->m32[1] = (src->m32[0] >> shift);
		src->m32[0] = 0;
		return;
	case -95 ... -64:
		shift = -shift - 64;
		sticky = 0;
		if ((src->m32[1] << (32 - shift)) || src->m32[2] || src->m32[3])
			sticky = 1;
		src->m32[3] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift)) | sticky;
		src->m32[2] = (src->m32[0] >> shift);
		src->m32[1] = src->m32[0] = 0;
		return;
	case -127 ... -96:
		shift = -shift - 96;
		sticky = 0;
		if ((src->m32[0] << (32 - shift)) || src->m32[1] || src->m32[2] || src->m32[3])
			sticky = 1;
		src->m32[3] = (src->m32[0] >> shift) | sticky;
		src->m32[2] = src->m32[1] = src->m32[0] = 0;
		return;
	}

	/* shifted out entirely: only the sticky bit can survive */
	if (shift < 0 && (src->m32[0] || src->m32[1] || src->m32[2] || src->m32[3]))
		src->m32[3] = 1;
	else
		src->m32[3] = 0;
	src->m32[2] = 0;
	src->m32[1] = 0;
	src->m32[0] = 0;
}
593 | #endif | ||
594 | |||
/*
 * fp_putmant128(): pack a 128-bit product back into dest's 64-bit
 * mantissa plus 8-bit lowmant, left-shifting by "shift" first.  Bits
 * below lowmant are OR-ed into its lsb as a sticky bit.  Only the
 * shifts the callers actually need are handled: 0, 1, 31 and 32
 * (other values fall through and leave dest untouched).
 */
static inline void fp_putmant128(struct fp_ext *dest, union fp_mant128 *src,
				 int shift)
{
	unsigned long tmp;

	switch (shift) {
	case 0:
		dest->mant.m64 = src->m64[0];
		dest->lowmant = src->m32[2] >> 24;
		if (src->m32[3] || (src->m32[2] << 8))
			dest->lowmant |= 1;
		break;
	case 1:
		/* shift left one bit through the extend flag */
		asm volatile ("lsl.l #1,%0"
			: "=d" (tmp) : "0" (src->m32[2]));
		asm volatile ("roxl.l #1,%0"
			: "=d" (dest->mant.m32[1]) : "0" (src->m32[1]));
		asm volatile ("roxl.l #1,%0"
			: "=d" (dest->mant.m32[0]) : "0" (src->m32[0]));
		dest->lowmant = tmp >> 24;
		if (src->m32[3] || (tmp << 8))
			dest->lowmant |= 1;
		break;
	case 31:
		/* shift right one bit from word-shifted position */
		asm volatile ("lsr.l #1,%1; roxr.l #1,%0"
			: "=d" (dest->mant.m32[0])
			: "d" (src->m32[0]), "0" (src->m32[1]));
		asm volatile ("roxr.l #1,%0"
			: "=d" (dest->mant.m32[1]) : "0" (src->m32[2]));
		asm volatile ("roxr.l #1,%0"
			: "=d" (tmp) : "0" (src->m32[3]));
		dest->lowmant = tmp >> 24;
		if (src->m32[3] << 7)
			dest->lowmant |= 1;
		break;
	case 32:
		/* simple whole-word shift */
		dest->mant.m32[0] = src->m32[1];
		dest->mant.m32[1] = src->m32[2];
		dest->lowmant = src->m32[3] >> 24;
		if (src->m32[3] << 8)
			dest->lowmant |= 1;
		break;
	}
}
639 | |||
640 | #if 0 /* old code... */ | ||
/*
 * Find the highest set bit in a 32-bit word.
 *
 * bfffo scans the bitfield from the most significant end and yields
 * the offset of the first one bit: 0 when bit 31 is set, up to 31
 * when only bit 0 is set, and 32 when the word is zero.
 * NOTE(review): this numbering is the reverse of the ffs(3)-style
 * convention the name suggests -- confirm against callers before
 * reusing elsewhere.
 */
static inline int fls(unsigned int a)
{
	int r;

	asm volatile ("bfffo %1{#0,#32},%0"
		: "=d" (r) : "md" (a));
	return r;
}
649 | |||
650 | /* fls = "find last set" (cf. ffs(3)) */ | ||
651 | static inline int fls128(const int128 a) | ||
652 | { | ||
653 | if (a[MSW128]) | ||
654 | return fls(a[MSW128]); | ||
655 | if (a[NMSW128]) | ||
656 | return fls(a[NMSW128]) + 32; | ||
657 | /* XXX: it probably never gets beyond this point in actual | ||
658 | use, but that's indicative of a more general problem in the | ||
659 | algorithm (i.e. as per the actual 68881 implementation, we | ||
660 | really only need at most 67 bits of precision [plus | ||
661 | overflow]) so I'm not going to fix it. */ | ||
662 | if (a[NLSW128]) | ||
663 | return fls(a[NLSW128]) + 64; | ||
664 | if (a[LSW128]) | ||
665 | return fls(a[LSW128]) + 96; | ||
666 | else | ||
667 | return -1; | ||
668 | } | ||
669 | |||
670 | static inline int zerop128(const int128 a) | ||
671 | { | ||
672 | return !(a[LSW128] | a[NLSW128] | a[NMSW128] | a[MSW128]); | ||
673 | } | ||
674 | |||
675 | static inline int nonzerop128(const int128 a) | ||
676 | { | ||
677 | return (a[LSW128] | a[NLSW128] | a[NMSW128] | a[MSW128]); | ||
678 | } | ||
679 | |||
680 | /* Addition and subtraction */ | ||
681 | /* Do these in "pure" assembly, because "extended" asm is unmanageable | ||
682 | here */ | ||
683 | static inline void add128(const int128 a, int128 b) | ||
684 | { | ||
685 | /* rotating carry flags */ | ||
686 | unsigned int carry[2]; | ||
687 | |||
688 | carry[0] = a[LSW128] > (0xffffffff - b[LSW128]); | ||
689 | b[LSW128] += a[LSW128]; | ||
690 | |||
691 | carry[1] = a[NLSW128] > (0xffffffff - b[NLSW128] - carry[0]); | ||
692 | b[NLSW128] = a[NLSW128] + b[NLSW128] + carry[0]; | ||
693 | |||
694 | carry[0] = a[NMSW128] > (0xffffffff - b[NMSW128] - carry[1]); | ||
695 | b[NMSW128] = a[NMSW128] + b[NMSW128] + carry[1]; | ||
696 | |||
697 | b[MSW128] = a[MSW128] + b[MSW128] + carry[0]; | ||
698 | } | ||
699 | |||
700 | /* Note: assembler semantics: "b -= a" */ | ||
701 | static inline void sub128(const int128 a, int128 b) | ||
702 | { | ||
703 | /* rotating borrow flags */ | ||
704 | unsigned int borrow[2]; | ||
705 | |||
706 | borrow[0] = b[LSW128] < a[LSW128]; | ||
707 | b[LSW128] -= a[LSW128]; | ||
708 | |||
709 | borrow[1] = b[NLSW128] < a[NLSW128] + borrow[0]; | ||
710 | b[NLSW128] = b[NLSW128] - a[NLSW128] - borrow[0]; | ||
711 | |||
712 | borrow[0] = b[NMSW128] < a[NMSW128] + borrow[1]; | ||
713 | b[NMSW128] = b[NMSW128] - a[NMSW128] - borrow[1]; | ||
714 | |||
715 | b[MSW128] = b[MSW128] - a[MSW128] - borrow[0]; | ||
716 | } | ||
717 | |||
718 | /* Poor man's 64-bit expanding multiply */ | ||
719 | static inline void mul64(unsigned long long a, unsigned long long b, int128 c) | ||
720 | { | ||
721 | unsigned long long acc; | ||
722 | int128 acc128; | ||
723 | |||
724 | zero128(acc128); | ||
725 | zero128(c); | ||
726 | |||
727 | /* first the low words */ | ||
728 | if (LO_WORD(a) && LO_WORD(b)) { | ||
729 | acc = (long long) LO_WORD(a) * LO_WORD(b); | ||
730 | c[NLSW128] = HI_WORD(acc); | ||
731 | c[LSW128] = LO_WORD(acc); | ||
732 | } | ||
733 | /* Next the high words */ | ||
734 | if (HI_WORD(a) && HI_WORD(b)) { | ||
735 | acc = (long long) HI_WORD(a) * HI_WORD(b); | ||
736 | c[MSW128] = HI_WORD(acc); | ||
737 | c[NMSW128] = LO_WORD(acc); | ||
738 | } | ||
739 | /* The middle words */ | ||
740 | if (LO_WORD(a) && HI_WORD(b)) { | ||
741 | acc = (long long) LO_WORD(a) * HI_WORD(b); | ||
742 | acc128[NMSW128] = HI_WORD(acc); | ||
743 | acc128[NLSW128] = LO_WORD(acc); | ||
744 | add128(acc128, c); | ||
745 | } | ||
746 | /* The first and last words */ | ||
747 | if (HI_WORD(a) && LO_WORD(b)) { | ||
748 | acc = (long long) HI_WORD(a) * LO_WORD(b); | ||
749 | acc128[NMSW128] = HI_WORD(acc); | ||
750 | acc128[NLSW128] = LO_WORD(acc); | ||
751 | add128(acc128, c); | ||
752 | } | ||
753 | } | ||
754 | |||
755 | /* Note: unsigned */ | ||
756 | static inline int cmp128(int128 a, int128 b) | ||
757 | { | ||
758 | if (a[MSW128] < b[MSW128]) | ||
759 | return -1; | ||
760 | if (a[MSW128] > b[MSW128]) | ||
761 | return 1; | ||
762 | if (a[NMSW128] < b[NMSW128]) | ||
763 | return -1; | ||
764 | if (a[NMSW128] > b[NMSW128]) | ||
765 | return 1; | ||
766 | if (a[NLSW128] < b[NLSW128]) | ||
767 | return -1; | ||
768 | if (a[NLSW128] > b[NLSW128]) | ||
769 | return 1; | ||
770 | |||
771 | return (signed) a[LSW128] - b[LSW128]; | ||
772 | } | ||
773 | |||
774 | inline void div128(int128 a, int128 b, int128 c) | ||
775 | { | ||
776 | int128 mask; | ||
777 | |||
778 | /* Algorithm: | ||
779 | |||
780 | Shift the divisor until it's at least as big as the | ||
781 | dividend, keeping track of the position to which we've | ||
782 | shifted it, i.e. the power of 2 which we've multiplied it | ||
783 | by. | ||
784 | |||
785 | Then, for this power of 2 (the mask), and every one smaller | ||
786 | than it, subtract the mask from the dividend and add it to | ||
787 | the quotient until the dividend is smaller than the raised | ||
788 | divisor. At this point, divide the dividend and the mask | ||
789 | by 2 (i.e. shift one place to the right). Lather, rinse, | ||
790 | and repeat, until there are no more powers of 2 left. */ | ||
791 | |||
792 | /* FIXME: needless to say, there's room for improvement here too. */ | ||
793 | |||
794 | /* Shift up */ | ||
795 | /* XXX: since it just has to be "at least as big", we can | ||
796 | probably eliminate this horribly wasteful loop. I will | ||
797 | have to prove this first, though */ | ||
798 | set128(0, 0, 0, 1, mask); | ||
799 | while (cmp128(b, a) < 0 && !btsthi128(b)) { | ||
800 | lslone128(b); | ||
801 | lslone128(mask); | ||
802 | } | ||
803 | |||
804 | /* Shift down */ | ||
805 | zero128(c); | ||
806 | do { | ||
807 | if (cmp128(a, b) >= 0) { | ||
808 | sub128(b, a); | ||
809 | add128(mask, c); | ||
810 | } | ||
811 | lsrone128(mask); | ||
812 | lsrone128(b); | ||
813 | } while (nonzerop128(mask)); | ||
814 | |||
815 | /* The remainder is in a... */ | ||
816 | } | ||
817 | #endif | ||
818 | |||
819 | #endif /* MULTI_ARITH_H */ | ||