aboutsummaryrefslogtreecommitdiffstats
path: root/arch/m68k/math-emu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/m68k/math-emu')
-rw-r--r--arch/m68k/math-emu/Makefile11
-rw-r--r--arch/m68k/math-emu/fp_arith.c701
-rw-r--r--arch/m68k/math-emu/fp_arith.h52
-rw-r--r--arch/m68k/math-emu/fp_cond.S334
-rw-r--r--arch/m68k/math-emu/fp_decode.h417
-rw-r--r--arch/m68k/math-emu/fp_emu.h146
-rw-r--r--arch/m68k/math-emu/fp_entry.S325
-rw-r--r--arch/m68k/math-emu/fp_log.c223
-rw-r--r--arch/m68k/math-emu/fp_move.S244
-rw-r--r--arch/m68k/math-emu/fp_movem.S368
-rw-r--r--arch/m68k/math-emu/fp_scan.S478
-rw-r--r--arch/m68k/math-emu/fp_trig.c183
-rw-r--r--arch/m68k/math-emu/fp_trig.h32
-rw-r--r--arch/m68k/math-emu/fp_util.S1455
-rw-r--r--arch/m68k/math-emu/multi_arith.h819
15 files changed, 5788 insertions, 0 deletions
diff --git a/arch/m68k/math-emu/Makefile b/arch/m68k/math-emu/Makefile
new file mode 100644
index 000000000000..539940401814
--- /dev/null
+++ b/arch/m68k/math-emu/Makefile
@@ -0,0 +1,11 @@
1#
2# Makefile for the linux kernel.
3#
4
5EXTRA_AFLAGS := -traditional
6
7#EXTRA_AFLAGS += -DFPU_EMU_DEBUG
8#EXTRA_CFLAGS += -DFPU_EMU_DEBUG
9
10obj-y := fp_entry.o fp_scan.o fp_util.o fp_move.o fp_movem.o \
11 fp_cond.o fp_arith.o fp_log.o fp_trig.o
diff --git a/arch/m68k/math-emu/fp_arith.c b/arch/m68k/math-emu/fp_arith.c
new file mode 100644
index 000000000000..08f286db3c5a
--- /dev/null
+++ b/arch/m68k/math-emu/fp_arith.c
@@ -0,0 +1,701 @@
1/*
2
3 fp_arith.c: floating-point math routines for the Linux-m68k
4 floating point emulator.
5
6 Copyright (c) 1998-1999 David Huggins-Daines.
7
8 Somewhat based on the AlphaLinux floating point emulator, by David
9 Mosberger-Tang.
10
11 You may copy, modify, and redistribute this file under the terms of
12 the GNU General Public License, version 2, or any later version, at
13 your convenience.
14 */
15
16#include "fp_emu.h"
17#include "multi_arith.h"
18#include "fp_arith.h"
19
20const struct fp_ext fp_QNaN =
21{
22 .exp = 0x7fff,
23 .mant = { .m64 = ~0 }
24};
25
26const struct fp_ext fp_Inf =
27{
28 .exp = 0x7fff,
29};
30
31/* let's start with the easy ones */
32
33struct fp_ext *
34fp_fabs(struct fp_ext *dest, struct fp_ext *src)
35{
36 dprint(PINSTR, "fabs\n");
37
38 fp_monadic_check(dest, src);
39
40 dest->sign = 0;
41
42 return dest;
43}
44
45struct fp_ext *
46fp_fneg(struct fp_ext *dest, struct fp_ext *src)
47{
48 dprint(PINSTR, "fneg\n");
49
50 fp_monadic_check(dest, src);
51
52 dest->sign = !dest->sign;
53
54 return dest;
55}
56
57/* Now, the slightly harder ones */
58
59/* fp_fadd: Implements the kernel of the FADD, FSADD, FDADD, FSUB,
60 FDSUB, and FCMP instructions. */
61
62struct fp_ext *
63fp_fadd(struct fp_ext *dest, struct fp_ext *src)
64{
65 int diff;
66
67 dprint(PINSTR, "fadd\n");
68
69 fp_dyadic_check(dest, src);
70
71 if (IS_INF(dest)) {
72 /* infinity - infinity == NaN */
73 if (IS_INF(src) && (src->sign != dest->sign))
74 fp_set_nan(dest);
75 return dest;
76 }
77 if (IS_INF(src)) {
78 fp_copy_ext(dest, src);
79 return dest;
80 }
81
82 if (IS_ZERO(dest)) {
83 if (IS_ZERO(src)) {
84 if (src->sign != dest->sign) {
85 if (FPDATA->rnd == FPCR_ROUND_RM)
86 dest->sign = 1;
87 else
88 dest->sign = 0;
89 }
90 } else
91 fp_copy_ext(dest, src);
92 return dest;
93 }
94
95 dest->lowmant = src->lowmant = 0;
96
97 if ((diff = dest->exp - src->exp) > 0)
98 fp_denormalize(src, diff);
99 else if ((diff = -diff) > 0)
100 fp_denormalize(dest, diff);
101
102 if (dest->sign == src->sign) {
103 if (fp_addmant(dest, src))
104 if (!fp_addcarry(dest))
105 return dest;
106 } else {
107 if (dest->mant.m64 < src->mant.m64) {
108 fp_submant(dest, src, dest);
109 dest->sign = !dest->sign;
110 } else
111 fp_submant(dest, dest, src);
112 }
113
114 return dest;
115}
116
117/* fp_fsub: Implements the kernel of the FSUB, FSSUB, and FDSUB
118 instructions.
119
120 Remember that the arguments are in assembler-syntax order! */
121
122struct fp_ext *
123fp_fsub(struct fp_ext *dest, struct fp_ext *src)
124{
125 dprint(PINSTR, "fsub ");
126
127 src->sign = !src->sign;
128 return fp_fadd(dest, src);
129}
130
131
132struct fp_ext *
133fp_fcmp(struct fp_ext *dest, struct fp_ext *src)
134{
135 dprint(PINSTR, "fcmp ");
136
137 FPDATA->temp[1] = *dest;
138 src->sign = !src->sign;
139 return fp_fadd(&FPDATA->temp[1], src);
140}
141
142struct fp_ext *
143fp_ftst(struct fp_ext *dest, struct fp_ext *src)
144{
145 dprint(PINSTR, "ftst\n");
146
147 (void)dest;
148
149 return src;
150}
151
152struct fp_ext *
153fp_fmul(struct fp_ext *dest, struct fp_ext *src)
154{
155 union fp_mant128 temp;
156 int exp;
157
158 dprint(PINSTR, "fmul\n");
159
160 fp_dyadic_check(dest, src);
161
162 /* calculate the correct sign now, as it's necessary for infinities */
163 dest->sign = src->sign ^ dest->sign;
164
165 /* Handle infinities */
166 if (IS_INF(dest)) {
167 if (IS_ZERO(src))
168 fp_set_nan(dest);
169 return dest;
170 }
171 if (IS_INF(src)) {
172 if (IS_ZERO(dest))
173 fp_set_nan(dest);
174 else
175 fp_copy_ext(dest, src);
176 return dest;
177 }
178
179 /* Of course, as we all know, zero * anything = zero. You may
180 not have known that it might be a positive or negative
181 zero... */
182 if (IS_ZERO(dest) || IS_ZERO(src)) {
183 dest->exp = 0;
184 dest->mant.m64 = 0;
185 dest->lowmant = 0;
186
187 return dest;
188 }
189
190 exp = dest->exp + src->exp - 0x3ffe;
191
192 /* shift up the mantissa for denormalized numbers,
193 so that the highest bit is set, this makes the
194 shift of the result below easier */
195 if ((long)dest->mant.m32[0] >= 0)
196 exp -= fp_overnormalize(dest);
197 if ((long)src->mant.m32[0] >= 0)
198 exp -= fp_overnormalize(src);
199
200 /* now, do a 64-bit multiply with expansion */
201 fp_multiplymant(&temp, dest, src);
202
203 /* normalize it back to 64 bits and stuff it back into the
204 destination struct */
205 if ((long)temp.m32[0] > 0) {
206 exp--;
207 fp_putmant128(dest, &temp, 1);
208 } else
209 fp_putmant128(dest, &temp, 0);
210
211 if (exp >= 0x7fff) {
212 fp_set_ovrflw(dest);
213 return dest;
214 }
215 dest->exp = exp;
216 if (exp < 0) {
217 fp_set_sr(FPSR_EXC_UNFL);
218 fp_denormalize(dest, -exp);
219 }
220
221 return dest;
222}
223
224/* fp_fdiv: Implements the "kernel" of the FDIV, FSDIV, FDDIV and
225 FSGLDIV instructions.
226
227 Note that the order of the operands is counter-intuitive: instead
228 of src / dest, the result is actually dest / src. */
229
230struct fp_ext *
231fp_fdiv(struct fp_ext *dest, struct fp_ext *src)
232{
233 union fp_mant128 temp;
234 int exp;
235
236 dprint(PINSTR, "fdiv\n");
237
238 fp_dyadic_check(dest, src);
239
240 /* calculate the correct sign now, as it's necessary for infinities */
241 dest->sign = src->sign ^ dest->sign;
242
243 /* Handle infinities */
244 if (IS_INF(dest)) {
245 /* infinity / infinity = NaN (quiet, as always) */
246 if (IS_INF(src))
247 fp_set_nan(dest);
248 /* infinity / anything else = infinity (with approprate sign) */
249 return dest;
250 }
251 if (IS_INF(src)) {
252 /* anything / infinity = zero (with appropriate sign) */
253 dest->exp = 0;
254 dest->mant.m64 = 0;
255 dest->lowmant = 0;
256
257 return dest;
258 }
259
260 /* zeroes */
261 if (IS_ZERO(dest)) {
262 /* zero / zero = NaN */
263 if (IS_ZERO(src))
264 fp_set_nan(dest);
265 /* zero / anything else = zero */
266 return dest;
267 }
268 if (IS_ZERO(src)) {
269 /* anything / zero = infinity (with appropriate sign) */
270 fp_set_sr(FPSR_EXC_DZ);
271 dest->exp = 0x7fff;
272 dest->mant.m64 = 0;
273
274 return dest;
275 }
276
277 exp = dest->exp - src->exp + 0x3fff;
278
279 /* shift up the mantissa for denormalized numbers,
280 so that the highest bit is set, this makes lots
281 of things below easier */
282 if ((long)dest->mant.m32[0] >= 0)
283 exp -= fp_overnormalize(dest);
284 if ((long)src->mant.m32[0] >= 0)
285 exp -= fp_overnormalize(src);
286
287 /* now, do the 64-bit divide */
288 fp_dividemant(&temp, dest, src);
289
290 /* normalize it back to 64 bits and stuff it back into the
291 destination struct */
292 if (!temp.m32[0]) {
293 exp--;
294 fp_putmant128(dest, &temp, 32);
295 } else
296 fp_putmant128(dest, &temp, 31);
297
298 if (exp >= 0x7fff) {
299 fp_set_ovrflw(dest);
300 return dest;
301 }
302 dest->exp = exp;
303 if (exp < 0) {
304 fp_set_sr(FPSR_EXC_UNFL);
305 fp_denormalize(dest, -exp);
306 }
307
308 return dest;
309}
310
311struct fp_ext *
312fp_fsglmul(struct fp_ext *dest, struct fp_ext *src)
313{
314 int exp;
315
316 dprint(PINSTR, "fsglmul\n");
317
318 fp_dyadic_check(dest, src);
319
320 /* calculate the correct sign now, as it's necessary for infinities */
321 dest->sign = src->sign ^ dest->sign;
322
323 /* Handle infinities */
324 if (IS_INF(dest)) {
325 if (IS_ZERO(src))
326 fp_set_nan(dest);
327 return dest;
328 }
329 if (IS_INF(src)) {
330 if (IS_ZERO(dest))
331 fp_set_nan(dest);
332 else
333 fp_copy_ext(dest, src);
334 return dest;
335 }
336
337 /* Of course, as we all know, zero * anything = zero. You may
338 not have known that it might be a positive or negative
339 zero... */
340 if (IS_ZERO(dest) || IS_ZERO(src)) {
341 dest->exp = 0;
342 dest->mant.m64 = 0;
343 dest->lowmant = 0;
344
345 return dest;
346 }
347
348 exp = dest->exp + src->exp - 0x3ffe;
349
350 /* do a 32-bit multiply */
351 fp_mul64(dest->mant.m32[0], dest->mant.m32[1],
352 dest->mant.m32[0] & 0xffffff00,
353 src->mant.m32[0] & 0xffffff00);
354
355 if (exp >= 0x7fff) {
356 fp_set_ovrflw(dest);
357 return dest;
358 }
359 dest->exp = exp;
360 if (exp < 0) {
361 fp_set_sr(FPSR_EXC_UNFL);
362 fp_denormalize(dest, -exp);
363 }
364
365 return dest;
366}
367
368struct fp_ext *
369fp_fsgldiv(struct fp_ext *dest, struct fp_ext *src)
370{
371 int exp;
372 unsigned long quot, rem;
373
374 dprint(PINSTR, "fsgldiv\n");
375
376 fp_dyadic_check(dest, src);
377
378 /* calculate the correct sign now, as it's necessary for infinities */
379 dest->sign = src->sign ^ dest->sign;
380
381 /* Handle infinities */
382 if (IS_INF(dest)) {
383 /* infinity / infinity = NaN (quiet, as always) */
384 if (IS_INF(src))
385 fp_set_nan(dest);
386 /* infinity / anything else = infinity (with approprate sign) */
387 return dest;
388 }
389 if (IS_INF(src)) {
390 /* anything / infinity = zero (with appropriate sign) */
391 dest->exp = 0;
392 dest->mant.m64 = 0;
393 dest->lowmant = 0;
394
395 return dest;
396 }
397
398 /* zeroes */
399 if (IS_ZERO(dest)) {
400 /* zero / zero = NaN */
401 if (IS_ZERO(src))
402 fp_set_nan(dest);
403 /* zero / anything else = zero */
404 return dest;
405 }
406 if (IS_ZERO(src)) {
407 /* anything / zero = infinity (with appropriate sign) */
408 fp_set_sr(FPSR_EXC_DZ);
409 dest->exp = 0x7fff;
410 dest->mant.m64 = 0;
411
412 return dest;
413 }
414
415 exp = dest->exp - src->exp + 0x3fff;
416
417 dest->mant.m32[0] &= 0xffffff00;
418 src->mant.m32[0] &= 0xffffff00;
419
420 /* do the 32-bit divide */
421 if (dest->mant.m32[0] >= src->mant.m32[0]) {
422 fp_sub64(dest->mant, src->mant);
423 fp_div64(quot, rem, dest->mant.m32[0], 0, src->mant.m32[0]);
424 dest->mant.m32[0] = 0x80000000 | (quot >> 1);
425 dest->mant.m32[1] = (quot & 1) | rem; /* only for rounding */
426 } else {
427 fp_div64(quot, rem, dest->mant.m32[0], 0, src->mant.m32[0]);
428 dest->mant.m32[0] = quot;
429 dest->mant.m32[1] = rem; /* only for rounding */
430 exp--;
431 }
432
433 if (exp >= 0x7fff) {
434 fp_set_ovrflw(dest);
435 return dest;
436 }
437 dest->exp = exp;
438 if (exp < 0) {
439 fp_set_sr(FPSR_EXC_UNFL);
440 fp_denormalize(dest, -exp);
441 }
442
443 return dest;
444}
445
446/* fp_roundint: Internal rounding function for use by several of these
447 emulated instructions.
448
449 This one rounds off the fractional part using the rounding mode
450 specified. */
451
452static void fp_roundint(struct fp_ext *dest, int mode)
453{
454 union fp_mant64 oldmant;
455 unsigned long mask;
456
457 if (!fp_normalize_ext(dest))
458 return;
459
460 /* infinities and zeroes */
461 if (IS_INF(dest) || IS_ZERO(dest))
462 return;
463
464 /* first truncate the lower bits */
465 oldmant = dest->mant;
466 switch (dest->exp) {
467 case 0 ... 0x3ffe:
468 dest->mant.m64 = 0;
469 break;
470 case 0x3fff ... 0x401e:
471 dest->mant.m32[0] &= 0xffffffffU << (0x401e - dest->exp);
472 dest->mant.m32[1] = 0;
473 if (oldmant.m64 == dest->mant.m64)
474 return;
475 break;
476 case 0x401f ... 0x403e:
477 dest->mant.m32[1] &= 0xffffffffU << (0x403e - dest->exp);
478 if (oldmant.m32[1] == dest->mant.m32[1])
479 return;
480 break;
481 default:
482 return;
483 }
484 fp_set_sr(FPSR_EXC_INEX2);
485
486 /* We might want to normalize upwards here... however, since
487 we know that this is only called on the output of fp_fdiv,
488 or with the input to fp_fint or fp_fintrz, and the inputs
489 to all these functions are either normal or denormalized
490 (no subnormals allowed!), there's really no need.
491
492 In the case of fp_fdiv, observe that 0x80000000 / 0xffff =
493 0xffff8000, and the same holds for 128-bit / 64-bit. (i.e. the
494 smallest possible normal dividend and the largest possible normal
495 divisor will still produce a normal quotient, therefore, (normal
496 << 64) / normal is normal in all cases) */
497
498 switch (mode) {
499 case FPCR_ROUND_RN:
500 switch (dest->exp) {
501 case 0 ... 0x3ffd:
502 return;
503 case 0x3ffe:
504 /* As noted above, the input is always normal, so the
505 guard bit (bit 63) is always set. therefore, the
506 only case in which we will NOT round to 1.0 is when
507 the input is exactly 0.5. */
508 if (oldmant.m64 == (1ULL << 63))
509 return;
510 break;
511 case 0x3fff ... 0x401d:
512 mask = 1 << (0x401d - dest->exp);
513 if (!(oldmant.m32[0] & mask))
514 return;
515 if (oldmant.m32[0] & (mask << 1))
516 break;
517 if (!(oldmant.m32[0] << (dest->exp - 0x3ffd)) &&
518 !oldmant.m32[1])
519 return;
520 break;
521 case 0x401e:
522 if (!(oldmant.m32[1] >= 0))
523 return;
524 if (oldmant.m32[0] & 1)
525 break;
526 if (!(oldmant.m32[1] << 1))
527 return;
528 break;
529 case 0x401f ... 0x403d:
530 mask = 1 << (0x403d - dest->exp);
531 if (!(oldmant.m32[1] & mask))
532 return;
533 if (oldmant.m32[1] & (mask << 1))
534 break;
535 if (!(oldmant.m32[1] << (dest->exp - 0x401d)))
536 return;
537 break;
538 default:
539 return;
540 }
541 break;
542 case FPCR_ROUND_RZ:
543 return;
544 default:
545 if (dest->sign ^ (mode - FPCR_ROUND_RM))
546 break;
547 return;
548 }
549
550 switch (dest->exp) {
551 case 0 ... 0x3ffe:
552 dest->exp = 0x3fff;
553 dest->mant.m64 = 1ULL << 63;
554 break;
555 case 0x3fff ... 0x401e:
556 mask = 1 << (0x401e - dest->exp);
557 if (dest->mant.m32[0] += mask)
558 break;
559 dest->mant.m32[0] = 0x80000000;
560 dest->exp++;
561 break;
562 case 0x401f ... 0x403e:
563 mask = 1 << (0x403e - dest->exp);
564 if (dest->mant.m32[1] += mask)
565 break;
566 if (dest->mant.m32[0] += 1)
567 break;
568 dest->mant.m32[0] = 0x80000000;
569 dest->exp++;
570 break;
571 }
572}
573
574/* modrem_kernel: Implementation of the FREM and FMOD instructions
575 (which are exactly the same, except for the rounding used on the
576 intermediate value) */
577
578static struct fp_ext *
579modrem_kernel(struct fp_ext *dest, struct fp_ext *src, int mode)
580{
581 struct fp_ext tmp;
582
583 fp_dyadic_check(dest, src);
584
585 /* Infinities and zeros */
586 if (IS_INF(dest) || IS_ZERO(src)) {
587 fp_set_nan(dest);
588 return dest;
589 }
590 if (IS_ZERO(dest) || IS_INF(src))
591 return dest;
592
593 /* FIXME: there is almost certainly a smarter way to do this */
594 fp_copy_ext(&tmp, dest);
595 fp_fdiv(&tmp, src); /* NOTE: src might be modified */
596 fp_roundint(&tmp, mode);
597 fp_fmul(&tmp, src);
598 fp_fsub(dest, &tmp);
599
600 /* set the quotient byte */
601 fp_set_quotient((dest->mant.m64 & 0x7f) | (dest->sign << 7));
602 return dest;
603}
604
605/* fp_fmod: Implements the kernel of the FMOD instruction.
606
607 Again, the argument order is backwards. The result, as defined in
608 the Motorola manuals, is:
609
610 fmod(src,dest) = (dest - (src * floor(dest / src))) */
611
612struct fp_ext *
613fp_fmod(struct fp_ext *dest, struct fp_ext *src)
614{
615 dprint(PINSTR, "fmod\n");
616 return modrem_kernel(dest, src, FPCR_ROUND_RZ);
617}
618
619/* fp_frem: Implements the kernel of the FREM instruction.
620
621 frem(src,dest) = (dest - (src * round(dest / src)))
622 */
623
624struct fp_ext *
625fp_frem(struct fp_ext *dest, struct fp_ext *src)
626{
627 dprint(PINSTR, "frem\n");
628 return modrem_kernel(dest, src, FPCR_ROUND_RN);
629}
630
631struct fp_ext *
632fp_fint(struct fp_ext *dest, struct fp_ext *src)
633{
634 dprint(PINSTR, "fint\n");
635
636 fp_copy_ext(dest, src);
637
638 fp_roundint(dest, FPDATA->rnd);
639
640 return dest;
641}
642
643struct fp_ext *
644fp_fintrz(struct fp_ext *dest, struct fp_ext *src)
645{
646 dprint(PINSTR, "fintrz\n");
647
648 fp_copy_ext(dest, src);
649
650 fp_roundint(dest, FPCR_ROUND_RZ);
651
652 return dest;
653}
654
655struct fp_ext *
656fp_fscale(struct fp_ext *dest, struct fp_ext *src)
657{
658 int scale, oldround;
659
660 dprint(PINSTR, "fscale\n");
661
662 fp_dyadic_check(dest, src);
663
664 /* Infinities */
665 if (IS_INF(src)) {
666 fp_set_nan(dest);
667 return dest;
668 }
669 if (IS_INF(dest))
670 return dest;
671
672 /* zeroes */
673 if (IS_ZERO(src) || IS_ZERO(dest))
674 return dest;
675
676 /* Source exponent out of range */
677 if (src->exp >= 0x400c) {
678 fp_set_ovrflw(dest);
679 return dest;
680 }
681
682 /* src must be rounded with round to zero. */
683 oldround = FPDATA->rnd;
684 FPDATA->rnd = FPCR_ROUND_RZ;
685 scale = fp_conv_ext2long(src);
686 FPDATA->rnd = oldround;
687
688 /* new exponent */
689 scale += dest->exp;
690
691 if (scale >= 0x7fff) {
692 fp_set_ovrflw(dest);
693 } else if (scale <= 0) {
694 fp_set_sr(FPSR_EXC_UNFL);
695 fp_denormalize(dest, -scale);
696 } else
697 dest->exp = scale;
698
699 return dest;
700}
701
diff --git a/arch/m68k/math-emu/fp_arith.h b/arch/m68k/math-emu/fp_arith.h
new file mode 100644
index 000000000000..2cc3f846c393
--- /dev/null
+++ b/arch/m68k/math-emu/fp_arith.h
@@ -0,0 +1,52 @@
1/*
2
3 fp_arith.h: floating-point math routines for the Linux-m68k
4 floating point emulator.
5
6 Copyright (c) 1998 David Huggins-Daines.
7
8 Somewhat based on the AlphaLinux floating point emulator, by David
9 Mosberger-Tang.
10
11 You may copy, modify, and redistribute this file under the terms of
12 the GNU General Public License, version 2, or any later version, at
13 your convenience.
14
15 */
16
17#ifndef FP_ARITH_H
18#define FP_ARITH_H
19
20/* easy ones */
21struct fp_ext *
22fp_fabs(struct fp_ext *dest, struct fp_ext *src);
23struct fp_ext *
24fp_fneg(struct fp_ext *dest, struct fp_ext *src);
25
26/* straightforward arithmetic */
27struct fp_ext *
28fp_fadd(struct fp_ext *dest, struct fp_ext *src);
29struct fp_ext *
30fp_fsub(struct fp_ext *dest, struct fp_ext *src);
31struct fp_ext *
32fp_fcmp(struct fp_ext *dest, struct fp_ext *src);
33struct fp_ext *
34fp_ftst(struct fp_ext *dest, struct fp_ext *src);
35struct fp_ext *
36fp_fmul(struct fp_ext *dest, struct fp_ext *src);
37struct fp_ext *
38fp_fdiv(struct fp_ext *dest, struct fp_ext *src);
39
40/* ones that do rounding and integer conversions */
41struct fp_ext *
42fp_fmod(struct fp_ext *dest, struct fp_ext *src);
43struct fp_ext *
44fp_frem(struct fp_ext *dest, struct fp_ext *src);
45struct fp_ext *
46fp_fint(struct fp_ext *dest, struct fp_ext *src);
47struct fp_ext *
48fp_fintrz(struct fp_ext *dest, struct fp_ext *src);
49struct fp_ext *
50fp_fscale(struct fp_ext *dest, struct fp_ext *src);
51
#endif /* FP_ARITH_H */
diff --git a/arch/m68k/math-emu/fp_cond.S b/arch/m68k/math-emu/fp_cond.S
new file mode 100644
index 000000000000..ddae8b1b8b83
--- /dev/null
+++ b/arch/m68k/math-emu/fp_cond.S
@@ -0,0 +1,334 @@
1/*
2 * fp_cond.S
3 *
4 * Copyright Roman Zippel, 1997. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, and the entire permission notice in its entirety,
11 * including the disclaimer of warranties.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote
16 * products derived from this software without specific prior
17 * written permission.
18 *
19 * ALTERNATIVELY, this product may be distributed under the terms of
20 * the GNU General Public License, in which case the provisions of the GPL are
21 * required INSTEAD OF the above restrictions. (This clause is
22 * necessary due to a potential bad interaction between the GPL and
23 * the restrictions contained in a BSD-style copyright.)
24 *
25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
26 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
29 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
30 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
31 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
35 * OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include "fp_emu.h"
39#include "fp_decode.h"
40
41 .globl fp_fscc, fp_fbccw, fp_fbccl
42
| fnop handler: with FPU_EMU_DEBUG it logs the instruction and then
| leaves through fp_end; without it the name is just an alias for fp_end.
#ifdef FPU_EMU_DEBUG
fp_fnop:
	printf	PDECODE,"fnop\n"
	jra	fp_end
#else
#define fp_fnop fp_end
#endif
50
| fbcc with a word displacement, taken from the low word of %d2.
| A zero word is handled as fnop.  Otherwise the target address
| (pc - 2 + displacement) is built in %a0 and control joins the
| common tail at the first local label 1 inside fp_fbccl.
fp_fbccw:
	tst.w	%d2
	jeq	fp_fnop
	printf	PDECODE,"fbccw "
	fp_get_pc %a0
	lea	(-2,%a0,%d2.w),%a0	| target = pc - 2 + word displacement
	jra	1f
58
| fbcc with a long displacement, plus the tail shared with fp_fbccw.
| The low word of %d2 becomes the upper half of the displacement in
| %d0; fp_get_instr_word then supplies the rest (presumably the next
| instruction word goes into the low word of %d0 - macro not shown
| here, confirm in fp_decode.h).
fp_fbccl:
	printf	PDECODE,"fbccl "
	fp_get_pc %a0
	move.l	%d2,%d0
	swap	%d0
	fp_get_instr_word %d0,fp_err_ua1
	lea	(-2,%a0,%d0.l),%a0	| target = pc - 2 + long displacement
	| common tail: %a0 holds the branch target
1:	printf	PDECODE,"%x",1,%a0
	move.l	%d2,%d0
	swap	%d0			| upper word of %d2 -> low word of %d0
	jsr	fp_compute_cond		| %d0 = -1 if the condition holds, else 0
	tst.l	%d0
	jeq	1f			| condition false: pc is left alone
	fp_put_pc %a0,1
1:	printf	PDECODE,"\n"
	jra	fp_end
75
| fdbcc: reached through the second slot of the mode table in fp_fscc,
| so the condition result computed there sits in the upper word of %d1.
| If the condition is true nothing happens.  Otherwise the low word of
| the data register is decremented and written back, and the branch
| (target in %a1) is taken unless the decrement borrowed.
fp_fdbcc:
	printf	PDECODE,"fdbcc "
	fp_get_pc %a1			| calculate new pc
	fp_get_instr_word %d0,fp_err_ua1
	add.w	%d0,%a1
	fp_decode_addr_reg
	printf	PDECODE,"d%d,%x\n",2,%d0,%a1
	swap	%d1			| test condition in %d1
	tst.w	%d1
	jne	2f			| condition true: done
	move.l	%d0,%d1
	jsr	fp_get_data_reg
	subq.w	#1,%d0
	jcs	1f			| borrow: counter was zero, no branch
	fp_put_pc %a1,1
1:	jsr	fp_put_data_reg
2:	jra	fp_end
93
94| set flags for decode macros for fs<cc>
95do_fscc=1
96do_no_pc_mode=1
97
| fscc: evaluate the condition selected by %d2, park the result in the
| upper word of %d1, then dispatch on the addressing mode.  The eight
| longs after fp_decode_addr_mode are the per-mode handlers; the second
| slot is taken by fp_fdbcc.
fp_fscc:
	printf	PDECODE,"fscc "
	move.l	%d2,%d0
	jsr	fp_compute_cond
	move.w	%d0,%d1
	swap	%d1			| keep the result in the upper word of %d1

	| decode addressing mode
	fp_decode_addr_mode

	.long	fp_data, fp_fdbcc
	.long	fp_indirect, fp_postinc
	.long	fp_predecr, fp_disp16
	.long	fp_extmode0, fp_extmode1
112
	| addressing mode: data register direct
	| The register is read, its low byte replaced by the condition
	| byte held in the upper word of %d1, and then written back.
fp_data:
	fp_mode_data_direct
	move.w	%d0,%d1			| save register nr
	jsr	fp_get_data_reg
	swap	%d1			| bring the condition result down
	move.b	%d1,%d0			| low byte of the register = result
	swap	%d1			| register nr back in the low word
	jsr	fp_put_data_reg
	printf	PDECODE,"\n"
	jra	fp_end
124
| Memory addressing modes for fscc.  Each handler expands the matching
| fp_mode_* macro and then stores the condition byte via fp_do_scc,
| which writes through %a0 (the macros are defined outside this file;
| presumably they leave the effective address in %a0 - confirm).

| (An)
fp_indirect:
	fp_mode_addr_indirect
	jra	fp_do_scc

| (An)+
fp_postinc:
	fp_mode_addr_indirect_postinc
	jra	fp_do_scc

| -(An)
fp_predecr:
	fp_mode_addr_indirect_predec
	jra	fp_do_scc

| (d16,An)
fp_disp16:
	fp_mode_addr_indirect_disp16
	jra	fp_do_scc

| extended addressing, first group
fp_extmode0:
	fp_mode_addr_indirect_extmode0
	jra	fp_do_scc
144
| Second extended group: a 3-bit field of %d2 (bit-field offset 13,
| width 3) selects one of eight handlers from the table below.  Only
| the two absolute modes are implemented, every other slot is fp_ill.
fp_extmode1:
	bfextu	%d2{#13,#3},%d0
	jmp	([0f:w,%pc,%d0*4])	| indirect jump through the table

	.align	4
0:
	.long	fp_absolute_short, fp_absolute_long
	.long	fp_ill, fp_ill		| NOTE: jump here to ftrap.x
	.long	fp_ill, fp_ill
	.long	fp_ill, fp_ill
155
| Absolute addressing modes and the common store for fscc.
fp_absolute_short:
	fp_mode_abs_short
	jra	fp_do_scc

| The long form falls straight through into fp_do_scc, which is why
| the jump below is commented out.
fp_absolute_long:
	fp_mode_abs_long
|	jra	fp_do_scc

| Store the condition byte (kept in the upper word of %d1) to the
| user address in %a0; fp_err_ua1 is the label handed to putuser for
| a failed access.
fp_do_scc:
	swap	%d1
	putuser.b %d1,(%a0),fp_err_ua1,%a0
	printf	PDECODE,"\n"
	jra	fp_end
169
170
| Tests on the status word copy held in %d1.  btst sets the cpu zero
| flag when the tested bit is clear, so a following jne branches on
| a set bit and jeq on a clear bit.
#define tst_NAN	btst #24,%d1
#define tst_Z	btst #26,%d1
#define tst_N	btst #27,%d1

| fp_compute_cond: evaluate a condition predicate.
| in:  %d0 = predicate; the low four bits select the routine, and bit 4
|      marks the predicates that also flag a NaN operand
| out: %d0 = -1 if the condition holds, 0 if not
| %d1 is overwritten with the status word read from FPD_FPSR.
fp_compute_cond:
	move.l	(FPD_FPSR,FPDATA),%d1
	btst	#4,%d0
	jeq	1f			| bit 4 clear: status is not modified
	tst_NAN
	jeq	1f			| NAN bit clear: status is not modified
	bset	#15,%d1			| bit 4 set and NAN set: raise bits 15
	bset	#7,%d1			| and 7 and write the status back
	move.l	%d1,(FPD_FPSR,FPDATA)
1:	and.w	#0xf,%d0		| table index = low four bits
	jmp	([0f:w,%pc,%d0.w*4])

	.align	4
0:
	.long	fp_f  , fp_eq , fp_ogt, fp_oge
	.long	fp_olt, fp_ole, fp_ogl, fp_or
	.long	fp_un , fp_ueq, fp_ugt, fp_uge
	.long	fp_ult, fp_ule, fp_ne , fp_t
193
| Predicate routines, first half of the jump table in fp_compute_cond.
| Each one returns %d0 = -1 (true) or 0 (false), decided from the NAN,
| Z and N bits of the status copy in %d1.

| always false
fp_f:
	moveq	#0,%d0
	rts

| true if Z is set
fp_eq:
	moveq	#0,%d0
	tst_Z
	jeq	1f
	moveq	#-1,%d0
1:	rts

| true if NAN, Z and N are all clear
fp_ogt:
	moveq	#0,%d0
	tst_NAN
	jne	1f
	tst_Z
	jne	1f
	tst_N
	jne	1f
	moveq	#-1,%d0
1:	rts

| true if Z is set, or if NAN and N are both clear
fp_oge:
	moveq	#-1,%d0
	tst_Z
	jne	2f
	tst_NAN
	jne	1f
	tst_N
	jeq	2f
1:	moveq	#0,%d0
2:	rts

| true if N is set while NAN and Z are clear
fp_olt:
	moveq	#0,%d0
	tst_NAN
	jne	1f
	tst_Z
	jne	1f
	tst_N
	jeq	1f
	moveq	#-1,%d0
1:	rts

| true if Z is set, or if N is set while NAN is clear
fp_ole:
	moveq	#-1,%d0
	tst_Z
	jne	2f
	tst_NAN
	jne	1f
	tst_N
	jne	2f
1:	moveq	#0,%d0
2:	rts

| true if NAN and Z are both clear
fp_ogl:
	moveq	#0,%d0
	tst_NAN
	jne	1f
	tst_Z
	jne	1f
	moveq	#-1,%d0
1:	rts

| true if NAN is clear
fp_or:
	moveq	#0,%d0
	tst_NAN
	jne	1f
	moveq	#-1,%d0
1:	rts
264
| fp_un: true (%d0 = -1) if the NAN bit of the status copy in %d1 is
| set, false (%d0 = 0) otherwise.
| The branch target is a local label of this routine, so it no longer
| depends on whatever code happens to follow.
fp_un:
	moveq	#0,%d0
	tst_NAN
	jeq	1f			| NAN clear: keep the 0
	moveq	#-1,%d0
1:	rts
271
| Predicate routines, second half of the jump table in fp_compute_cond.
| Each one returns %d0 = -1 (true) or 0 (false), decided from the NAN,
| Z and N bits of the status copy in %d1.

| true if NAN or Z is set
fp_ueq:
	moveq	#-1,%d0
	tst_NAN
	jne	1f
	tst_Z
	jne	1f
	moveq	#0,%d0
1:	rts

| true if NAN is set, or if N and Z are both clear
fp_ugt:
	moveq	#-1,%d0
	tst_NAN
	jne	2f
	tst_N
	jne	1f
	tst_Z
	jeq	2f
1:	moveq	#0,%d0
2:	rts

| true if NAN or Z is set, or if N is clear
fp_uge:
	moveq	#-1,%d0
	tst_NAN
	jne	1f
	tst_Z
	jne	1f
	tst_N
	jeq	1f
	moveq	#0,%d0
1:	rts

| true if NAN is set, or if N is set while Z is clear
fp_ult:
	moveq	#-1,%d0
	tst_NAN
	jne	2f
	tst_Z
	jne	1f
	tst_N
	jne	2f
1:	moveq	#0,%d0
2:	rts

| true if any of NAN, Z or N is set
fp_ule:
	moveq	#-1,%d0
	tst_NAN
	jne	1f
	tst_Z
	jne	1f
	tst_N
	jne	1f
	moveq	#0,%d0
1:	rts

| true if Z is clear
fp_ne:
	moveq	#0,%d0
	tst_Z
	jne	1f
	moveq	#-1,%d0
1:	rts

| always true
fp_t:
	moveq	#-1,%d0
	rts
diff --git a/arch/m68k/math-emu/fp_decode.h b/arch/m68k/math-emu/fp_decode.h
new file mode 100644
index 000000000000..759679d9ab96
--- /dev/null
+++ b/arch/m68k/math-emu/fp_decode.h
@@ -0,0 +1,417 @@
1/*
2 * fp_decode.h
3 *
4 * Copyright Roman Zippel, 1997. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, and the entire permission notice in its entirety,
11 * including the disclaimer of warranties.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote
16 * products derived from this software without specific prior
17 * written permission.
18 *
19 * ALTERNATIVELY, this product may be distributed under the terms of
20 * the GNU General Public License, in which case the provisions of the GPL are
21 * required INSTEAD OF the above restrictions. (This clause is
22 * necessary due to a potential bad interaction between the GPL and
23 * the restrictions contained in a BSD-style copyright.)
24 *
25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
26 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
29 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
30 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
31 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
35 * OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#ifndef _FP_DECODE_H
39#define _FP_DECODE_H
40
41/* These macros do the dirty work of the instr decoding, several variables
42 * can be defined in the source file to modify the work of these macros,
43 * currently the following variables are used:
44 * ...
45 * The register usage:
46 * d0 - will contain source operand for data direct mode,
47 * otherwise scratch register
48 * d1 - upper 16bit are reserved for caller
49 * lower 16bit may contain further arguments,
50 * is destroyed during decoding
51 * d2 - contains first two instruction words,
52 * first word will be used for extension word
53 * a0 - will point to source/dest operand for any indirect mode
54 * otherwise scratch register
55 * a1 - scratch register
56 * a2 - base addr to the task structure
57 *
58 * the current implementation doesn't check for every disallowed
59 * addressing mode (e.g. pc relative modes as destination), as long
60 * as it only means a new addressing mode, which should not appear
61 * in a program and that doesn't crash the emulation, I think it's
62 * not a problem to allow these modes.
63 */
64
| Assembly-time switches and their defaults. They are tested with .if
| inside the macros below, so a file that includes this header can
| reassign them before expanding a macro to select a code variant:
|   do_fmovem     - non-zero: the postincrement/predecrement macros scale
|                   the address update by %d1 instead of using the
|                   fp_datasize table, and fp_test_sp_byte_move expands
|                   to nothing
|   do_fmovem_cr  - only looked at when do_fmovem is set; zero means 12
|                   bytes per count in %d1, non-zero means 4 bytes
|   do_no_pc_mode - non-zero: the pc-relative branches of the disp16 and
|                   extmode0 macros are not assembled
|   do_fscc       - non-zero: fp_test_sp_byte_move forces %d1.w to 6
|                   before its stack-pointer check
65do_fmovem=0
66do_fmovem_cr=0
67do_no_pc_mode=0
68do_fscc=0
69
70| first decoding of the instr type
71| this separates the conditional instr
| Extracts the 2-bit field at bit-field offset 8 of %d2 into %d0 (offsets
| count from the most significant bit) and jumps through a table of four
| longs. The macro ends at label 0:, so the user must place the table
| directly after the expansion; the commented .long lines give the
| expected order of the entries.
72.macro fp_decode_cond_instr_type
73 bfextu %d2{#8,#2},%d0
74 jmp ([0f:w,%pc,%d0*4])
75
76 .align 4
770:
78| .long "f<op>","fscc/fdbcc"
79| .long "fbccw","fbccl"
80.endm
81
82| second decoding of the instr type
83| this separates most move instr
| Same scheme with the 3-bit field at bit-field offset 16 of %d2: the
| user supplies eight table entries right after the expansion, in the
| order listed in the commented .long lines.
84.macro fp_decode_move_instr_type
85 bfextu %d2{#16,#3},%d0
86 jmp ([0f:w,%pc,%d0*4])
87
88 .align 4
890:
90| .long "f<op> fpx,fpx","invalid instr"
91| .long "f<op> <ea>,fpx","fmove fpx,<ea>"
92| .long "fmovem <ea>,fpcr","fmovem <ea>,fpx"
93| .long "fmovem fpcr,<ea>","fmovem fpx,<ea>"
94.endm
95
96| extract the source specifier, specifies
97| either source fp register or data format
| Result in %d0: the 3-bit field at bit-field offset 19 of %d2.
98.macro fp_decode_sourcespec
99 bfextu %d2{#19,#3},%d0
100.endm
101
102| decode destination format for fmove reg,ea
| Reads the same field as fp_decode_sourcespec (offset 19, width 3).
103.macro fp_decode_dest_format
104 bfextu %d2{#19,#3},%d0
105.endm
106
107| decode source register for fmove reg,ea
| Result in %d0: the 3-bit field at bit-field offset 22 of %d2.
108.macro fp_decode_src_reg
109 bfextu %d2{#22,#3},%d0
110.endm
111
112| extract the addressing mode
113| it depends on the instr which of the modes is valid
| Extracts the 3-bit field at bit-field offset 10 of %d2 and jumps
| through a table of eight longs that the user must place directly
| after the expansion (the macro ends at label 0:). The commented
| .long lines give the order of the entries.
114.macro fp_decode_addr_mode
115 bfextu %d2{#10,#3},%d0
116 jmp ([0f:w,%pc,%d0*4])
117
118 .align 4
1190:
120| .long "data register direct","addr register direct"
121| .long "addr register indirect"
122| .long "addr register indirect postincrement"
123| .long "addr register indirect predecrement"
124| .long "addr register + index16"
125| .long "extension mode1","extension mode2"
126.endm
127
128| extract the register for the addressing mode
| Result in %d0: the 3-bit field at bit-field offset 13 of %d2.
129.macro fp_decode_addr_reg
130 bfextu %d2{#13,#3},%d0
131.endm
132
133| decode the 8bit displacement from the brief extension word
| Result in %d0.w: the low byte of %d2, sign-extended to a word.
134.macro fp_decode_disp8
135 move.b %d2,%d0
136 ext.w %d0
137.endm
138
139| decode the index of the brief/full extension word
| Input:  %d2 low word = extension word.
| Output: %d0 = index register value, sized and scaled.
| Clobbers %d1.w and, on the address-register path, %a0 (the debug-only
| lines also load %a0). printf and debug are helper macros defined
| outside this chunk; presumably they expand to nothing unless
| FPU_EMU_DEBUG is set -- TODO confirm.
| \@ makes the local labels unique per expansion.
140.macro fp_decode_index
141 bfextu %d2{#17,#3},%d0 | get the register nr
142 btst #15,%d2 | test for data/addr register
143 jne 1\@f
144 printf PDECODE,"d%d",1,%d0
145 jsr fp_get_data_reg
146 jra 2\@f
1471\@: printf PDECODE,"a%d",1,%d0
148 jsr fp_get_addr_reg
| fp_get_addr_reg returns in %a0; move the value where the data path has it
149 move.l %a0,%d0
1502\@:
151debug lea "'l'.w,%a0"
152 btst #11,%d2 | 16/32 bit size?
153 jne 3\@f
154debug lea "'w'.w,%a0"
| word-sized index: sign-extend the low word to 32 bits
155 ext.l %d0
1563\@: printf PDECODE,":%c",1,%a0
157 move.w %d2,%d1 | scale factor
| rotate bits 10-9 of the extension word down to bits 1-0
158 rol.w #7,%d1
159 and.w #3,%d1
160debug move.l "%d1,-(%sp)"
161debug ext.l "%d1"
162 printf PDECODE,":%d",1,%d1
163debug move.l "(%sp)+,%d1"
| apply the scale as a left shift by 0..3
164 lsl.l %d1,%d0
165.endm
166
167| decode the base displacement size
| Extracts the 2-bit field at bit-field offset 26 of %d2 (bits 5-4 of the
| low word) and jumps through a table of four longs that the user must
| place directly after the expansion, in the order shown in the
| commented .long lines.
168.macro fp_decode_basedisp
169 bfextu %d2{#26,#2},%d0
170 jmp ([0f:w,%pc,%d0*4])
171
172 .align 4
1730:
174| .long "reserved","null displacement"
175| .long "word displacement","long displacement"
176.endm
177
| decode the outer displacement size
| Same scheme with the 2-bit field at bit-field offset 30 of %d2 (bits
| 1-0 of the low word).
178.macro fp_decode_outerdisp
179 bfextu %d2{#30,#2},%d0
180 jmp ([0f:w,%pc,%d0*4])
181
182 .align 4
1830:
184| .long "no memory indirect action/reserved","null outer displacement"
185| .long "word outer displacement","long outer displacement"
186.endm
187
188| get the extension word and test for brief or full extension type
| Loads the next instruction word into %d2 through fp_get_instr_word
| (defined outside this chunk, given fp_err_ua1 as its fault target) and
| branches to \label when bit 8 of that word is set, i.e. for the full
| format; falls through for the brief format.
189.macro fp_get_test_extword label
190 fp_get_instr_word %d2,fp_err_ua1
191 btst #8,%d2
192 jne \label
193.endm
194
195
196| test if %pc is the base register for the indirect addr mode
| Branches to \label when bit 20 of %d2 is clear; falls through when it
| is set. Callers in this file put the address-register path at \label
| and the pc path on the fall-through.
197.macro fp_test_basereg_d16 label
198 btst #20,%d2
199 jeq \label
200.endm
201
202| test if %pc is the base register for one of the extended modes
| As above, but tests bit 19 of %d2.
203.macro fp_test_basereg_ext label
204 btst #19,%d2
205 jeq \label
206.endm
207
| test the index-suppress bit of the full extension word
| Branches to \label when bit 6 of %d2 is set (no index to add).
208.macro fp_test_suppr_index label
209 btst #6,%d2
210 jne \label
211.endm
212
213
214| addressing mode: data register direct
| Leaves only the register number in %d0 (via fp_decode_addr_reg); the
| register itself is not read here.
215.macro fp_mode_data_direct
216 fp_decode_addr_reg
217 printf PDECODE,"d%d",1,%d0
218.endm
219
220| addressing mode: address register indirect
| Decodes the register number into %d0 and calls fp_get_addr_reg, which
| returns the register's value in %a0.
221.macro fp_mode_addr_indirect
222 fp_decode_addr_reg
223 printf PDECODE,"(a%d)",1,%d0
224 jsr fp_get_addr_reg
225.endm
226
227| adjust stack for byte moves from/to stack
| Expands to nothing when do_fmovem is set. Otherwise:
|   - with do_fscc set, %d1.w is first forced to 6;
|   - if %d0.w is 7 (register a7) and %d1.w is 6, %d1.w is replaced by 4.
| %d1.w is later used as an index into fp_datasize (table defined outside
| this chunk); presumably index 6 is the byte format and index 4 a 2-byte
| format, so that a7 moves by two bytes for a byte access -- TODO confirm.
228.macro fp_test_sp_byte_move
229 .if !do_fmovem
230 .if do_fscc
231 move.w #6,%d1
232 .endif
233 cmp.w #7,%d0
234 jne 1\@f
235 .if !do_fscc
236 cmp.w #6,%d1
237 jne 1\@f
238 .endif
239 move.w #4,%d1
2401\@:
241 .endif
242.endm
243
244| addressing mode: address register indirect with postincrement
| Output: %a0 = value of the address register before the increment.
| Side effect: the register is written back, incremented by
|   - 12 * %d1.w  when do_fmovem is set and do_fmovem_cr is clear,
|   -  4 * %d1.w  when do_fmovem and do_fmovem_cr are both set,
|   - the fp_datasize entry selected by %d1.w otherwise.
| Clobbers %a1. Relies on %d0 still holding the register number when
| fp_put_addr_reg is called.
245.macro fp_mode_addr_indirect_postinc
246 fp_decode_addr_reg
247 printf PDECODE,"(a%d)+",1,%d0
248 fp_test_sp_byte_move
249 jsr fp_get_addr_reg
250 move.l %a0,%a1 | save addr
251 .if do_fmovem
252 lea (%a0,%d1.w*4),%a0
253 .if !do_fmovem_cr
254 lea (%a0,%d1.w*8),%a0
255 .endif
256 .else
257 add.w (fp_datasize,%d1.w*2),%a0
258 .endif
259 jsr fp_put_addr_reg
| hand the original (pre-increment) address back to the caller
260 move.l %a1,%a0
261.endm
262
263| addressing mode: address register indirect with predecrement
| Side effect: the address register is written back, decremented by
|   - 12 * %d1.w  when do_fmovem is set and do_fmovem_cr is clear,
|   -  4 * %d1.w  when do_fmovem and do_fmovem_cr are both set,
|   - the fp_datasize entry selected by %d1.w otherwise.
| Output in %a0: the decremented address, except in the first case,
| where %a0 is the original address minus 12 (see the inline comment).
| %d1.w is negated (and doubled in the first case) along the way.
264.macro fp_mode_addr_indirect_predec
265 fp_decode_addr_reg
266 printf PDECODE,"-(a%d)",1,%d0
267 fp_test_sp_byte_move
268 jsr fp_get_addr_reg
269 .if do_fmovem
270 .if !do_fmovem_cr
271 lea (-12,%a0),%a1 | setup to addr of 1st reg to move
| a0 -= 4*n, then a0 -= 8*n, i.e. 12 bytes per count
272 neg.w %d1
273 lea (%a0,%d1.w*4),%a0
274 add.w %d1,%d1
275 lea (%a0,%d1.w*4),%a0
276 jsr fp_put_addr_reg
277 move.l %a1,%a0
278 .else
279 neg.w %d1
280 lea (%a0,%d1.w*4),%a0
281 jsr fp_put_addr_reg
282 .endif
283 .else
284 sub.w (fp_datasize,%d1.w*2),%a0
285 jsr fp_put_addr_reg
286 .endif
287.endm
288
289| addressing mode: address register/program counter indirect
290| with 16bit displacement
| Output: %a0 = base + displacement. The base is the pc (through
| fp_get_pc, defined outside this chunk) when bit 20 of %d2 is set and
| do_no_pc_mode is clear, otherwise the decoded address register.
| The displacement is fetched into %a1 with fp_get_instr_word;
| presumably the word is sign-extended by that load -- TODO confirm.
| Uses the plain local labels 1: and 2:, so surrounding code must not
| rely on its own 1f/2f across an expansion.
291.macro fp_mode_addr_indirect_disp16
292 .if !do_no_pc_mode
293 fp_test_basereg_d16 1f
294 printf PDECODE,"pc"
295 fp_get_pc %a0
296 jra 2f
297 .endif
2981: fp_decode_addr_reg
299 printf PDECODE,"a%d",1,%d0
300 jsr fp_get_addr_reg
3012: fp_get_instr_word %a1,fp_err_ua1
302 printf PDECODE,"@(%x)",1,%a1
303 add.l %a1,%a0
304.endm
305
306| perform preindex (if I/IS == 0xx and xx != 00)
| I/IS is taken from bits 2-0 of %d2. When bits 1-0 are non-zero and
| bit 2 is clear, %a1 is replaced by the long read from user address
| (%a1) via getuser (defined outside this chunk, fault target
| fp_err_ua1). In every other case %a1 is left alone. Clobbers %d0.
| In fp_mode_addr_indirect_extmode0 this runs after the index has been
| added, which is what makes the memory fetch "preindexed".
| The jra is only emitted through the debug macro; presumably the
| printf at 1: expands to nothing without FPU_EMU_DEBUG, so falling
| through is harmless -- TODO confirm.
307.macro fp_do_preindex
308 moveq #3,%d0
309 and.w %d2,%d0
310 jeq 1f
311 btst #2,%d2
312 jne 1f
313 printf PDECODE,")@("
314 getuser.l (%a1),%a1,fp_err_ua1,%a1
315debug jra "2f"
3161: printf PDECODE,","
3172:
318.endm
319
320| perform postindex (if I/IS == 1xx)
| When bit 2 of %d2 is set, %a1 is replaced by the long read from user
| address (%a1); otherwise %a1 is left alone. In
| fp_mode_addr_indirect_extmode0 this runs before the index is added,
| so the index ends up applied after the memory fetch.
321.macro fp_do_postindex
322 btst #2,%d2
323 jeq 1f
324 printf PDECODE,")@("
325 getuser.l (%a1),%a1,fp_err_ua1,%a1
326debug jra "2f"
3271: printf PDECODE,","
3282:
329.endm
330
331| all other indirect addressing modes will finally end up here
| Output: %a0 = effective address. %a1 is the running address while the
| extension word(s) are decoded; %d0, %d1.w and %a1 are clobbered.
| %d2 is swapped on entry to the decoding part so that the extension
| word can be loaded into its low word, and swapped back at the end.
| Brief format (bit 8 of the extension word clear):
|     base + sign-extended 8-bit displacement + scaled index.
| Full format (bit 8 set): optional base suppression, base displacement,
| optional memory indirection before or after the index is added, and
| an outer displacement.
| The jump tables following fp_decode_basedisp/fp_decode_outerdisp are
| the ones those macros expect right after their expansion.
332.macro fp_mode_addr_indirect_extmode0
333 .if !do_no_pc_mode
334 fp_test_basereg_ext 1f
335 printf PDECODE,"pc"
336 fp_get_pc %a0
337 jra 2f
338 .endif
3391: fp_decode_addr_reg
340 printf PDECODE,"a%d",1,%d0
341 jsr fp_get_addr_reg
3422: move.l %a0,%a1
343 swap %d2
344 fp_get_test_extword 3f
345 | addressing mode: address register/program counter indirect
346 | with index and 8bit displacement
347 fp_decode_disp8
348debug ext.l "%d0"
349 printf PDECODE,"@(%x,",1,%d0
350 add.w %d0,%a1
351 fp_decode_index
352 add.l %d0,%a1
353 printf PDECODE,")"
354 jra 9f
3553: | addressing mode: address register/program counter memory indirect
356 | with base and/or outer displacement
357 btst #7,%d2 | base register suppressed?
358 jeq 1f
359 printf PDECODE,"!"
| base suppressed: start from address 0
360 sub.l %a1,%a1
3611: printf PDECODE,"@("
362 fp_decode_basedisp
363
| table order: reserved, null, word, long base displacement
364 .long fp_ill,1f
365 .long 2f,3f
366
| Without FPU_EMU_DEBUG the "1f" in the table above resolves to the
| label 1: that follows the add.l below, so a null displacement skips
| the add. With FPU_EMU_DEBUG an extra 1: is inserted here that prints
| "0" and then jumps on to that same label.
367#ifdef FPU_EMU_DEBUG
3681: printf PDECODE,"0" | null base displacement
369 jra 1f
370#endif
3712: fp_get_instr_word %a0,fp_err_ua1 | 16bit base displacement
372 printf PDECODE,"%x:w",1,%a0
373 jra 4f
3743: fp_get_instr_long %a0,fp_err_ua1 | 32bit base displacement
375 printf PDECODE,"%x:l",1,%a0
3764: add.l %a0,%a1
3771:
| memory fetch for the postindexed forms happens before the index is added
378 fp_do_postindex
379 fp_test_suppr_index 1f
380 fp_decode_index
381 add.l %d0,%a1
| memory fetch for the preindexed forms happens after the index is added
3821: fp_do_preindex
383
384 fp_decode_outerdisp
385
| table order: no indirection (straight to the end), null, word, long
386 .long 5f,1f
387 .long 2f,3f
388
| same FPU_EMU_DEBUG label trick as for the base displacement above
389#ifdef FPU_EMU_DEBUG
3901: printf PDECODE,"0" | null outer displacement
391 jra 1f
392#endif
3932: fp_get_instr_word %a0,fp_err_ua1 | 16bit outer displacement
394 printf PDECODE,"%x:w",1,%a0
395 jra 4f
3963: fp_get_instr_long %a0,fp_err_ua1 | 32bit outer displacement
397 printf PDECODE,"%x:l",1,%a0
3984: add.l %a0,%a1
3991:
4005: printf PDECODE,")"
| common exit: return the address in %a0 and restore the word order of %d2
4019: move.l %a1,%a0
402 swap %d2
403.endm
404
405| get the absolute short address from user space
| Output: %a0 = the next instruction word, fetched with
| fp_get_instr_word (defined outside this chunk; fault target
| fp_err_ua1). Presumably the word is sign-extended by loading it into
| an address register -- TODO confirm.
406.macro fp_mode_abs_short
407 fp_get_instr_word %a0,fp_err_ua1
408 printf PDECODE,"%x.w",1,%a0
409.endm
410
411| get the absolute long address from user space
| Output: %a0 = the next instruction long, fetched with
| fp_get_instr_long (fault target fp_err_ua1).
412.macro fp_mode_abs_long
413 fp_get_instr_long %a0,fp_err_ua1
414 printf PDECODE,"%x.l",1,%a0
415.endm
416
417#endif /* _FP_DECODE_H */
diff --git a/arch/m68k/math-emu/fp_emu.h b/arch/m68k/math-emu/fp_emu.h
new file mode 100644
index 000000000000..1d6edc975d89
--- /dev/null
+++ b/arch/m68k/math-emu/fp_emu.h
@@ -0,0 +1,146 @@
1/*
2 * fp_emu.h
3 *
4 * Copyright Roman Zippel, 1997. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, and the entire permission notice in its entirety,
11 * including the disclaimer of warranties.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote
16 * products derived from this software without specific prior
17 * written permission.
18 *
19 * ALTERNATIVELY, this product may be distributed under the terms of
20 * the GNU General Public License, in which case the provisions of the GPL are
21 * required INSTEAD OF the above restrictions. (This clause is
22 * necessary due to a potential bad interaction between the GPL and
23 * the restrictions contained in a BSD-style copyright.)
24 *
25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
26 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
29 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
30 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
31 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
35 * OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#ifndef _FP_EMU_H
39#define _FP_EMU_H
40
41#ifdef __ASSEMBLY__
42#include <asm/offsets.h>
43#endif
44#include <asm/math-emu.h>
45
46#ifndef __ASSEMBLY__
47
/*
 * Classification helpers; a is a pointer to a struct fp_ext.
 * IS_INF looks at the exponent only and IS_ZERO at the mantissa only.
 * NOTE(review): the two overlap. fp_set_ovrflw() below builds infinity as
 * exp 0x7fff with mantissa 0, which satisfies IS_ZERO as well, and IS_INF
 * is true for any value with exp 0x7fff. Callers therefore have to test
 * IS_INF before IS_ZERO, and have NaNs filtered out beforehand -- confirm
 * at each call site.
 */
48#define IS_INF(a) ((a)->exp == 0x7fff)
49#define IS_ZERO(a) ((a)->mant.m64 == 0)
50
51
/*
 * Set the bit with number `bit` in the emulated status register
 * FPDATA->fpsr (FPDATA is defined outside this header).
 */
52#define fp_set_sr(bit) ({ \
53 FPDATA->fpsr |= 1 << (bit); \
54})
55
/*
 * Store the low eight bits of `quotient` in bits 16-23 of FPDATA->fpsr;
 * all other bits of the register are preserved.
 */
56#define fp_set_quotient(quotient) ({ \
57 FPDATA->fpsr &= 0xff00ffff; \
58 FPDATA->fpsr |= ((quotient) & 0xff) << 16; \
59})
60
61/* linkage for several useful functions */
62
/*
 * Calling convention of the wrapper below: the pointer is passed in
 * register a0 and the result comes back in d0; a1, d1, d2 and memory are
 * declared clobbered because the assembler routine fp_conv_ext2ext
 * (defined outside this header) may change them. The value of the
 * statement expression is the routine's d0.
 */
63/* Normalize the extended struct, return 0 for a NaN */
64#define fp_normalize_ext(fpreg) ({ \
65 register struct fp_ext *reg asm ("a0") = fpreg; \
66 register int res asm ("d0"); \
67 \
68 asm volatile ("jsr fp_conv_ext2ext" \
69 : "=d" (res) : "a" (reg) \
70 : "a1", "d1", "d2", "memory"); \
71 res; \
72})
73
/*
 * Copy the object *src to *dest by plain assignment.
 * Both arguments are parenthesized so that pointer expressions such as
 * `p + 1` expand correctly; each argument is evaluated exactly once.
 */
#define fp_copy_ext(dest, src) ({				\
	*(dest) = *(src);					\
})
77
/*
 * Operand preparation for one-operand instructions: copy *src to *dest
 * and normalize dest.
 * WARNING: hidden control flow -- when fp_normalize_ext() yields 0 (the
 * NaN case according to its comment) this macro executes `return dest;`
 * in the function that uses it.
 */
78#define fp_monadic_check(dest, src) ({ \
79 fp_copy_ext(dest, src); \
80 if (!fp_normalize_ext(dest)) \
81 return dest; \
82})
83
/*
 * Operand preparation for two-operand instructions: normalize dest, then
 * src (src is modified in place).
 * WARNING: hidden control flow -- returns dest from the enclosing
 * function when fp_normalize_ext() yields 0 for either operand; in the
 * src case *src is copied to *dest first.
 */
84#define fp_dyadic_check(dest, src) ({ \
85 if (!fp_normalize_ext(dest)) \
86 return dest; \
87 if (!fp_normalize_ext(src)) { \
88 fp_copy_ext(dest, src); \
89 return dest; \
90 } \
91})
92
93extern const struct fp_ext fp_QNaN;
94extern const struct fp_ext fp_Inf;
95
/*
 * Record an operand error (FPSR_EXC_OPERR) in the emulated status register
 * and overwrite *dest with the default NaN fp_QNaN.
 * dest is parenthesized so that pointer expressions such as `p + 1`
 * expand correctly.
 */
#define fp_set_nan(dest) ({					\
	fp_set_sr(FPSR_EXC_OPERR);				\
	*(dest) = fp_QNaN;					\
})
100
/*
 * Record an overflow (FPSR_EXC_OVFL) in the emulated status register and
 * turn *dest into infinity: exponent 0x7fff, mantissa 0. Other members of
 * *dest are left untouched.
 * dest is parenthesized so that pointer expressions such as `p + 1`
 * expand correctly; note that it is evaluated twice.
 */
/* TODO check rounding mode? */
#define fp_set_ovrflw(dest) ({					\
	fp_set_sr(FPSR_EXC_OVFL);				\
	(dest)->exp = 0x7fff;					\
	(dest)->mant.m64 = 0;					\
})
107
/*
 * Call the assembler routine fp_conv_ext2long (defined outside this
 * header) with the pointer src in register a0; the value of the statement
 * expression is what the routine leaves in d0. a1, d1, d2 and memory are
 * declared clobbered.
 */
108#define fp_conv_ext2long(src) ({ \
109 register struct fp_ext *__src asm ("a0") = src; \
110 register int __res asm ("d0"); \
111 \
112 asm volatile ("jsr fp_conv_ext2long" \
113 : "=d" (__res) : "a" (__src) \
114 : "a1", "d1", "d2", "memory"); \
115 __res; \
116})
117
/*
 * Convert the integer src to extended format and store it in *dest.
 * Calling convention: src is passed in register d0 and the destination
 * pointer in a0; a1, d1, d2 and memory are declared clobbered. The
 * statement expression has no value.
 *
 * The routine called is fp_conv_long2ext. The previous text jumped to
 * fp_conv_ext2long, i.e. the conversion in the opposite direction, which
 * does not match the operands set up here (integer in d0, destination in
 * a0).
 * NOTE(review): confirm that fp_util.S exports fp_conv_long2ext with this
 * register convention.
 */
#define fp_conv_long2ext(dest, src) ({				\
	register struct fp_ext *__dest asm ("a0") = dest;	\
	register int __src asm ("d0") = src;			\
								\
	asm volatile ("jsr fp_conv_long2ext"			\
			: : "d" (__src), "a" (__dest)		\
			: "a1", "d1", "d2", "memory");		\
})
126
127#else /* __ASSEMBLY__ */
128
129/*
130 * set, clear or test a bit in the fp status register
 *
 * \bit is the bit number within the 32-bit register stored at
 * FPD_FPSR(FPDATA). The bit instructions work on a single byte in
 * memory, so the byte is selected with 3-(\bit/8) (the register is
 * stored most significant byte first) and the bit inside that byte
 * with \bit&7. fp_tst_sr only sets the condition codes.
131 */
132.macro fp_set_sr bit
133 bset #(\bit&7),(FPD_FPSR+3-(\bit/8),FPDATA)
134.endm
135
136.macro fp_clr_sr bit
137 bclr #(\bit&7),(FPD_FPSR+3-(\bit/8),FPDATA)
138.endm
139
140.macro fp_tst_sr bit
141 btst #(\bit&7),(FPD_FPSR+3-(\bit/8),FPDATA)
142.endm
143
144#endif /* __ASSEMBLY__ */
145
146#endif /* _FP_EMU_H */
diff --git a/arch/m68k/math-emu/fp_entry.S b/arch/m68k/math-emu/fp_entry.S
new file mode 100644
index 000000000000..5ec2d9101ea3
--- /dev/null
+++ b/arch/m68k/math-emu/fp_entry.S
@@ -0,0 +1,325 @@
1/*
2 * fp_entry.S
3 *
4 * Copyright Roman Zippel, 1997. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, and the entire permission notice in its entirety,
11 * including the disclaimer of warranties.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote
16 * products derived from this software without specific prior
17 * written permission.
18 *
19 * ALTERNATIVELY, this product may be distributed under the terms of
20 * the GNU General Public License, in which case the provisions of the GPL are
21 * required INSTEAD OF the above restrictions. (This clause is
22 * necessary due to a potential bad interaction between the GPL and
23 * the restrictions contained in a BSD-style copyright.)
24 *
25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
26 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
29 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
30 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
31 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
35 * OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include <linux/config.h>
39#include <linux/linkage.h>
40#include <asm/entry.h>
41
42#include "fp_emu.h"
43
44 .globl fpu_emu
45 .globl fp_debugprint
46 .globl fp_err_ua1,fp_err_ua2
47
48 .text
| fpu_emu: entry point of the emulator.
| Saves the registers with SAVE_ALL_INT and loads the current task with
| GET_CURRENT (both macros come from outside this file), lets fp_scan
| emulate the instruction and leaves through ret_from_exception.
| On kernels built for 68040/68060 the value at FPS_PC2(%sp) is copied to
| FPS_PC(%sp) first; when 020/030 support is built in as well, the copy
| is skipped at run time if m68k_is040or060 is zero.
49fpu_emu:
50 SAVE_ALL_INT
51 GET_CURRENT(%d0)
52
53#if defined(CPU_M68020_OR_M68030) && defined(CPU_M68040_OR_M68060)
54 tst.l m68k_is040or060
55 jeq 1f
56#endif
57#if defined(CPU_M68040_OR_M68060)
58 move.l (FPS_PC2,%sp),(FPS_PC,%sp)
59#endif
601:
61 | emulate the instruction
62 jsr fp_scan
63
| 68060 only: if bit 7 of the byte at FPS_SR(%sp) is set, send a trace
| signal (presumably this is the trace bit of the saved status register
| -- TODO confirm). The cputype check is compiled in only when other
| CPUs are supported too.
64#if defined(CONFIG_M68060)
65#if !defined(CPU_M68060_ONLY)
66 btst #3,m68k_cputype+3
67 jeq 1f
68#endif
69 btst #7,(FPS_SR,%sp)
70 jne fp_sendtrace060
71#endif
721:
73 | emulation successful?
| NOTE(review): both outcomes currently end in ret_from_exception; no
| signal is sent when fp_scan returns non-zero in %d0.
74 tst.l %d0
75 jeq ret_from_exception
76
77 | send some signal to program here
78
79 jra ret_from_exception
80
81 | we jump here after an access error while trying to access
82 | user space, we correct stackpointer and send a SIGSEGV to
83 | the user process
| fp_err_ua2 drops two longs from the stack, fp_err_ua1 one, before the
| signal is raised. fpemu_signal is called with three long arguments on
| the stack: SIGSEGV, SEGV_MAPERR and the contents of %a0 (presumably the
| faulting address -- TODO confirm); the arguments are popped afterwards.
84fp_err_ua2:
85 addq.l #4,%sp
86fp_err_ua1:
87 addq.l #4,%sp
88 move.l %a0,-(%sp)
89 pea SEGV_MAPERR
90 pea SIGSEGV
91 jsr fpemu_signal
92 add.w #12,%sp
93 jra ret_from_exception
94
| fp_sendtrace060: reached from fpu_emu on 68060 builds. Calls
| fpemu_signal with SIGTRAP, TRAP_TRACE and the value at FPS_PC(%sp),
| pops the three arguments and leaves through ret_from_exception.
95#if defined(CONFIG_M68060)
96 | send a trace signal if we are debugged
97 | it does not really belong here, but...
98fp_sendtrace060:
99 move.l (FPS_PC,%sp),-(%sp)
100 pea TRAP_TRACE
101 pea SIGTRAP
102 jsr fpemu_signal
103 add.w #12,%sp
104 jra ret_from_exception
105#endif
106
107 .globl fp_get_data_reg, fp_put_data_reg
108 .globl fp_get_addr_reg, fp_put_addr_reg
109
110 | Entry points to get/put a register. Some of them can be get/put
111 | directly, others are on the stack, as we read/write the stack
112 | directly here, these function may only be called from within
113 | instruction decoding, otherwise the stack pointer is incorrect
114 | and the stack gets corrupted.
| fp_get_data_reg
|   in:  %d0.w = data register number (0-7), used as jump table index
|   out: %d0   = value of that register
| d0-d2 are read from the saved register frame at PT_Dn+8(%sp), d3-d7
| from the live registers. Presumably the +8 skips two return addresses
| that lie between %sp and the frame at this call depth -- TODO confirm.
115fp_get_data_reg:
116 jmp ([0f:w,%pc,%d0.w*4])
117
118 .align 4
1190:
120 .long fp_get_d0, fp_get_d1
121 .long fp_get_d2, fp_get_d3
122 .long fp_get_d4, fp_get_d5
123 .long fp_get_d6, fp_get_d7
124
125fp_get_d0:
126 move.l (PT_D0+8,%sp),%d0
127 printf PREGISTER,"{d0->%08x}",1,%d0
128 rts
129
130fp_get_d1:
131 move.l (PT_D1+8,%sp),%d0
132 printf PREGISTER,"{d1->%08x}",1,%d0
133 rts
134
135fp_get_d2:
136 move.l (PT_D2+8,%sp),%d0
137 printf PREGISTER,"{d2->%08x}",1,%d0
138 rts
139
140fp_get_d3:
141 move.l %d3,%d0
142 printf PREGISTER,"{d3->%08x}",1,%d0
143 rts
144
145fp_get_d4:
146 move.l %d4,%d0
147 printf PREGISTER,"{d4->%08x}",1,%d0
148 rts
149
150fp_get_d5:
151 move.l %d5,%d0
152 printf PREGISTER,"{d5->%08x}",1,%d0
153 rts
154
155fp_get_d6:
156 move.l %d6,%d0
157 printf PREGISTER,"{d6->%08x}",1,%d0
158 rts
159
160fp_get_d7:
161 move.l %d7,%d0
162 printf PREGISTER,"{d7->%08x}",1,%d0
163 rts
164
| fp_put_data_reg
|   in: %d1.w = data register number (0-7), used as jump table index
|       %d0   = value to store
| d0-d5 are written to the saved register frame at PT_Dn+8(%sp), d6 and
| d7 directly to the live registers.
| NOTE(review): d3-d5 are written to the frame here but read from the
| live registers by fp_get_d3..fp_get_d5; presumably the exit path
| reloads them from the frame -- confirm against SAVE_ALL_INT and
| ret_from_exception.
165fp_put_data_reg:
166 jmp ([0f:w,%pc,%d1.w*4])
167
168 .align 4
1690:
170 .long fp_put_d0, fp_put_d1
171 .long fp_put_d2, fp_put_d3
172 .long fp_put_d4, fp_put_d5
173 .long fp_put_d6, fp_put_d7
174
175fp_put_d0:
176 printf PREGISTER,"{d0<-%08x}",1,%d0
177 move.l %d0,(PT_D0+8,%sp)
178 rts
179
180fp_put_d1:
181 printf PREGISTER,"{d1<-%08x}",1,%d0
182 move.l %d0,(PT_D1+8,%sp)
183 rts
184
185fp_put_d2:
186 printf PREGISTER,"{d2<-%08x}",1,%d0
187 move.l %d0,(PT_D2+8,%sp)
188 rts
189
190fp_put_d3:
191 printf PREGISTER,"{d3<-%08x}",1,%d0
192| move.l %d0,%d3
193 move.l %d0,(PT_D3+8,%sp)
194 rts
195
196fp_put_d4:
197 printf PREGISTER,"{d4<-%08x}",1,%d0
198| move.l %d0,%d4
199 move.l %d0,(PT_D4+8,%sp)
200 rts
201
202fp_put_d5:
203 printf PREGISTER,"{d5<-%08x}",1,%d0
204| move.l %d0,%d5
205 move.l %d0,(PT_D5+8,%sp)
206 rts
207
208fp_put_d6:
209 printf PREGISTER,"{d6<-%08x}",1,%d0
210 move.l %d0,%d6
211 rts
212
213fp_put_d7:
214 printf PREGISTER,"{d7<-%08x}",1,%d0
215 move.l %d0,%d7
216 rts
217
| fp_get_addr_reg
|   in:  %d0.w = address register number (0-7), used as jump table index
|   out: %a0   = value of that register
| a0-a2 are read from the saved register frame at PT_An+8(%sp), a3-a6
| from the live registers and a7 from the user stack pointer (%usp).
218fp_get_addr_reg:
219 jmp ([0f:w,%pc,%d0.w*4])
220
221 .align 4
2220:
223 .long fp_get_a0, fp_get_a1
224 .long fp_get_a2, fp_get_a3
225 .long fp_get_a4, fp_get_a5
226 .long fp_get_a6, fp_get_a7
227
228fp_get_a0:
229 move.l (PT_A0+8,%sp),%a0
230 printf PREGISTER,"{a0->%08x}",1,%a0
231 rts
232
233fp_get_a1:
234 move.l (PT_A1+8,%sp),%a0
235 printf PREGISTER,"{a1->%08x}",1,%a0
236 rts
237
238fp_get_a2:
239 move.l (PT_A2+8,%sp),%a0
240 printf PREGISTER,"{a2->%08x}",1,%a0
241 rts
242
243fp_get_a3:
244 move.l %a3,%a0
245 printf PREGISTER,"{a3->%08x}",1,%a0
246 rts
247
248fp_get_a4:
249 move.l %a4,%a0
250 printf PREGISTER,"{a4->%08x}",1,%a0
251 rts
252
253fp_get_a5:
254 move.l %a5,%a0
255 printf PREGISTER,"{a5->%08x}",1,%a0
256 rts
257
258fp_get_a6:
259 move.l %a6,%a0
260 printf PREGISTER,"{a6->%08x}",1,%a0
261 rts
262
263fp_get_a7:
264 move.l %usp,%a0
265 printf PREGISTER,"{a7->%08x}",1,%a0
266 rts
267
| fp_put_addr_reg
|   in: %d0.w = address register number (0-7), used as jump table index
|       %a0   = value to store
| a0-a2 are written to the saved register frame at PT_An+8(%sp), a3-a6
| directly to the live registers and a7 to the user stack pointer (%usp).
268fp_put_addr_reg:
269 jmp ([0f:w,%pc,%d0.w*4])
270
271 .align 4
2720:
273 .long fp_put_a0, fp_put_a1
274 .long fp_put_a2, fp_put_a3
275 .long fp_put_a4, fp_put_a5
276 .long fp_put_a6, fp_put_a7
277
278fp_put_a0:
279 printf PREGISTER,"{a0<-%08x}",1,%a0
280 move.l %a0,(PT_A0+8,%sp)
281 rts
282
283fp_put_a1:
284 printf PREGISTER,"{a1<-%08x}",1,%a0
285 move.l %a0,(PT_A1+8,%sp)
286 rts
287
288fp_put_a2:
289 printf PREGISTER,"{a2<-%08x}",1,%a0
290 move.l %a0,(PT_A2+8,%sp)
291 rts
292
293fp_put_a3:
294 printf PREGISTER,"{a3<-%08x}",1,%a0
295 move.l %a0,%a3
296 rts
297
298fp_put_a4:
299 printf PREGISTER,"{a4<-%08x}",1,%a0
300 move.l %a0,%a4
301 rts
302
303fp_put_a5:
304 printf PREGISTER,"{a5<-%08x}",1,%a0
305 move.l %a0,%a5
306 rts
307
308fp_put_a6:
309 printf PREGISTER,"{a6<-%08x}",1,%a0
310 move.l %a0,%a6
311 rts
312
313fp_put_a7:
314 printf PREGISTER,"{a7<-%08x}",1,%a0
315 move.l %a0,%usp
316 rts
317
| fp_debugprint: one long in the data section, initialised to the sum of
| the PM* constants below (defined outside this file). Presumably this is
| the mask the printf debug macro checks to decide which message classes
| to print -- TODO confirm. The commented-out lines are alternative
| initial values.
318 .data
319 .align 4
320
321fp_debugprint:
322| .long PMDECODE
323 .long PMINSTR+PMDECODE+PMCONV+PMNORM
324| .long PMCONV+PMNORM+PMINSTR
325| .long 0
diff --git a/arch/m68k/math-emu/fp_log.c b/arch/m68k/math-emu/fp_log.c
new file mode 100644
index 000000000000..87b4f0158560
--- /dev/null
+++ b/arch/m68k/math-emu/fp_log.c
@@ -0,0 +1,223 @@
1/*
2
3 fp_log.c: floating-point math routines for the Linux-m68k
4 floating point emulator.
5
6 Copyright (c) 1998-1999 David Huggins-Daines / Roman Zippel.
7
8 I hereby give permission, free of charge, to copy, modify, and
9 redistribute this software, in source or binary form, provided that
10 the above copyright notice and the following disclaimer are included
11 in all such copies.
12
13 THIS SOFTWARE IS PROVIDED "AS IS", WITH ABSOLUTELY NO WARRANTY, REAL
14 OR IMPLIED.
15
16*/
17
18#include "fp_emu.h"
19
20static const struct fp_ext fp_one =
21{
22 .exp = 0x3fff,
23};
24
25extern struct fp_ext *fp_fadd(struct fp_ext *dest, const struct fp_ext *src);
26extern struct fp_ext *fp_fdiv(struct fp_ext *dest, const struct fp_ext *src);
27extern struct fp_ext *fp_fmul(struct fp_ext *dest, const struct fp_ext *src);
28
29struct fp_ext *
30fp_fsqrt(struct fp_ext *dest, struct fp_ext *src)
31{
32 struct fp_ext tmp, src2;
33 int i, exp;
34
35 dprint(PINSTR, "fsqrt\n");
36
37 fp_monadic_check(dest, src);
38
39 if (IS_ZERO(dest))
40 return dest;
41
42 if (dest->sign) {
43 fp_set_nan(dest);
44 return dest;
45 }
46 if (IS_INF(dest))
47 return dest;
48
49 /*
50 * sqrt(m) * 2^(p) , if e = 2*p
51 * sqrt(m*2^e) =
52 * sqrt(2*m) * 2^(p) , if e = 2*p + 1
53 *
54 * So we use the last bit of the exponent to decide wether to
55 * use the m or 2*m.
56 *
57 * Since only the fractional part of the mantissa is stored and
58 * the integer part is assumed to be one, we place a 1 or 2 into
59 * the fixed point representation.
60 */
61 exp = dest->exp;
62 dest->exp = 0x3FFF;
63 if (!(exp & 1)) /* lowest bit of exponent is set */
64 dest->exp++;
65 fp_copy_ext(&src2, dest);
66
67 /*
68 * The taylor row arround a for sqrt(x) is:
69 * sqrt(x) = sqrt(a) + 1/(2*sqrt(a))*(x-a) + R
70 * With a=1 this gives:
71 * sqrt(x) = 1 + 1/2*(x-1)
72 * = 1/2*(1+x)
73 */
74 fp_fadd(dest, &fp_one);
75 dest->exp--; /* * 1/2 */
76
77 /*
78 * We now apply the newton rule to the function
79 * f(x) := x^2 - r
80 * which has a null point on x = sqrt(r).
81 *
82 * It gives:
83 * x' := x - f(x)/f'(x)
84 * = x - (x^2 -r)/(2*x)
85 * = x - (x - r/x)/2
86 * = (2*x - x + r/x)/2
87 * = (x + r/x)/2
88 */
89 for (i = 0; i < 9; i++) {
90 fp_copy_ext(&tmp, &src2);
91
92 fp_fdiv(&tmp, dest);
93 fp_fadd(dest, &tmp);
94 dest->exp--;
95 }
96
97 dest->exp += (exp - 0x3FFF) / 2;
98
99 return dest;
100}
101
/*
 * The eight functions below are placeholders: none of them computes its
 * function yet. Each one reports itself through uprint() (defined outside
 * this file; presumably it logs an "unimplemented" notice -- TODO
 * confirm), copies and normalizes the operand with fp_monadic_check()
 * (which returns dest early when fp_normalize_ext() yields 0) and then
 * returns dest, i.e. the normalized source operand.
 */

/* FETOXM1 (e^x - 1): placeholder. The IS_ZERO branch is redundant for now. */
102struct fp_ext *
103fp_fetoxm1(struct fp_ext *dest, struct fp_ext *src)
104{
105 uprint("fetoxm1\n");
106
107 fp_monadic_check(dest, src);
108
109 if (IS_ZERO(dest))
110 return dest;
111
112 return dest;
113}
114
/* FETOX (e^x): placeholder, returns the normalized operand. */
115struct fp_ext *
116fp_fetox(struct fp_ext *dest, struct fp_ext *src)
117{
118 uprint("fetox\n");
119
120 fp_monadic_check(dest, src);
121
122 return dest;
123}
124
/* FTWOTOX (2^x): placeholder, returns the normalized operand. */
125struct fp_ext *
126fp_ftwotox(struct fp_ext *dest, struct fp_ext *src)
127{
128 uprint("ftwotox\n");
129
130 fp_monadic_check(dest, src);
131
132 return dest;
133}
134
/* FTENTOX (10^x): placeholder, returns the normalized operand. */
135struct fp_ext *
136fp_ftentox(struct fp_ext *dest, struct fp_ext *src)
137{
138 uprint("ftentox\n");
139
140 fp_monadic_check(dest, src);
141
142 return dest;
143}
144
/* FLOGN (natural logarithm): placeholder, returns the normalized operand. */
145struct fp_ext *
146fp_flogn(struct fp_ext *dest, struct fp_ext *src)
147{
148 uprint("flogn\n");
149
150 fp_monadic_check(dest, src);
151
152 return dest;
153}
154
/* FLOGNP1 (ln(x + 1)): placeholder, returns the normalized operand. */
155struct fp_ext *
156fp_flognp1(struct fp_ext *dest, struct fp_ext *src)
157{
158 uprint("flognp1\n");
159
160 fp_monadic_check(dest, src);
161
162 return dest;
163}
164
/* FLOG10 (base-10 logarithm): placeholder, returns the normalized operand. */
165struct fp_ext *
166fp_flog10(struct fp_ext *dest, struct fp_ext *src)
167{
168 uprint("flog10\n");
169
170 fp_monadic_check(dest, src);
171
172 return dest;
173}
174
/* FLOG2 (base-2 logarithm): placeholder, returns the normalized operand. */
175struct fp_ext *
176fp_flog2(struct fp_ext *dest, struct fp_ext *src)
177{
178 uprint("flog2\n");
179
180 fp_monadic_check(dest, src);
181
182 return dest;
183}
184
/*
 * fp_fgetexp - emulate FGETEXP: replace *dest by the unbiased exponent of
 * *src, converted to extended format. Returns dest.
 *
 * Special cases, in the order they are tested:
 *   - fp_monadic_check() returns dest early when fp_normalize_ext()
 *     yields 0 (the NaN case according to fp_emu.h);
 *   - infinity becomes the default NaN with FPSR_EXC_OPERR set;
 *   - zero is returned unchanged.
 * Infinity is tested first because IS_ZERO() only looks at the mantissa.
 */
185struct fp_ext *
186fp_fgetexp(struct fp_ext *dest, struct fp_ext *src)
187{
188 dprint(PINSTR, "fgetexp\n");
189
190 fp_monadic_check(dest, src);
191
192 if (IS_INF(dest)) {
193 fp_set_nan(dest);
194 return dest;
195 }
196 if (IS_ZERO(dest))
197 return dest;
198
 /* remove the bias 0x3FFF and convert the integer back into *dest */
199 fp_conv_long2ext(dest, (int)dest->exp - 0x3FFF);
200
201 fp_normalize_ext(dest);
202
203 return dest;
204}
205
206struct fp_ext *
207fp_fgetman(struct fp_ext *dest, struct fp_ext *src)
208{
209 dprint(PINSTR, "fgetman\n");
210
211 fp_monadic_check(dest, src);
212
213 if (IS_ZERO(dest))
214 return dest;
215
216 if (IS_INF(dest))
217 return dest;
218
219 dest->exp = 0x3FFF;
220
221 return dest;
222}
223
diff --git a/arch/m68k/math-emu/fp_move.S b/arch/m68k/math-emu/fp_move.S
new file mode 100644
index 000000000000..71bdf83ba61a
--- /dev/null
+++ b/arch/m68k/math-emu/fp_move.S
@@ -0,0 +1,244 @@
1/*
2 * fp_move.S
3 *
4 * Copyright Roman Zippel, 1997. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, and the entire permission notice in its entirety,
11 * including the disclaimer of warranties.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote
16 * products derived from this software without specific prior
17 * written permission.
18 *
19 * ALTERNATIVELY, this product may be distributed under the terms of
20 * the GNU General Public License, in which case the provisions of the GPL are
21 * required INSTEAD OF the above restrictions. (This clause is
22 * necessary due to a potential bad interaction between the GPL and
23 * the restrictions contained in a BSD-style copyright.)
24 *
25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
26 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
29 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
30 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
31 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
35 * OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include "fp_emu.h"
39#include "fp_decode.h"
40
41do_no_pc_mode=1
42
43 .globl fp_fmove_fp2mem
44
/*
 * fp_fmove_fp2mem: entry point for "fmove fpN,<ea>" (listed in the
 * fp_decode_move table of fp_scan.S).
 *
 * Clears byte 2 of the emulated FPSR, decodes the destination format and
 * keeps that format code in both halves of %d1, then dispatches on the
 * destination addressing mode through the table that follows
 * fp_decode_addr_mode.  Address register direct is rejected via fp_ill.
 * NOTE(review): fp_decode_dest_format / fp_decode_addr_mode are macros
 * from fp_decode.h; this assumes the former leaves the format in %d0 and
 * the latter consumes the .long table - confirm there.
 */
45fp_fmove_fp2mem:
46 clr.b (2+FPD_FPSR,FPDATA)
47 fp_decode_dest_format
48 move.w %d0,%d1 | store data size twice in %d1
49 swap %d1 | one can be trashed below
50 move.w %d0,%d1
51#ifdef FPU_EMU_DEBUG
/* debug only: map the format code to its size letter and print it */
52 lea 0f,%a0
53 clr.l %d0
54 move.b (%a0,%d1.w),%d0
55 printf PDECODE,"fmove.%c ",1,%d0
56 fp_decode_src_reg
57 printf PDECODE,"fp%d,",1,%d0
58
59 .data
600: .byte 'l','s','x','p','w','d','b','p'
61 .previous
62#endif
63
64 | encode addressing mode for dest
65 fp_decode_addr_mode
66
67 .long fp_data, fp_ill
68 .long fp_indirect, fp_postinc
69 .long fp_predecr, fp_disp16
70 .long fp_extmode0, fp_extmode1
71
/*
 * fp_data: the destination of the fmove is a data register.
 *
 * The value left in %d0 by fp_mode_data_direct is parked in the low word
 * of %d1, the 12-byte source FP register is copied into FPD_TEMPFP1 and
 * %a0 is rewound to the start of that copy.  After the swap the format
 * code (stored in the upper word by fp_fmove_fp2mem) is in %d1.w and the
 * parked value is in the upper word; %d2 receives the same pair.
 * The jump table is indexed by format: long, single, word and byte are
 * handled, every other format ends in fp_ill.
 * NOTE(review): assumes fp_mode_data_direct yields the data register
 * number in %d0 and fp_get_fp_reg a register pointer in %a0 - confirm in
 * fp_decode.h.
 */
72 | addressing mode: data register direct
73fp_data:
74 fp_mode_data_direct
75 move.w %d0,%d1
76 fp_decode_src_reg
77 fp_get_fp_reg
78 lea (FPD_TEMPFP1,FPDATA),%a1
79 move.l (%a0)+,(%a1)+
80 move.l (%a0)+,(%a1)+
81 move.l (%a0),(%a1)
82 lea (-8,%a1),%a0
83 swap %d1
84 move.l %d1,%d2
85 printf PDECODE,"\n"
86 jmp ([0f:w,%pc,%d1.w*4])
87
88 .align 4
890:
90 .long fp_data_long, fp_data_single
91 .long fp_ill, fp_ill
92 .long fp_data_word, fp_ill
93 .long fp_data_byte, fp_ill
94
/*
 * fp_data_byte: store the temp copy at %a0 into a data register as a byte.
 *
 * The converted value is kept in %d1 while the current contents of the
 * target register are fetched, only the low byte is replaced (move.b),
 * and the merged longword is written back.  The swap of %d2 brings the
 * value saved in its upper word by fp_data into the low word, where it is
 * used as the register selector for both helper calls.
 */
95fp_data_byte:
96 jsr fp_normalize_ext
97 jsr fp_conv_ext2byte
98 move.l %d0,%d1
99 swap %d2
100 move.w %d2,%d0
101 jsr fp_get_data_reg
102 move.b %d1,%d0
103 move.w %d2,%d1
104 jsr fp_put_data_reg
105 jra fp_final
106
/*
 * fp_data_word: store the temp copy at %a0 into a data register as a word.
 *
 * Same read-modify-write sequence as fp_data_byte, but the low word of
 * the register is replaced (move.w).
 * NOTE(review): the selector is copied with move.l here while the byte
 * handler uses move.w, so the upper word of %d1 also carries the format
 * code - harmless only if fp_put_data_reg looks at %d1.w; confirm.
 */
107fp_data_word:
108 jsr fp_normalize_ext
109 jsr fp_conv_ext2short
110 move.l %d0,%d1
111 swap %d2
112 move.w %d2,%d0
113 jsr fp_get_data_reg
114 move.w %d1,%d0
115 move.l %d2,%d1
116 jsr fp_put_data_reg
117 jra fp_final
118
/*
 * fp_data_long: store the temp copy at %a0 into a data register as a
 * longword.  The whole register is overwritten, so no prior read of the
 * register is needed; the selector comes from the upper word of %d2.
 */
119fp_data_long:
120 jsr fp_normalize_ext
121 jsr fp_conv_ext2long
122 swap %d2
123 move.w %d2,%d1
124 jsr fp_put_data_reg
125 jra fp_final
126
/*
 * fp_data_single: store the temp copy at %a0 into a data register in
 * single format.  Identical to fp_data_long except for the conversion
 * routine that is called.
 */
127fp_data_single:
128 jsr fp_normalize_ext
129 jsr fp_conv_ext2single
130 swap %d2
131 move.w %d2,%d1
132 jsr fp_put_data_reg
133 jra fp_final
134
/*
 * Memory destination modes: each label expands the matching fp_mode_*
 * macro and then joins the common store path at fp_putdest.
 * NOTE(review): fp_putdest copies %a0 first thing, so the macros are
 * presumed to leave the effective address in %a0 - confirm in fp_decode.h.
 */
135 | addressing mode: address register indirect
136fp_indirect:
137 fp_mode_addr_indirect
138 jra fp_putdest
139
140 | addressing mode: address register indirect with postincrement
141fp_postinc:
142 fp_mode_addr_indirect_postinc
143 jra fp_putdest
144
145 | addressing mode: address register indirect with predecrement
146fp_predecr:
147 fp_mode_addr_indirect_predec
148 jra fp_putdest
149
150 | addressing mode: address register indirect with 16bit displacement
151fp_disp16:
152 fp_mode_addr_indirect_disp16
153 jra fp_putdest
154
/* mode 6 of the dispatch table in fp_fmove_fp2mem */
155fp_extmode0:
156 fp_mode_addr_indirect_extmode0
157 jra fp_putdest
158
/*
 * fp_extmode1: mode 7 of the destination dispatch table; the register
 * field picks the sub-mode.  For a store only absolute short (0) and
 * absolute long (1) are accepted, entries 2-7 go to fp_ill.
 */
159fp_extmode1:
160 fp_decode_addr_reg
161 jmp ([0f:w,%pc,%d0*4])
162
163 .align 4
1640:
165 .long fp_abs_short, fp_abs_long
166 .long fp_ill, fp_ill
167 .long fp_ill, fp_ill
168 .long fp_ill, fp_ill
169
170fp_abs_short:
171 fp_mode_abs_short
172 jra fp_putdest
173
174fp_abs_long:
175 fp_mode_abs_long
176 jra fp_putdest
177
/*
 * fp_putdest: common path for storing an FP register to memory.
 *
 * %a1 takes over the destination address from %a0.  The 12 bytes of the
 * source FP register are pushed on the stack last longword first, so the
 * copy at %sp has the original order; %a0 points at it and it is
 * normalized in place.  %d2 receives the format pair from %d1 and, after
 * the swap, its low word indexes the per-format store table.
 * The 12 stack bytes are released again in fp_finish_move (or in
 * fp_format_packed before bailing out).
 */
178fp_putdest:
179 move.l %a0,%a1
180 fp_decode_src_reg
181 move.l %d1,%d2 | save size
182 fp_get_fp_reg
183 printf PDECODE,"\n"
184 addq.l #8,%a0
185 move.l (%a0),-(%sp)
186 move.l -(%a0),-(%sp)
187 move.l -(%a0),-(%sp)
188 move.l %sp,%a0
189 jsr fp_normalize_ext
190
191 swap %d2
192 jmp ([0f:w,%pc,%d2.w*4])
193
194 .align 4
1950:
196 .long fp_format_long, fp_format_single
197 .long fp_format_extended, fp_format_packed
198 .long fp_format_word, fp_format_double
199 .long fp_format_byte, fp_format_packed
200
/*
 * fp_format_long / fp_format_single: convert the stack copy at %a0 and
 * write the resulting longword in %d0 to the destination at (%a1).
 * NOTE(review): putuser is a macro defined elsewhere; fp_err_ua1 is
 * presumably its fault label - confirm in fp_emu.h.
 */
201fp_format_long:
202 jsr fp_conv_ext2long
203 putuser.l %d0,(%a1),fp_err_ua1,%a1
204 jra fp_finish_move
205
206fp_format_single:
207 jsr fp_conv_ext2single
208 putuser.l %d0,(%a1),fp_err_ua1,%a1
209 jra fp_finish_move
210
/*
 * fp_format_extended: write the 12-byte stack copy to (%a1) in the
 * in-memory extended layout.
 *
 * Only the first longword needs repacking: the low word is shifted left
 * by one and the whole longword by a further 15, which moves bit 16 of
 * the internal word to bit 31 and bits 14-0 to bits 30-16, leaving the
 * lower half zero.  (fp_ext in fp_scan.S applies the inverse shifts.)
 * NOTE(review): presumably bit 16 is the sign flag and bits 14-0 the
 * exponent of struct fp_ext - confirm against fp_emu.h.
 * The two remaining longwords are copied through unchanged.
 */
211fp_format_extended:
212 move.l (%a0)+,%d0
213 lsl.w #1,%d0
214 lsl.l #7,%d0
215 lsl.l #8,%d0
216 putuser.l %d0,(%a1)+,fp_err_ua1,%a1
217 move.l (%a0)+,%d0
218 putuser.l %d0,(%a1)+,fp_err_ua1,%a1
219 move.l (%a0),%d0
220 putuser.l %d0,(%a1),fp_err_ua1,%a1
221 jra fp_finish_move
222
/*
 * fp_format_packed: packed destinations are not implemented.  Drops the
 * 12-byte operand copy pushed by fp_putdest and leaves through fp_ill.
 */
223fp_format_packed:
224 /* not supported yet */
225 lea (12,%sp),%sp
226 jra fp_ill
227
/*
 * fp_format_word: convert the stack copy and write the word in %d0 to
 * (%a1).
 * fp_format_double: only calls fp_conv_ext2double; no store is issued
 * here.
 * NOTE(review): presumably fp_conv_ext2double writes both longwords to
 * (%a1) itself - confirm in fp_util.S.
 */
228fp_format_word:
229 jsr fp_conv_ext2short
230 putuser.w %d0,(%a1),fp_err_ua1,%a1
231 jra fp_finish_move
232
233fp_format_double:
234 jsr fp_conv_ext2double
235 jra fp_finish_move
236
/*
 * fp_format_byte: convert the stack copy and write the byte in %d0 to
 * (%a1); execution then falls through into fp_finish_move (the explicit
 * jump is commented out).
 * fp_finish_move: release the 12-byte operand copy pushed by fp_putdest
 * and continue at fp_final.
 */
237fp_format_byte:
238 jsr fp_conv_ext2byte
239 putuser.b %d0,(%a1),fp_err_ua1,%a1
240| jra fp_finish_move
241
242fp_finish_move:
243 lea (12,%sp),%sp
244 jra fp_final
diff --git a/arch/m68k/math-emu/fp_movem.S b/arch/m68k/math-emu/fp_movem.S
new file mode 100644
index 000000000000..8354d39e6c47
--- /dev/null
+++ b/arch/m68k/math-emu/fp_movem.S
@@ -0,0 +1,368 @@
1/*
2 * fp_movem.S
3 *
4 * Copyright Roman Zippel, 1997. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, and the entire permission notice in its entirety,
11 * including the disclaimer of warranties.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote
16 * products derived from this software without specific prior
17 * written permission.
18 *
19 * ALTERNATIVELY, this product may be distributed under the terms of
20 * the GNU General Public License, in which case the provisions of the GPL are
21 * required INSTEAD OF the above restrictions. (This clause is
22 * necessary due to a potential bad interaction between the GPL and
23 * the restrictions contained in a BSD-style copyright.)
24 *
25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
26 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
29 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
30 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
31 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
35 * OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include "fp_emu.h"
39#include "fp_decode.h"
40
41| set flags for decode macros for fmovem
42do_fmovem=1
43
44 .globl fp_fmovem_fp, fp_fmovem_cr
45
46| %d1 contains the mask and count of the register list
47| for other register usage see fp_decode.h
48
/*
 * fp_fmovem_fp: entry point for fmovem of FP data registers.
 *
 * Bit 11 of %d2 selects the source of the register list: clear = the low
 * 8 bits of %d2 (static list), set = the data register whose number is
 * in bits 6-4 of %d2 (dynamic list, fetched with fp_get_data_reg).
 * The list is kept in the upper word of %d1 while the set bits of its
 * low byte are counted into %d1.w.
 * NOTE(review): in the dynamic case %d0 is a full register value, so
 * after the swap %d1.w starts from that value's upper word instead of
 * zero before the count is added - confirm whether that matters to the
 * users of the count.
 * Finally the addressing mode is dispatched; data and address register
 * direct are rejected through fp_ill.
 */
49fp_fmovem_fp:
50 printf PDECODE,"fmovem.x "
51 | get register list and count them
52 btst #11,%d2
53 jne 1f
54 bfextu %d2{#24,#8},%d0 | static register list
55 jra 2f
561: bfextu %d2{#25,#3},%d0 | dynamic register list
57 jsr fp_get_data_reg
582: move.l %d0,%d1
59 swap %d1
60 jra 2f
611: addq.w #1,%d1 | count the # of registers in
622: lsr.b #1,%d0 | register list and keep it in %d1
63 jcs 1b
64 jne 2b
65 printf PDECODE,"#%08x",1,%d1
66#ifdef FPU_EMU_DEBUG
67 btst #12,%d2
68 jne 1f
69 printf PDECODE,"-" | decremental move
70 jra 2f
711: printf PDECODE,"+" | incremental move
722: btst #13,%d2
73 jeq 1f
74 printf PDECODE,"->" | fpu -> cpu
75 jra 2f
761: printf PDECODE,"<-" | fpu <- cpu
772:
78#endif
79
80 | decode address mode
81 fp_decode_addr_mode
82
83 .long fp_ill, fp_ill
84 .long fpr_indirect, fpr_postinc
85 .long fpr_predecr, fpr_disp16
86 .long fpr_extmode0, fpr_extmode1
87
/*
 * fmovem (data registers) addressing modes: each label expands the
 * matching fp_mode_* macro and joins the transfer loop at fpr_do_movem.
 */
88 | addressing mode: address register indirect
89fpr_indirect:
90 fp_mode_addr_indirect
91 jra fpr_do_movem
92
93 | addressing mode: address register indirect with postincrement
94fpr_postinc:
95 fp_mode_addr_indirect_postinc
96 jra fpr_do_movem
97
/* addressing mode: address register indirect with predecrement */
98fpr_predecr:
99 fp_mode_addr_indirect_predec
100 jra fpr_do_movem
101
102 | addressing mode: address register/program counter indirect
103 | with 16bit displacement
/*
 * fpr_disp16 / fpr_extmode0: displacement and extended indirect modes for
 * fmovem.  Both are also entered from the fpr_extmode1 table (entries 2
 * and 3) and end in the transfer loop at fpr_do_movem.
 */
104fpr_disp16:
105 fp_mode_addr_indirect_disp16
106 jra fpr_do_movem
107
108fpr_extmode0:
109 fp_mode_addr_indirect_extmode0
110 jra fpr_do_movem
111
/*
 * fpr_extmode1: mode 7 for fmovem; the register field selects absolute
 * short (0), absolute long (1) or re-enters fpr_disp16 / fpr_extmode0
 * (2, 3).  Entries 4-7 are rejected through fp_ill.
 * fpr_absolute_long has no jump of its own: it falls through into
 * fpr_do_movem, which must therefore stay directly behind it.
 */
112fpr_extmode1:
113 fp_decode_addr_reg
114 jmp ([0f:w,%pc,%d0*4])
115
116 .align 4
1170:
118 .long fpr_absolute_short, fpr_absolute_long
119 .long fpr_disp16, fpr_extmode0
120 .long fp_ill, fp_ill
121 .long fp_ill, fp_ill
122
123fpr_absolute_short:
124 fp_mode_abs_short
125 jra fpr_do_movem
126
127fpr_absolute_long:
128 fp_mode_abs_long
129| jra fpr_do_movem
130
/*
 * fpr_do_movem: transfer the FP data registers selected by the list.
 *
 * The swap brings the register list (upper word of %d1, set up in
 * fp_fmovem_fp) into the low byte.  %a0 is the memory address handed over
 * by the addressing-mode code.
 *
 * %a1 walks the emulated register file: it starts at FPD_FPREG with a
 * step of %d0 = 12 bytes when bit 12 of %d2 is set; otherwise it starts
 * at offset 84 (12*8 - 12, the last 12-byte slot) and the step is -12.
 * Bit 13 of %d2 picks the direction: set = registers to memory (loop
 * entered at 4:), clear = memory to registers (loop entered at 3:).
 *
 * Each loop shifts one list bit out of %d1.b: carry set -> copy one
 * 12-byte register at 1: and step both pointers by %d0; carry clear with
 * bits remaining -> step only %a1 at 2:; list empty -> leave via 5:.
 * While copying, the first longword is shifted by 15 bits plus a
 * separate one-bit word shift to convert between the memory image and the
 * emulator's internal layout; the other two longwords are copied as is.
 */
131fpr_do_movem:
132 swap %d1 | get fpu register list
133 lea (FPD_FPREG,FPDATA),%a1
134 moveq #12,%d0
135 btst #12,%d2
136 jne 1f
137 lea (-12,%a1,%d0*8),%a1
138 neg.l %d0
1391: btst #13,%d2
140 jne 4f
141 | move register from memory into fpu
142 jra 3f
1431: printf PMOVEM,"(%p>%p)",2,%a0,%a1
144 getuser.l (%a0)+,%d2,fp_err_ua1,%a0
145 lsr.l #8,%d2
146 lsr.l #7,%d2
147 lsr.w #1,%d2
148 move.l %d2,(%a1)+
149 getuser.l (%a0)+,%d2,fp_err_ua1,%a0
150 move.l %d2,(%a1)+
151 getuser.l (%a0),%d2,fp_err_ua1,%a0
152 move.l %d2,(%a1)
/* both pointers advanced by 8 above: rewind, then apply the step */
153 subq.l #8,%a0
154 subq.l #8,%a1
155 add.l %d0,%a0
1562: add.l %d0,%a1
1573: lsl.b #1,%d1
158 jcs 1b
159 jne 2b
160 jra 5f
161 | move register from fpu into memory
1621: printf PMOVEM,"(%p>%p)",2,%a1,%a0
163 move.l (%a1)+,%d2
164 lsl.w #1,%d2
165 lsl.l #7,%d2
166 lsl.l #8,%d2
167 putuser.l %d2,(%a0)+,fp_err_ua1,%a0
168 move.l (%a1)+,%d2
169 putuser.l %d2,(%a0)+,fp_err_ua1,%a0
170 move.l (%a1),%d2
171 putuser.l %d2,(%a0),fp_err_ua1,%a0
172 subq.l #8,%a1
173 subq.l #8,%a0
174 add.l %d0,%a0
1752: add.l %d0,%a1
1764: lsl.b #1,%d1
177 jcs 1b
178 jne 2b
1795:
180 printf PDECODE,"\n"
/* disabled dump of the eight 12-byte register slots */
181#if 0
182 lea (FPD_FPREG,FPDATA),%a0
183 printf PMOVEM,"fp:"
184 printx PMOVEM,%a0@(0)
185 printx PMOVEM,%a0@(12)
186 printf PMOVEM,"\n "
187 printx PMOVEM,%a0@(24)
188 printx PMOVEM,%a0@(36)
189 printf PMOVEM,"\n "
190 printx PMOVEM,%a0@(48)
191 printx PMOVEM,%a0@(60)
192 printf PMOVEM,"\n "
193 printx PMOVEM,%a0@(72)
194 printx PMOVEM,%a0@(84)
195 printf PMOVEM,"\n"
196#endif
197 jra fp_end
198
199| set flags for decode macros for fmovem control register
200do_fmovem=1
201do_fmovem_cr=1
202
/*
 * fp_fmovem_cr: entry point for fmovem of the FP control registers.
 *
 * The 3-bit register list is taken from bits 12-10 of %d2, kept in the
 * upper word of %d1, and its set bits are counted into %d1.w.  The
 * addressing mode is then dispatched; unlike the data-register variant,
 * data and address register direct are accepted here.
 */
203fp_fmovem_cr:
204 printf PDECODE,"fmovem.cr "
205 | get register list and count them
206 bfextu %d2{#19,#3},%d0
207 move.l %d0,%d1
208 swap %d1
209 jra 2f
2101: addq.w #1,%d1
2112: lsr.l #1,%d0
212 jcs 1b
213 jne 2b
214 printf PDECODE,"#%08x",1,%d1
215#ifdef FPU_EMU_DEBUG
216 btst #13,%d2
217 jeq 1f
218 printf PDECODE,"->" | fpu -> cpu
219 jra 2f
2201: printf PDECODE,"<-" | fpu <- cpu
2212:
222#endif
223
224 | decode address mode
225 fp_decode_addr_mode
226
227 .long fpc_data, fpc_addr
228 .long fpc_indirect, fpc_postinc
229 .long fpc_predecr, fpc_disp16
230 .long fpc_extmode0, fpc_extmode1
231
/*
 * fpc_data: move one control register to or from a data register.
 *
 * bfffo finds the first set bit of the 3-bit list (bit offsets 19-21 of
 * %d2); subtracting 19 gives an index 0-2 that selects the longword at
 * FPD_FPCR + 4*index.  Bit 13 of %d2 set copies that longword into the
 * data register, clear loads it from the data register.
 * NOTE(review): assumes fp_mode_data_direct leaves the data register
 * number in %d0 and that fp_put_data_reg takes it in %d1 - confirm.
 */
232fpc_data:
233 fp_mode_data_direct
234 move.w %d0,%d1
235 bfffo %d2{#19,#3},%d0
236 sub.w #19,%d0
237 lea (FPD_FPCR,FPDATA,%d0.w*4),%a1
238 btst #13,%d2
239 jne 1f
240 move.w %d1,%d0
241 jsr fp_get_data_reg
242 move.l %d0,(%a1)
243 jra fpc_movem_fin
2441: move.l (%a1),%d0
245 jsr fp_put_data_reg
246 jra fpc_movem_fin
247
/*
 * fpc_addr: move between an address register and FPD_FPIAR.  This path
 * always uses FPD_FPIAR, whatever the register list says.  Bit 13 of %d2
 * set copies FPD_FPIAR to the address register, clear copies the address
 * register into FPD_FPIAR.
 */
248fpc_addr:
249 fp_decode_addr_reg
250 printf PDECODE,"a%d",1,%d0
251 btst #13,%d2
252 jne 1f
253 jsr fp_get_addr_reg
254 move.l %a0,(FPD_FPIAR,FPDATA)
255 jra fpc_movem_fin
2561: move.l (FPD_FPIAR,FPDATA),%a0
257 jsr fp_put_addr_reg
258 jra fpc_movem_fin
259
/*
 * fmovem (control registers) memory addressing modes: each label expands
 * the matching fp_mode_* macro and joins the transfer loop at
 * fpc_do_movem.
 */
260fpc_indirect:
261 fp_mode_addr_indirect
262 jra fpc_do_movem
263
264fpc_postinc:
265 fp_mode_addr_indirect_postinc
266 jra fpc_do_movem
267
268fpc_predecr:
269 fp_mode_addr_indirect_predec
270 jra fpc_do_movem
271
272fpc_disp16:
273 fp_mode_addr_indirect_disp16
274 jra fpc_do_movem
275
276fpc_extmode0:
277 fp_mode_addr_indirect_extmode0
278 jra fpc_do_movem
279
/*
 * fpc_extmode1: mode 7 for control-register fmovem; the register field
 * selects absolute short (0), absolute long (1), fpc_disp16 (2),
 * fpc_extmode0 (3) or immediate data (4).  Entries 5-7 go to fp_ill.
 */
280fpc_extmode1:
281 fp_decode_addr_reg
282 jmp ([0f:w,%pc,%d0*4])
283
284 .align 4
2850:
286 .long fpc_absolute_short, fpc_absolute_long
287 .long fpc_disp16, fpc_extmode0
288 .long fpc_immediate, fp_ill
289 .long fp_ill, fp_ill
290
291fpc_absolute_short:
292 fp_mode_abs_short
293 jra fpc_do_movem
294
295fpc_absolute_long:
296 fp_mode_abs_long
297 jra fpc_do_movem
298
/*
 * fpc_immediate: the operands follow the instruction.  %a0 is set to the
 * current PC and the saved PC is advanced by 4 bytes per register counted
 * in %d1.w, so it skips the immediate longwords.  With the explicit jump
 * commented out and the alternative loop disabled by "#if 0", execution
 * falls through into fpc_do_movem with %a0 pointing at the data.
 */
299fpc_immediate:
300 fp_get_pc %a0
301 lea (%a0,%d1.w*4),%a1
302 fp_put_pc %a1
303 printf PDECODE,"#imm"
304| jra fpc_do_movem
305#if 0
306 swap %d1
307 lsl.l #5,%d1
308 lea (FPD_FPCR,FPDATA),%a0
309 jra 3f
3101: move.l %d0,(%a0)
3112: addq.l #4,%a0
3123: lsl.b #1,%d1
313 jcs 1b
314 jne 2b
315 jra fpc_movem_fin
316#endif
317
/*
 * fpc_do_movem: transfer the control registers selected by the list.
 *
 * The swap brings the 3-bit list into the low word of %d1 and the shift
 * by 5 moves it to bits 7-5, so that "lsl.b #1" hands out one list bit
 * per iteration through the carry flag.  %a1 starts at FPD_FPCR and moves
 * on by 4 bytes per list bit; %a0 (memory side) only advances when a
 * register is actually transferred.  Bit 13 of %d2 set = registers to
 * memory (loop entered at 4:), clear = memory to registers (3:).
 * The second loop has no closing jump: it falls through into
 * fpc_movem_fin.
 */
318fpc_do_movem:
319 swap %d1 | get fpu register list
320 lsl.l #5,%d1
321 lea (FPD_FPCR,FPDATA),%a1
3221: btst #13,%d2
323 jne 4f
324
325 | move register from memory into fpu
326 jra 3f
3271: printf PMOVEM,"(%p>%p)",2,%a0,%a1
328 getuser.l (%a0)+,%d0,fp_err_ua1,%a0
329 move.l %d0,(%a1)
3302: addq.l #4,%a1
3313: lsl.b #1,%d1
332 jcs 1b
333 jne 2b
334 jra fpc_movem_fin
335
336 | move register from fpu into memory
3371: printf PMOVEM,"(%p>%p)",2,%a1,%a0
338 move.l (%a1),%d0
339 putuser.l %d0,(%a0)+,fp_err_ua1,%a0
3402: addq.l #4,%a1
3414: lsl.b #1,%d1
342 jcs 1b
343 jne 2b
344
/*
 * fpc_movem_fin: tidy up after any control-register move.
 *
 * FPD_FPCR is masked with 0x0000fff0 and FPD_FPSR with 0x0ffffff8, then
 * two cached fields are refreshed from FPD_FPCR: bits 5-4 are stored in
 * FPD_RND and bits 7-6 in FPD_PREC.  Leaves through fp_end.
 */
345fpc_movem_fin:
346 and.l #0x0000fff0,(FPD_FPCR,FPDATA)
347 and.l #0x0ffffff8,(FPD_FPSR,FPDATA)
348 move.l (FPD_FPCR,FPDATA),%d0
349 lsr.l #4,%d0
350 moveq #3,%d1
351 and.l %d0,%d1
352 move.w %d1,(FPD_RND,FPDATA)
353 lsr.l #2,%d0
354 moveq #3,%d1
355 and.l %d0,%d1
356 move.w %d1,(FPD_PREC,FPDATA)
357 printf PDECODE,"\n"
/* disabled dump of the control registers and the cached fields */
358#if 0
359 printf PMOVEM,"fpcr : %08x\n",1,FPDATA@(FPD_FPCR)
360 printf PMOVEM,"fpsr : %08x\n",1,FPDATA@(FPD_FPSR)
361 printf PMOVEM,"fpiar: %08x\n",1,FPDATA@(FPD_FPIAR)
362 clr.l %d0
363 move.w (FPD_PREC,FPDATA),%d0
364 printf PMOVEM,"prec : %04x\n",1,%d0
365 move.w (FPD_RND,FPDATA),%d0
366 printf PMOVEM,"rnd : %04x\n",1,%d0
367#endif
368 jra fp_end
diff --git a/arch/m68k/math-emu/fp_scan.S b/arch/m68k/math-emu/fp_scan.S
new file mode 100644
index 000000000000..e4146ed574db
--- /dev/null
+++ b/arch/m68k/math-emu/fp_scan.S
@@ -0,0 +1,478 @@
1/*
2 * fp_scan.S
3 *
4 * Copyright Roman Zippel, 1997. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, and the entire permission notice in its entirety,
11 * including the disclaimer of warranties.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote
16 * products derived from this software without specific prior
17 * written permission.
18 *
19 * ALTERNATIVELY, this product may be distributed under the terms of
20 * the GNU General Public License, in which case the provisions of the GPL are
21 * required INSTEAD OF the above restrictions. (This clause is
22 * necessary due to a potential bad interaction between the GPL and
23 * the restrictions contained in a BSD-style copyright.)
24 *
25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
26 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
29 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
30 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
31 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
35 * OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include "fp_emu.h"
39#include "fp_decode.h"
40
41 .globl fp_scan, fp_datasize
42
43 .data
44
45| %d2 - first two instr words
46| %d1 - operand size
47
48/* operand formats are:
49
50 Long = 0, i.e. fmove.l
51 Single, i.e. fmove.s
52 Extended, i.e. fmove.x
53 Packed-BCD, i.e. fmove.p
54 Word, i.e. fmove.w
55 Double, i.e. fmove.d
 Byte = 6, i.e. fmove.b
56*/
57
58 .text
59
60| On entry:
61| FPDATA - base of emulated FPU registers
62
/*
 * fp_scan: fetch and classify the instruction at the emulated PC.
 *
 * The first opcode byte must be 0xf2, otherwise control goes to
 * fp_nonstd.  The first two instruction words are then read into %d2 and
 * the saved PC is moved past them.
 * fp_decode_cond splits off the conditional instructions (fp_fscc,
 * fp_fbccw, fp_fbccl); everything else reaches fp_decode_move, whose
 * table separates register/<ea> arithmetic, fmove to memory and the two
 * fmovem flavours.
 * NOTE(review): the two fp_decode_*_instr_type macros live in
 * fp_decode.h and are presumed to index the .long table that follows
 * them - confirm there.
 */
63fp_scan:
64| normal fpu instruction? (this excludes fsave/frestore)
65 fp_get_pc %a0
66 printf PDECODE,"%08x: ",1,%a0
67 getuser.b (%a0),%d0,fp_err_ua1,%a0
68#if 1
69 cmp.b #0xf2,%d0 | cpid = 1
70#else
71 cmp.b #0xfc,%d0 | cpid = 6
72#endif
73 jne fp_nonstd
74| first two instruction words are kept in %d2
75 getuser.l (%a0)+,%d2,fp_err_ua1,%a0
76 fp_put_pc %a0
77fp_decode_cond: | separate conditional instr
78 fp_decode_cond_instr_type
79
80 .long fp_decode_move, fp_fscc
81 .long fp_fbccw, fp_fbccl
82
83fp_decode_move: | separate move instr
84 fp_decode_move_instr_type
85
86 .long fp_fgen_fp, fp_ill
87 .long fp_fgen_ea, fp_fmove_fp2mem
88 .long fp_fmovem_cr, fp_fmovem_cr
89 .long fp_fmovem_fp, fp_fmovem_fp
90
91| now all arithmetic instr and a few move instr are left
/*
 * fp_fgen_fp: arithmetic instruction whose source is an FP register.
 * The 12-byte source register is copied to FPD_TEMPFP1 so the operation
 * works on a private copy; %a0 is rewound to the start of that copy
 * before continuing at fp_getdest.
 */
92fp_fgen_fp: | source is a fpu register
93 clr.b (FPD_FPSR+2,FPDATA) | clear the exception byte
94 fp_decode_sourcespec
95 printf PDECODE,"f<op>.x fp%d",1,%d0
96 fp_get_fp_reg
97 lea (FPD_TEMPFP1,FPDATA),%a1 | copy src into a temp location
98 move.l (%a0)+,(%a1)+
99 move.l (%a0)+,(%a1)+
100 move.l (%a0),(%a1)
101 lea (-8,%a1),%a0
102 jra fp_getdest
103
/*
 * fp_fgen_ea: arithmetic instruction whose source is an effective
 * address.  A source specifier of 7 is not a data format: it selects
 * fp_fmovecr.  For the real formats the code is kept in both halves of
 * %d1 and execution falls through into fp_getsource (no jump at the end
 * of this block).
 */
104fp_fgen_ea: | source is <ea>
105 clr.b (FPD_FPSR+2,FPDATA) | clear the exception byte
106 | sort out fmovecr, keep data size in %d1
107 fp_decode_sourcespec
108 cmp.w #7,%d0
109 jeq fp_fmovecr
110 move.w %d0,%d1 | store data size twice in %d1
111 swap %d1 | one can be trashed below
112 move.w %d0,%d1
113#ifdef FPU_EMU_DEBUG
114 lea 0f,%a0
115 clr.l %d0
116 move.b (%a0,%d1.w),%d0
117 printf PDECODE,"f<op>.%c ",1,%d0
118
119 .data
1200: .byte 'l','s','x','p','w','d','b',0
121 .previous
122#endif
123
124/*
125 fp_getsource, fp_getdest
126
127 basically, we end up with a pointer to the source operand in
128 %a1, and a pointer to the destination operand in %a0. both
129 are, of course, 96-bit extended floating point numbers.
130*/
131
/*
 * fp_getsource: dispatch on the source addressing mode.  Address register
 * direct (entry 1) is rejected via fp_ill; mode 7 is refined further in
 * fp_extmode1.
 */
132fp_getsource:
133 | decode addressing mode for source
134 fp_decode_addr_mode
135
136 .long fp_data, fp_ill
137 .long fp_indirect, fp_postinc
138 .long fp_predecr, fp_disp16
139 .long fp_extmode0, fp_extmode1
140
/*
 * fp_data: the source operand sits in a data register.  Its value is
 * fetched into %d0, %a0 is pointed at FPD_TEMPFP1 as the conversion
 * target, and the format in %d1.w selects the converter.  Only long,
 * single, word and byte can come from a data register.
 */
141 | addressing mode: data register direct
142fp_data:
143 fp_mode_data_direct
144 jsr fp_get_data_reg
145 lea (FPD_TEMPFP1,FPDATA),%a0
146 jmp ([0f:w,%pc,%d1.w*4])
147
148 .align 4
1490:
150 .long fp_data_long, fp_data_single
151 .long fp_ill, fp_ill
152 .long fp_data_word, fp_ill
153 .long fp_data_byte, fp_ill
154
/*
 * Integer and single sources held in %d0.  Byte and word values are
 * sign-extended to a longword first; fp_data_word has no jump and falls
 * through into fp_data_long.  Every path ends at fp_getdest.
 */
155 | data types that fit in an integer data register
156fp_data_byte:
157 extb.l %d0
158 jra fp_data_long
159
160fp_data_word:
161 ext.l %d0
162
163fp_data_long:
164 jsr fp_conv_long2ext
165 jra fp_getdest
166
167fp_data_single:
168 jsr fp_conv_single2ext
169 jra fp_getdest
170
/*
 * Memory source modes: each label expands the matching fp_mode_* macro
 * and joins the common load path at fp_fetchsource.
 */
171 | addressing mode: address register indirect
172fp_indirect:
173 fp_mode_addr_indirect
174 jra fp_fetchsource
175
176 | addressing mode: address register indirect with postincrement
177fp_postinc:
178 fp_mode_addr_indirect_postinc
179 jra fp_fetchsource
180
181 | addressing mode: address register indirect with predecrement
182fp_predecr:
183 fp_mode_addr_indirect_predec
184 jra fp_fetchsource
185
186 | addressing mode: address register/program counter indirect
187 | with 16bit displacement
/*
 * fp_disp16 / fp_extmode0: displacement and extended indirect source
 * modes.  Both are also entered from the fp_extmode1 table (entries 2 and
 * 3) and continue at fp_fetchsource.
 */
188fp_disp16:
189 fp_mode_addr_indirect_disp16
190 jra fp_fetchsource
191
192 | all other indirect addressing modes will finally end up here
193fp_extmode0:
194 fp_mode_addr_indirect_extmode0
195 jra fp_fetchsource
196
/*
 * fp_extmode1: mode 7 for a source operand; the register field selects
 * absolute short (0), absolute long (1), fp_disp16 (2), fp_extmode0 (3)
 * or immediate data (4).  Entries 5-7 go to fp_ill.
 */
197| all pc relative addressing modes and immediate/absolute modes end up here
198| the first ones are sent to fp_extmode0 or fp_disp16
199| and only the latter are handled here
200fp_extmode1:
201 fp_decode_addr_reg
202 jmp ([0f:w,%pc,%d0*4])
203
204 .align 4
2050:
206 .long fp_abs_short, fp_abs_long
207 .long fp_disp16, fp_extmode0
208 .long fp_immediate, fp_ill
209 .long fp_ill, fp_ill
210
211 | addressing mode: absolute short
212fp_abs_short:
213 fp_mode_abs_short
214 jra fp_fetchsource
215
216 | addressing mode: absolute long
217fp_abs_long:
218 fp_mode_abs_long
219 jra fp_fetchsource
220
/*
 * fp_immediate: the source operand follows the instruction.
 *
 * %a0 is set to the current PC.  The operand size is looked up in
 * fp_datasize by format (%d1.w), rounded up to an even byte count, and
 * the saved PC is advanced by that amount.  With the explicit jump
 * commented out, execution falls through into fp_fetchsource with %a0
 * still pointing at the first operand byte.
 * NOTE(review): for the byte format the rounded size is 2 but fp_byte
 * later reads the byte at the start address; m68k places a byte immediate
 * in the low-order byte of the extension word - confirm whether byte
 * immediates are read from the right location.
 */
221 | addressing mode: immediate data
222fp_immediate:
223 printf PDECODE,"#"
224 fp_get_pc %a0
225 move.w (fp_datasize,%d1.w*2),%d0
226 addq.w #1,%d0
227 and.w #-2,%d0
228#ifdef FPU_EMU_DEBUG
/* debug only: hex-dump the immediate bytes */
229 movem.l %d0/%d1,-(%sp)
230 movel %a0,%a1
231 clr.l %d1
232 jra 2f
2331: getuser.b (%a1)+,%d1,fp_err_ua1,%a1
234 printf PDECODE,"%02x",1,%d1
2352: dbra %d0,1b
236 movem.l (%sp)+,%d0/%d1
237#endif
238 lea (%a0,%d0.w),%a1
239 fp_put_pc %a1
240| jra fp_fetchsource
241
/*
 * fp_fetchsource: common path for loading a source operand from memory.
 * The operand address moves from %a0 to %a1, %a0 is pointed at
 * FPD_TEMPFP1 as the conversion target, and the swap brings the second
 * copy of the format code into %d1.w to index the loader table.
 */
242fp_fetchsource:
243 move.l %a0,%a1
244 swap %d1
245 lea (FPD_TEMPFP1,FPDATA),%a0
246 jmp ([0f:w,%pc,%d1.w*4])
247
248 .align 4
2490: .long fp_long, fp_single
250 .long fp_ext, fp_pack
251 .long fp_word, fp_double
252 .long fp_byte, fp_ill
253
/*
 * fp_long / fp_single: read one longword from (%a1) into %d0, convert it
 * into the temp operand at %a0 and continue at fp_getdest.
 * NOTE(review): getuser is a macro defined elsewhere; fp_err_ua1 is
 * presumably its fault label - confirm in fp_emu.h.
 */
254fp_long:
255 getuser.l (%a1),%d0,fp_err_ua1,%a1
256 jsr fp_conv_long2ext
257 jra fp_getdest
258
259fp_single:
260 getuser.l (%a1),%d0,fp_err_ua1,%a1
261 jsr fp_conv_single2ext
262 jra fp_getdest
263
/*
 * fp_ext: load a 12-byte extended operand from (%a1) into the temp
 * operand at %a0.
 *
 * Only the first longword is repacked: it is shifted right by 15 and the
 * low word by one more bit, which moves bit 31 of the memory image to bit
 * 16 and bits 30-16 to bits 14-0.  The other two longwords are copied
 * unchanged and %a0 is rewound to the start of the operand.
 * NOTE(review): presumably bit 31 is the sign and bits 30-16 the exponent
 * of the memory format - confirm against struct fp_ext in fp_emu.h.
 */
264fp_ext:
265 getuser.l (%a1)+,%d0,fp_err_ua1,%a1
266 lsr.l #8,%d0
267 lsr.l #7,%d0
268 lsr.w #1,%d0
269 move.l %d0,(%a0)+
270 getuser.l (%a1)+,%d0,fp_err_ua1,%a1
271 move.l %d0,(%a0)+
272 getuser.l (%a1),%d0,fp_err_ua1,%a1
273 move.l %d0,(%a0)
274 subq.l #8,%a0
275 jra fp_getdest
276
/*
 * Remaining memory source formats.
 * fp_pack:   packed operands are not implemented, leaves via fp_ill.
 * fp_word:   reads a word, sign-extends it and converts it as a long.
 * fp_double: only calls fp_conv_double2ext; no read is issued here.
 *            NOTE(review): presumably that routine fetches the operand
 *            from (%a1) itself - confirm in fp_util.S.
 * fp_byte:   reads a byte, sign-extends it and converts it as a long,
 *            then falls through into fp_getdest (jump commented out).
 */
277fp_pack:
278 /* not supported yet */
279 jra fp_ill
280
281fp_word:
282 getuser.w (%a1),%d0,fp_err_ua1,%a1
283 ext.l %d0
284 jsr fp_conv_long2ext
285 jra fp_getdest
286
287fp_double:
288 jsr fp_conv_double2ext
289 jra fp_getdest
290
291fp_byte:
292 getuser.b (%a1),%d0,fp_err_ua1,%a1
293 extb.l %d0
294 jsr fp_conv_long2ext
295| jra fp_getdest
296
/*
 * fp_getdest: locate the destination register and dispatch the operation.
 *
 * The source pointer moves from %a0 to %a1; the destination register
 * number is taken from bits 9-7 of %d2 and resolved by fp_get_fp_reg.
 * %a0/%a1 and then the address of fp_finalrounding are pushed, so a
 * handler that ends in rts continues at fp_finalrounding (some wrappers
 * below overwrite that slot with another rounding routine).
 * The low 7 bits of %d2 index the 128-entry handler table.
 * NOTE(review): with this stack layout the C handlers, declared as
 * (struct fp_ext *dest, struct fp_ext *src), would see %a0 as dest and
 * %a1 as src - this assumes fp_get_fp_reg returns its pointer in %a0;
 * confirm in fp_decode.h.
 */
297fp_getdest:
298 move.l %a0,%a1
299 bfextu %d2{#22,#3},%d0
300 printf PDECODE,",fp%d\n",1,%d0
301 fp_get_fp_reg
302 movem.l %a0/%a1,-(%sp)
303 pea fp_finalrounding
304 bfextu %d2{#25,#7},%d0
305 jmp ([0f:w,%pc,%d0*4])
306
307 .align 4
3080:
309 .long fp_fmove_mem2fp, fp_fint, fp_fsinh, fp_fintrz
310 .long fp_fsqrt, fp_ill, fp_flognp1, fp_ill
311 .long fp_fetoxm1, fp_ftanh, fp_fatan, fp_ill
312 .long fp_fasin, fp_fatanh, fp_fsin, fp_ftan
313 .long fp_fetox, fp_ftwotox, fp_ftentox, fp_ill
314 .long fp_flogn, fp_flog10, fp_flog2, fp_ill
315 .long fp_fabs, fp_fcosh, fp_fneg, fp_ill
316 .long fp_facos, fp_fcos, fp_fgetexp, fp_fgetman
317 .long fp_fdiv, fp_fmod, fp_fadd, fp_fmul
318 .long fpa_fsgldiv, fp_frem, fp_fscale, fpa_fsglmul
319 .long fp_fsub, fp_ill, fp_ill, fp_ill
320 .long fp_ill, fp_ill, fp_ill, fp_ill
321 .long fp_fsincos0, fp_fsincos1, fp_fsincos2, fp_fsincos3
322 .long fp_fsincos4, fp_fsincos5, fp_fsincos6, fp_fsincos7
323 .long fp_fcmp, fp_ill, fp_ftst, fp_ill
324 .long fp_ill, fp_ill, fp_ill, fp_ill
325 .long fp_fsmove, fp_fssqrt, fp_ill, fp_ill
326 .long fp_fdmove, fp_fdsqrt, fp_ill, fp_ill
327 .long fp_ill, fp_ill, fp_ill, fp_ill
328 .long fp_ill, fp_ill, fp_ill, fp_ill
329 .long fp_ill, fp_ill, fp_ill, fp_ill
330 .long fp_ill, fp_ill, fp_ill, fp_ill
331 .long fp_fsabs, fp_ill, fp_fsneg, fp_ill
332 .long fp_fdabs, fp_ill, fp_fdneg, fp_ill
333 .long fp_fsdiv, fp_ill, fp_fsadd, fp_fsmul
334 .long fp_fddiv, fp_ill, fp_fdadd, fp_fdmul
335 .long fp_fssub, fp_ill, fp_ill, fp_ill
336 .long fp_fdsub, fp_ill, fp_ill, fp_ill
337 .long fp_ill, fp_ill, fp_ill, fp_ill
338 .long fp_ill, fp_ill, fp_ill, fp_ill
339 .long fp_ill, fp_ill, fp_ill, fp_ill
340 .long fp_ill, fp_ill, fp_ill, fp_ill
341
342 | Instructions follow
343
/*
 * fp_fmovecr: load one of the emulated constant-ROM entries.
 *
 * The low 5 bits of %d2 select the entry; %a0 is pointed at
 * fp_constants + 12 * index (index * 3 longwords).  0x801cc0ff is a
 * per-entry bitmap read from the top bit down: shifting it left by
 * index + 1 leaves the entry's bit in the carry flag, and a set bit
 * raises FPSR_EXC_INEX2.  Finally the low 7 bits of %d2 are cleared so
 * that fp_getdest dispatches to table entry 0 (fp_fmove_mem2fp) with the
 * constant as source.
 */
344 | Move an (emulated) ROM constant
345fp_fmovecr:
346 bfextu %d2{#27,#5},%d0
347 printf PINSTR,"fp_fmovecr #%d",1,%d0
348 move.l %d0,%d1
349 add.l %d0,%d0
350 add.l %d1,%d0
351 lea (fp_constants,%d0*4),%a0
352 move.l #0x801cc0ff,%d0
353 addq.l #1,%d1
354 lsl.l %d1,%d0
355 jcc 1f
356 fp_set_sr FPSR_EXC_INEX2 | INEX2 exception
3571: moveq #-128,%d0 | continue with fmove
358 and.l %d0,%d2
359 jra fp_getdest
360
361 .data
362 .align 4
363fp_constants:
364 .long 0x00004000,0xc90fdaa2,0x2168c235 | pi
365 .extend 0,0,0,0,0,0,0,0,0,0
366 .long 0x00003ffd,0x9a209a84,0xfbcff798 | log10(2)
367 .long 0x00004000,0xadf85458,0xa2bb4a9a | e
368 .long 0x00003fff,0xb8aa3b29,0x5c17f0bc | log2(e)
369 .long 0x00003ffd,0xde5bd8a9,0x37287195 | log10(e)
370 .long 0x00000000,0x00000000,0x00000000 | 0.0
371 .long 0x00003ffe,0xb17217f7,0xd1cf79ac | ln(2)
372 .long 0x00004000,0x935d8ddd,0xaaa8ac17 | ln(10)
373 | read this as "1.0 * 2^0" - note the high bit in the mantissa
374 .long 0x00003fff,0x80000000,0x00000000 | 10^0
375 .long 0x00004002,0xa0000000,0x00000000 | 10^1
376 .long 0x00004005,0xc8000000,0x00000000 | 10^2
377 .long 0x0000400c,0x9c400000,0x00000000 | 10^4
378 .long 0x00004019,0xbebc2000,0x00000000 | 10^8
379 .long 0x00004034,0x8e1bc9bf,0x04000000 | 10^16
380 .long 0x00004069,0x9dc5ada8,0x2b70b59e | 10^32
381 .long 0x000040d3,0xc2781f49,0xffcfa6d5 | 10^64
382 .long 0x000041a8,0x93ba47c9,0x80e98ce0 | 10^128
383 .long 0x00004351,0xaa7eebfb,0x9df9de8e | 10^256
384 .long 0x000046a3,0xe319a0ae,0xa60e91c7 | 10^512
385 .long 0x00004d48,0xc9767586,0x81750c17 | 10^1024
386 .long 0x00005a92,0x9e8b3b5d,0xc53d5de5 | 10^2048
387 .long 0x00007525,0xc4605202,0x8a20979b | 10^4096
388 .previous
389
/*
 * fp_fmove_mem2fp: copy the 12-byte operand at %a1 to the location at
 * %a0, rewind %a0 to the start of the destination and return to the
 * address on top of the stack (pushed by fp_getdest).
 */
390fp_fmove_mem2fp:
391 printf PINSTR,"fmove %p,%p\n",2,%a0,%a1
392 move.l (%a1)+,(%a0)+
393 move.l (%a1)+,(%a0)+
394 move.l (%a1),(%a0)
395 subq.l #8,%a0
396 rts
397
/*
 * fpa_fsglmul / fpa_fsgldiv: replace the return address on top of the
 * stack (fp_finalrounding, pushed by fp_getdest) with
 * fp_finalrounding_single_fast, then run the real handler.
 */
398fpa_fsglmul:
399 move.l #fp_finalrounding_single_fast,(%sp)
400 jra fp_fsglmul
401
402fpa_fsgldiv:
403 move.l #fp_finalrounding_single_fast,(%sp)
404 jra fp_fsgldiv
405
/*
 * fp_dosingleprec / fp_dodoubleprec instr: wrap an existing handler so
 * that it returns through fp_finalrounding_single or
 * fp_finalrounding_double.  The macro overwrites the return address on
 * top of the stack and then jumps to \instr.
 */
406.macro fp_dosingleprec instr
407 printf PINSTR,"single "
408 move.l #fp_finalrounding_single,(%sp)
409 jra \instr
410.endm
411
412.macro fp_dodoubleprec instr
413 printf PINSTR,"double "
414 move.l #fp_finalrounding_double,(%sp)
415 jra \instr
416.endm
417
/*
 * Fixed-precision variants referenced from the fp_getdest table.  Each
 * label expands one of the two macros above around the ordinary handler:
 * the "fs" labels select single rounding, the "fd" labels double.
 */
418fp_fsmove:
419 fp_dosingleprec fp_fmove_mem2fp
420
421fp_fssqrt:
422 fp_dosingleprec fp_fsqrt
423
424fp_fdmove:
425 fp_dodoubleprec fp_fmove_mem2fp
426
427fp_fdsqrt:
428 fp_dodoubleprec fp_fsqrt
429
430fp_fsabs:
431 fp_dosingleprec fp_fabs
432
433fp_fsneg:
434 fp_dosingleprec fp_fneg
435
436fp_fdabs:
437 fp_dodoubleprec fp_fabs
438
439fp_fdneg:
440 fp_dodoubleprec fp_fneg
441
442fp_fsdiv:
443 fp_dosingleprec fp_fdiv
444
445fp_fsadd:
446 fp_dosingleprec fp_fadd
447
448fp_fsmul:
449 fp_dosingleprec fp_fmul
450
451fp_fddiv:
452 fp_dodoubleprec fp_fdiv
453
454fp_fdadd:
455 fp_dodoubleprec fp_fadd
456
457fp_fdmul:
458 fp_dodoubleprec fp_fmul
459
460fp_fssub:
461 fp_dosingleprec fp_fsub
462
463fp_fdsub:
464 fp_dodoubleprec fp_fsub
465
/*
 * fp_nonstd: reached from fp_scan when the first opcode byte is not the
 * expected one.  Reads the longword at the current PC for the log message
 * and returns -1 in %d0 to the caller of fp_scan.
 * NOTE(review): the printf below has an empty first argument where the
 * other call sites pass a debug class - confirm this is intentional.
 */
466fp_nonstd:
467 fp_get_pc %a0
468 getuser.l (%a0),%d0,fp_err_ua1,%a0
469 printf ,"nonstd ((%08x)=%08x)\n",2,%a0,%d0
470 moveq #-1,%d0
471 rts
472
473 .data
474 .align 4
475
/*
 * fp_datasize: operand size in bytes, indexed by format code in the
 * order long, single, extended, packed, word, double, byte; the eighth
 * slot is 0.  fp_immediate uses it to step the PC over immediate data.
 */
476 | data sizes corresponding to the operand formats
477fp_datasize:
478 .word 4, 4, 12, 12, 2, 8, 1, 0
diff --git a/arch/m68k/math-emu/fp_trig.c b/arch/m68k/math-emu/fp_trig.c
new file mode 100644
index 000000000000..6361d0784df2
--- /dev/null
+++ b/arch/m68k/math-emu/fp_trig.c
@@ -0,0 +1,183 @@
1/*
2
3 fp_trig.c: floating-point math routines for the Linux-m68k
4 floating point emulator.
5
6 Copyright (c) 1998-1999 David Huggins-Daines / Roman Zippel.
7
8 I hereby give permission, free of charge, to copy, modify, and
9 redistribute this software, in source or binary form, provided that
10 the above copyright notice and the following disclaimer are included
11 in all such copies.
12
13 THIS SOFTWARE IS PROVIDED "AS IS", WITH ABSOLUTELY NO WARRANTY, REAL
14 OR IMPLIED.
15
16*/
17
18#include "fp_emu.h"
19#include "fp_trig.h"
20
/*
 * fp_fsin - handler slot for the "fsin" instruction; no result is
 * computed here.
 *
 * Reports the instruction name through uprint(), applies
 * fp_monadic_check() to the operand pair and returns dest without doing
 * any arithmetic of its own.
 * NOTE(review): uprint() and fp_monadic_check() are defined outside this
 * chunk; the latter may alter dest or return early - confirm in fp_emu.h.
 */
21struct fp_ext *
22fp_fsin(struct fp_ext *dest, struct fp_ext *src)
23{
24 uprint("fsin\n");
25
26 fp_monadic_check(dest, src);
27
28 return dest;
29}
30
/*
 * fp_fcos - handler slot for the "fcos" instruction; no result is
 * computed here.
 *
 * Reports the instruction name through uprint(), applies
 * fp_monadic_check() to the operand pair and returns dest without doing
 * any arithmetic of its own.
 * NOTE(review): uprint() and fp_monadic_check() are defined outside this
 * chunk; the latter may alter dest or return early - confirm in fp_emu.h.
 */
31struct fp_ext *
32fp_fcos(struct fp_ext *dest, struct fp_ext *src)
33{
34 uprint("fcos\n");
35
36 fp_monadic_check(dest, src);
37
38 return dest;
39}
40
/*
 * fp_ftan - handler slot for the "ftan" instruction; no result is
 * computed here.
 *
 * Reports the instruction name through uprint(), applies
 * fp_monadic_check() to the operand pair and returns dest without doing
 * any arithmetic of its own.
 * NOTE(review): uprint() and fp_monadic_check() are defined outside this
 * chunk; the latter may alter dest or return early - confirm in fp_emu.h.
 */
41struct fp_ext *
42fp_ftan(struct fp_ext *dest, struct fp_ext *src)
43{
44 uprint("ftan\n");
45
46 fp_monadic_check(dest, src);
47
48 return dest;
49}
50
/*
 * fp_fasin - handler slot for the "fasin" instruction; no result is
 * computed here.
 *
 * Reports the instruction name through uprint(), applies
 * fp_monadic_check() to the operand pair and returns dest without doing
 * any arithmetic of its own.
 * NOTE(review): uprint() and fp_monadic_check() are defined outside this
 * chunk; the latter may alter dest or return early - confirm in fp_emu.h.
 */
51struct fp_ext *
52fp_fasin(struct fp_ext *dest, struct fp_ext *src)
53{
54 uprint("fasin\n");
55
56 fp_monadic_check(dest, src);
57
58 return dest;
59}
60
/*
 * fp_facos - handler slot for the "facos" instruction; no result is
 * computed here.
 *
 * Reports the instruction name through uprint(), applies
 * fp_monadic_check() to the operand pair and returns dest without doing
 * any arithmetic of its own.
 * NOTE(review): uprint() and fp_monadic_check() are defined outside this
 * chunk; the latter may alter dest or return early - confirm in fp_emu.h.
 */
61struct fp_ext *
62fp_facos(struct fp_ext *dest, struct fp_ext *src)
63{
64 uprint("facos\n");
65
66 fp_monadic_check(dest, src);
67
68 return dest;
69}
70
/*
 * fp_fatan - handler slot for the "fatan" instruction; no result is
 * computed here.
 *
 * Reports the instruction name through uprint(), applies
 * fp_monadic_check() to the operand pair and returns dest without doing
 * any arithmetic of its own.
 * NOTE(review): uprint() and fp_monadic_check() are defined outside this
 * chunk; the latter may alter dest or return early - confirm in fp_emu.h.
 */
71struct fp_ext *
72fp_fatan(struct fp_ext *dest, struct fp_ext *src)
73{
74 uprint("fatan\n");
75
76 fp_monadic_check(dest, src);
77
78 return dest;
79}
80
/*
 * fp_fsinh - handler slot for the "fsinh" instruction; no result is
 * computed here.
 *
 * Reports the instruction name through uprint(), applies
 * fp_monadic_check() to the operand pair and returns dest without doing
 * any arithmetic of its own.
 * NOTE(review): uprint() and fp_monadic_check() are defined outside this
 * chunk; the latter may alter dest or return early - confirm in fp_emu.h.
 */
81struct fp_ext *
82fp_fsinh(struct fp_ext *dest, struct fp_ext *src)
83{
84 uprint("fsinh\n");
85
86 fp_monadic_check(dest, src);
87
88 return dest;
89}
90
/*
 * fp_fcosh - handler slot for the "fcosh" instruction; no result is
 * computed here.
 *
 * Reports the instruction name through uprint(), applies
 * fp_monadic_check() to the operand pair and returns dest without doing
 * any arithmetic of its own.
 * NOTE(review): uprint() and fp_monadic_check() are defined outside this
 * chunk; the latter may alter dest or return early - confirm in fp_emu.h.
 */
91struct fp_ext *
92fp_fcosh(struct fp_ext *dest, struct fp_ext *src)
93{
94 uprint("fcosh\n");
95
96 fp_monadic_check(dest, src);
97
98 return dest;
99}
100
/*
 * fp_ftanh - handler slot for the "ftanh" instruction; no result is
 * computed here.
 *
 * Reports the instruction name through uprint(), applies
 * fp_monadic_check() to the operand pair and returns dest without doing
 * any arithmetic of its own.
 * NOTE(review): uprint() and fp_monadic_check() are defined outside this
 * chunk; the latter may alter dest or return early - confirm in fp_emu.h.
 */
101struct fp_ext *
102fp_ftanh(struct fp_ext *dest, struct fp_ext *src)
103{
104 uprint("ftanh\n");
105
106 fp_monadic_check(dest, src);
107
108 return dest;
109}
110
/*
 * fp_fatanh - handler slot for the "fatanh" instruction; no result is
 * computed here.
 *
 * Reports the instruction name through uprint(), applies
 * fp_monadic_check() to the operand pair and returns dest without doing
 * any arithmetic of its own.
 * NOTE(review): uprint() and fp_monadic_check() are defined outside this
 * chunk; the latter may alter dest or return early - confirm in fp_emu.h.
 */
111struct fp_ext *
112fp_fatanh(struct fp_ext *dest, struct fp_ext *src)
113{
114 uprint("fatanh\n");
115
116 fp_monadic_check(dest, src);
117
118 return dest;
119}
120
/*
 * fp_fsincos0 - handler slot for "fsincos", variant 0; no result is
 * computed here.
 *
 * Only reports the instruction through uprint() and returns dest.  src is
 * not used and fp_monadic_check() is not invoked.
 * NOTE(review): the digit presumably mirrors the low three opcode bits
 * that select this slot in the dispatch table - confirm in fp_scan.S.
 */
121struct fp_ext *
122fp_fsincos0(struct fp_ext *dest, struct fp_ext *src)
123{
124 uprint("fsincos0\n");
125
126 return dest;
127}
128
/*
 * fp_fsincos1 - handler slot for "fsincos", variant 1; no result is
 * computed here.
 *
 * Only reports the instruction through uprint() and returns dest.  src is
 * not used and fp_monadic_check() is not invoked.
 * NOTE(review): the digit presumably mirrors the low three opcode bits
 * that select this slot in the dispatch table - confirm in fp_scan.S.
 */
129struct fp_ext *
130fp_fsincos1(struct fp_ext *dest, struct fp_ext *src)
131{
132 uprint("fsincos1\n");
133
134 return dest;
135}
136
/*
 * fp_fsincos2 - handler slot for "fsincos", variant 2; no result is
 * computed here.
 *
 * Only reports the instruction through uprint() and returns dest.  src is
 * not used and fp_monadic_check() is not invoked.
 * NOTE(review): the digit presumably mirrors the low three opcode bits
 * that select this slot in the dispatch table - confirm in fp_scan.S.
 */
137struct fp_ext *
138fp_fsincos2(struct fp_ext *dest, struct fp_ext *src)
139{
140 uprint("fsincos2\n");
141
142 return dest;
143}
144
/*
 * fp_fsincos3 - handler slot for "fsincos", variant 3; no result is
 * computed here.
 *
 * Only reports the instruction through uprint() and returns dest.  src is
 * not used and fp_monadic_check() is not invoked.
 * NOTE(review): the digit presumably mirrors the low three opcode bits
 * that select this slot in the dispatch table - confirm in fp_scan.S.
 */
145struct fp_ext *
146fp_fsincos3(struct fp_ext *dest, struct fp_ext *src)
147{
148 uprint("fsincos3\n");
149
150 return dest;
151}
152
/*
 * fp_fsincos4 - handler slot for "fsincos", variant 4; no result is
 * computed here.
 *
 * Only reports the instruction through uprint() and returns dest.  src is
 * not used and fp_monadic_check() is not invoked.
 * NOTE(review): the digit presumably mirrors the low three opcode bits
 * that select this slot in the dispatch table - confirm in fp_scan.S.
 */
153struct fp_ext *
154fp_fsincos4(struct fp_ext *dest, struct fp_ext *src)
155{
156 uprint("fsincos4\n");
157
158 return dest;
159}
160
/*
 * fp_fsincos5 - handler slot for "fsincos", variant 5; no result is
 * computed here.
 *
 * Only reports the instruction through uprint() and returns dest.  src is
 * not used and fp_monadic_check() is not invoked.
 * NOTE(review): the digit presumably mirrors the low three opcode bits
 * that select this slot in the dispatch table - confirm in fp_scan.S.
 */
161struct fp_ext *
162fp_fsincos5(struct fp_ext *dest, struct fp_ext *src)
163{
164 uprint("fsincos5\n");
165
166 return dest;
167}
168
/*
 * fp_fsincos6 - handler slot for "fsincos", variant 6; no result is
 * computed here.
 *
 * Only reports the instruction through uprint() and returns dest.  src is
 * not used and fp_monadic_check() is not invoked.
 * NOTE(review): the digit presumably mirrors the low three opcode bits
 * that select this slot in the dispatch table - confirm in fp_scan.S.
 */
169struct fp_ext *
170fp_fsincos6(struct fp_ext *dest, struct fp_ext *src)
171{
172 uprint("fsincos6\n");
173
174 return dest;
175}
176
/*
 * fp_fsincos7 - handler slot for "fsincos", variant 7; no result is
 * computed here.
 *
 * Only reports the instruction through uprint() and returns dest.  src is
 * not used and fp_monadic_check() is not invoked.
 * NOTE(review): the digit presumably mirrors the low three opcode bits
 * that select this slot in the dispatch table - confirm in fp_scan.S.
 */
177struct fp_ext *
178fp_fsincos7(struct fp_ext *dest, struct fp_ext *src)
179{
180 uprint("fsincos7\n");
181
182 return dest;
183}
diff --git a/arch/m68k/math-emu/fp_trig.h b/arch/m68k/math-emu/fp_trig.h
new file mode 100644
index 000000000000..af8b247e9c98
--- /dev/null
+++ b/arch/m68k/math-emu/fp_trig.h
@@ -0,0 +1,32 @@
1/*
2
3 fp_trig.h: floating-point math routines for the Linux-m68k
4 floating point emulator.
5
6 Copyright (c) 1998 David Huggins-Daines.
7
8 I hereby give permission, free of charge, to copy, modify, and
9 redistribute this software, in source or binary form, provided that
10 the above copyright notice and the following disclaimer are included
11 in all such copies.
12
13 THIS SOFTWARE IS PROVIDED "AS IS", WITH ABSOLUTELY NO WARRANTY, REAL
14 OR IMPLIED.
15
16*/
17
/* Include guard for fp_trig.h; the header only pulls in fp_emu.h. */
18#ifndef FP_TRIG_H
19#define FP_TRIG_H
20
21#include "fp_emu.h"
22
23/* floating point trigonometric instructions:
24
25 the arguments to these are in the "internal" extended format, that
26 is, an "exploded" version of the 96-bit extended fp format used by
27 the 68881.
28
29 they return a status code, which should end up in %d0, if all goes
30 well. */
31
/* NOTE(review): no prototypes are declared below the comment above, and
   the routines visible in fp_trig.c return a struct fp_ext pointer rather
   than a status code -- confirm where they are declared. */
32#endif /* FP_TRIG_H */
diff --git a/arch/m68k/math-emu/fp_util.S b/arch/m68k/math-emu/fp_util.S
new file mode 100644
index 000000000000..a9f7f0129067
--- /dev/null
+++ b/arch/m68k/math-emu/fp_util.S
@@ -0,0 +1,1455 @@
1/*
2 * fp_util.S
3 *
4 * Copyright Roman Zippel, 1997. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, and the entire permission notice in its entirety,
11 * including the disclaimer of warranties.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote
16 * products derived from this software without specific prior
17 * written permission.
18 *
19 * ALTERNATIVELY, this product may be distributed under the terms of
20 * the GNU General Public License, in which case the provisions of the GPL are
21 * required INSTEAD OF the above restrictions. (This clause is
22 * necessary due to a potential bad interaction between the GPL and
23 * the restrictions contained in a BSD-style copyright.)
24 *
25 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
26 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
29 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
30 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
31 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
33 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
35 * OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include <linux/config.h>
39#include "fp_emu.h"
40
41/*
42 * Here are lots of conversion and normalization functions, mainly
43 * used by fp_scan.S.
44 * Note that these functions are optimized for "normal" numbers:
45 * these are handled first and exit as fast as possible. This is
46 * especially important for fp_normalize_ext/fp_conv_ext2ext, as
47 * they are called very often.
48 * The register usage is optimized for fp_scan.S and for whichever
49 * registers are unused there at the time, so be careful if you want
50 * to change something here. %d0 and %d1 are always usable, sometimes
51 * %d2 too (or only its lower half). Most functions have to return %a0
52 * unmodified, so that the caller can immediately reuse it.
53 */
54
55 .globl fp_ill, fp_end
56
57 | exits from fp_scan:
58 | illegal instruction
/*
 * fp_ill: exit taken for an illegal instruction.  Emits the "fp_illegal"
 * message through the printf macro and returns to the caller.
 * NOTE(review): %d0 is not written here, unlike fp_end below -- confirm
 * what return value the caller expects on this path.
 */
59fp_ill:
60 printf ,"fp_illegal\n"
61 rts
62 | completed instruction
/*
 * fp_end: exit taken for a completed instruction; always returns 0 in %d0.
 *
 * Before returning, the three longwords at TASK_MM-8, TASK_MM-4 and
 * TASK_MM relative to %a2 are tested, and an "oops" line with all three
 * values is printed if any of them is negative (bit 31 set).
 * NOTE(review): this looks like a debugging sanity check on fields of
 * the task structure; %a2 presumably points at the current task --
 * confirm against fp_entry.S.
 */
63fp_end:
64 tst.l (TASK_MM-8,%a2)
65 jmi 1f
66 tst.l (TASK_MM-4,%a2)
67 jmi 1f
68 tst.l (TASK_MM,%a2)
69 jpl 2f
701: printf ,"oops:%p,%p,%p\n",3,%a2@(TASK_MM-8),%a2@(TASK_MM-4),%a2@(TASK_MM)
712: clr.l %d0
72 rts
73
74 .globl fp_conv_long2ext, fp_conv_single2ext
75 .globl fp_conv_double2ext, fp_conv_ext2ext
76 .globl fp_normalize_ext, fp_normalize_double
77 .globl fp_normalize_single, fp_normalize_single_fast
78 .globl fp_conv_ext2double, fp_conv_ext2single
79 .globl fp_conv_ext2long, fp_conv_ext2short
80 .globl fp_conv_ext2byte
81 .globl fp_finalrounding_single, fp_finalrounding_single_fast
82 .globl fp_finalrounding_double
83 .globl fp_finalrounding, fp_finaltest, fp_final
84
85/*
86 * First several conversion functions from a source operand
87 * into the extended format. Note, that only fp_conv_ext2ext
88 * normalizes the number and is always called after the other
89 * conversion functions, which only move the information into
90 * fp_ext structure.
91 */
92
93 | fp_conv_long2ext:
94 |
95 | args: %d0 = source (32-bit long)
96 | %a0 = destination (ptr to struct fp_ext)
97
/*
 * fp_conv_long2ext: store the 32-bit integer in %d0 as an unnormalized
 * struct fp_ext at %a0.
 *
 * A non-zero value is written as: first lword = sign (1 in the upper
 * word for negative input, else 0) plus exponent 0x3fff+31 in the lower
 * word; second lword = magnitude of the value; third lword = 0.
 * Zero is written as three cleared lwords.
 * %a0 is restored before returning; %d0 and %d1 are modified.
 */
98fp_conv_long2ext:
99 printf PCONV,"l2e: %p -> %p(",2,%d0,%a0
100 clr.l %d1 | sign defaults to zero
101 tst.l %d0
102 jeq fp_l2e_zero | is source zero?
103 jpl 1f | positive?
104 moveq #1,%d1
105 neg.l %d0
	/* move the sign flag into the upper word, exponent goes below it */
1061: swap %d1
107 move.w #0x3fff+31,%d1
108 move.l %d1,(%a0)+ | set sign / exp
109 move.l %d0,(%a0)+ | set mantissa
110 clr.l (%a0)
111 subq.l #8,%a0 | restore %a0
112 printx PCONV,%a0@
113 printf PCONV,")\n"
114 rts
115 | source is zero
116fp_l2e_zero:
117 clr.l (%a0)+
118 clr.l (%a0)+
119 clr.l (%a0)
120 subq.l #8,%a0
121 printx PCONV,%a0@
122 printf PCONV,")\n"
123 rts
124
125 | fp_conv_single2ext
126 | args: %d0 = source (single-precision fp value)
127 | %a0 = dest (struct fp_ext *)
128
/*
 * fp_conv_single2ext: unpack the single-precision bit pattern in %d0
 * into the struct fp_ext at %a0 (no normalization is done here).
 *
 * %d0 becomes the high mantissa lword: the source shifted left by 8,
 * with bit 31 then used for the explicit integer bit.  %d1 becomes the
 * first lword of the structure: sign in the upper word, 8-bit exponent
 * in the lower word.
 *   - normal numbers: explicit bit set, exponent re-biased by 0x3fff-0x7f
 *   - zero:           stored with exponent 0
 *   - denormals:      exponent forced to 0x4000-0x7f, explicit bit clear
 *   - Inf / NaN:      exponent 0x7fff, explicit bit cleared
 * The low mantissa lword is cleared and %a0 is restored before returning.
 */
129fp_conv_single2ext:
130 printf PCONV,"s2e: %p -> %p(",2,%d0,%a0
131 move.l %d0,%d1
132 lsl.l #8,%d0 | shift mantissa
133 lsr.l #8,%d1 | exponent / sign
134 lsr.l #7,%d1
	/* word shift: leaves the sign in the upper word of %d1 untouched */
135 lsr.w #8,%d1
136 jeq fp_s2e_small | zero / denormal?
137 cmp.w #0xff,%d1 | NaN / Inf?
138 jeq fp_s2e_large
139 bset #31,%d0 | set explizit bit
140 add.w #0x3fff-0x7f,%d1 | re-bias the exponent.
1419: move.l %d1,(%a0)+ | fp_ext.sign, fp_ext.exp
142 move.l %d0,(%a0)+ | high lword of fp_ext.mant
143 clr.l (%a0) | low lword = 0
144 subq.l #8,%a0
145 printx PCONV,%a0@
146 printf PCONV,")\n"
147 rts
148 | zeros and denormalized
149fp_s2e_small:
150 | exponent is zero, so explizit bit is already zero too
151 tst.l %d0
152 jeq 9b
153 move.w #0x4000-0x7f,%d1
154 jra 9b
155 | infinities and NAN
156fp_s2e_large:
157 bclr #31,%d0 | clear explizit bit
158 move.w #0x7fff,%d1
159 jra 9b
160
/*
 * fp_conv_double2ext: fetch a double through the getuser macro, using
 * %a1 as the source address, and unpack it into the struct fp_ext at %a0.
 *
 * The first lword supplies the sign, the 11-bit exponent and the top
 * fraction bits (shifted left by 11 into the high mantissa lword).  The
 * second lword is split: its upper 11 bits are OR'ed into the high
 * mantissa lword and its lower 21 bits, shifted left by 11, become the
 * low mantissa lword.  Exponent handling mirrors fp_conv_single2ext:
 * re-bias by 0x3fff-0x3ff for normal numbers, 0x4000-0x3ff for
 * denormals, 0x7fff for Inf / NaN.  %a0 is restored before returning.
 *
 * NOTE(review): getuser and the fp_err_ua2 label are defined outside
 * this part of the file -- presumably a user-space read with fault
 * fixup; confirm in fp_emu.h.
 * NOTE(review): the zero test in fp_d2e_small only looks at the bits
 * taken from the first lword, so a denormal whose non-zero fraction
 * bits all live in the second lword keeps exponent 0 instead of
 * 0x4000-0x3ff -- confirm whether that is intended.
 */
161fp_conv_double2ext:
162#ifdef FPU_EMU_DEBUG
163 getuser.l %a1@(0),%d0,fp_err_ua2,%a1
164 getuser.l %a1@(4),%d1,fp_err_ua2,%a1
165 printf PCONV,"d2e: %p%p -> %p(",3,%d0,%d1,%a0
166#endif
167 getuser.l (%a1)+,%d0,fp_err_ua2,%a1
168 move.l %d0,%d1
169 lsl.l #8,%d0 | shift high mantissa
170 lsl.l #3,%d0
171 lsr.l #8,%d1 | exponent / sign
172 lsr.l #7,%d1
	/* word shift: leaves the sign in the upper word of %d1 untouched */
173 lsr.w #5,%d1
174 jeq fp_d2e_small | zero / denormal?
175 cmp.w #0x7ff,%d1 | NaN / Inf?
176 jeq fp_d2e_large
177 bset #31,%d0 | set explizit bit
178 add.w #0x3fff-0x3ff,%d1 | re-bias the exponent.
1799: move.l %d1,(%a0)+ | fp_ext.sign, fp_ext.exp
180 move.l %d0,(%a0)+
	/* second lword: low 21 bits -> low mantissa, top 11 bits -> high */
181 getuser.l (%a1)+,%d0,fp_err_ua2,%a1
182 move.l %d0,%d1
183 lsl.l #8,%d0
184 lsl.l #3,%d0
185 move.l %d0,(%a0)
186 moveq #21,%d0
187 lsr.l %d0,%d1
188 or.l %d1,-(%a0)
189 subq.l #4,%a0
190 printx PCONV,%a0@
191 printf PCONV,")\n"
192 rts
193 | zeros and denormalized
194fp_d2e_small:
195 | exponent is zero, so explizit bit is already zero too
196 tst.l %d0
197 jeq 9b
198 move.w #0x4000-0x3ff,%d1
199 jra 9b
200 | infinities and NAN
201fp_d2e_large:
202 bclr #31,%d0 | clear explizit bit
203 move.w #0x7fff,%d1
204 jra 9b
205
206 | fp_conv_ext2ext:
207 | originally used to get longdouble from userspace, now it's
208 | called before arithmetic operations to make sure the number
209 | is normalized [maybe rename it?].
210 | args: %a0 = dest (struct fp_ext *)
211 | returns 0 in %d0 for a NaN, otherwise 1
212
/*
 * fp_conv_ext2ext: normalize the struct fp_ext at %a0 in place.
 *
 * args:    %a0 = struct fp_ext *
 * returns: %d0 = 0 for a NaN, otherwise 1 (see the note at the rounding
 *          exit below); %a0 is restored on every exit.
 * uses:    %d0, %d1, %d2
 *
 * Layout as accessed here: byte 0 = extra-precision bits (only with
 * CONFIG_M68KFPU_EMU_EXTRAPREC), byte 1 = sign, word at 2 = exponent,
 * lword at 4 = high mantissa, lword at 8 = low mantissa.
 *
 *  - exponent 0x7fff: Inf / NaN, only classified (fp_e2e_large).
 *  - high mantissa bit set: already normal, go straight to the
 *    rounding check.
 *  - otherwise the mantissa is shifted left until its top bit is set
 *    and the exponent is reduced by the shift count; if the exponent
 *    would drop below zero the shift is limited to the old exponent
 *    and the exponent is stored as 0.  fp_e2e_small1 handles a
 *    non-zero high lword, fp_e2e_small2 a zero high lword with a
 *    non-zero low lword, fp_e2e_small3 the case where only the
 *    extra-precision byte is non-zero.
 *  - with CONFIG_M68KFPU_EMU_EXTRAPREC a non-zero extra byte is rounded
 *    into the mantissa according to FPD_RND and FPSR_EXC_INEX2 is set.
 */
213fp_conv_ext2ext:
214 printf PCONV,"e2e: %p(",1,%a0
215 printx PCONV,%a0@
216 printf PCONV,"), "
217 move.l (%a0)+,%d0
218 cmp.w #0x7fff,%d0 | Inf / NaN?
219 jeq fp_e2e_large
220 move.l (%a0),%d0
221 jpl fp_e2e_small | zero / denorm?
222 | The high bit is set, so normalization is irrelevant.
223fp_e2e_checkround:
224 subq.l #4,%a0
225#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
226 move.b (%a0),%d0
227 jne fp_e2e_round
228#endif
229 printf PCONV,"%p(",1,%a0
230 printx PCONV,%a0@
231 printf PCONV,")\n"
232 moveq #1,%d0
233 rts
234#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
235fp_e2e_round:
236 fp_set_sr FPSR_EXC_INEX2
237 clr.b (%a0)
238 move.w (FPD_RND,FPDATA),%d2
239 jne fp_e2e_roundother | %d2 == 0, round to nearest
240 tst.b %d0 | test guard bit
241 jpl 9f | zero is closer
242 btst #0,(11,%a0) | test lsb bit
243 jne fp_e2e_doroundup | round to infinity
244 lsl.b #1,%d0 | check low bits
245 jeq 9f | round to zero
246fp_e2e_doroundup:
	/* 64-bit increment; on carry out the mantissa becomes 0x8000...
	   and the exponent is bumped by one */
247 addq.l #1,(8,%a0)
248 jcc 9f
249 addq.l #1,(4,%a0)
250 jcc 9f
251 move.w #0x8000,(4,%a0)
252 addq.w #1,(2,%a0)
	/* NOTE(review): this exit does not load 1 into %d0 the way the
	   non-rounding exit above does; confirm what callers expect. */
2539: printf PNORM,"%p(",1,%a0
254 printx PNORM,%a0@
255 printf PNORM,")\n"
256 rts
257fp_e2e_roundother:
258 subq.w #2,%d2
259 jcs 9b | %d2 < 2, round to zero
260 jhi 1f | %d2 > 2, round to +infinity
261 tst.b (1,%a0) | to -inf
262 jne fp_e2e_doroundup | negative, round to infinity
263 jra 9b | positive, round to zero
2641: tst.b (1,%a0) | to +inf
265 jeq fp_e2e_doroundup | positive, round to infinity
266 jra 9b | negative, round to zero
267#endif
268 | zeros and subnormals:
269 | try to normalize these anyway.
270fp_e2e_small:
271 jne fp_e2e_small1 | high lword zero?
272 move.l (4,%a0),%d0
273 jne fp_e2e_small2
274#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
275 clr.l %d0
276 move.b (-4,%a0),%d0
277 jne fp_e2e_small3
278#endif
279 | Genuine zero.
280 clr.w -(%a0)
281 subq.l #2,%a0
282 printf PNORM,"%p(",1,%a0
283 printx PNORM,%a0@
284 printf PNORM,")\n"
285 moveq #1,%d0
286 rts
287 | definitely subnormal, need to shift all 64 bits
288fp_e2e_small1:
	/* %d1 = number of leading zero bits = shift needed; %d2 = exponent */
289 bfffo %d0{#0,#32},%d1
290 move.w -(%a0),%d2
291 sub.w %d1,%d2
292 jcc 1f
293 | Pathologically small, denormalize.
294 add.w %d2,%d1
295 clr.w %d2
2961: move.w %d2,(%a0)+
297 move.w %d1,%d2
298 jeq fp_e2e_checkround
299 | fancy 64-bit double-shift begins here
300 lsl.l %d2,%d0
301 move.l %d0,(%a0)+
302 move.l (%a0),%d0
303 move.l %d0,%d1
304 lsl.l %d2,%d0
305 move.l %d0,(%a0)
306 neg.w %d2
307 and.w #0x1f,%d2
308 lsr.l %d2,%d1
309 or.l %d1,-(%a0)
310#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
311fp_e2e_extra1:
312 clr.l %d0
313 move.b (-4,%a0),%d0
314 neg.w %d2
315 add.w #24,%d2
316 jcc 1f
317 clr.b (-4,%a0)
318 lsl.l %d2,%d0
319 or.l %d0,(4,%a0)
320 jra fp_e2e_checkround
3211: addq.w #8,%d2
322 lsl.l %d2,%d0
323 move.b %d0,(-4,%a0)
324 lsr.l #8,%d0
325 or.l %d0,(4,%a0)
326#endif
327 jra fp_e2e_checkround
328 | pathologically small subnormal
329fp_e2e_small2:
	/* high lword is zero: shift needed is 32 + leading zeros of the
	   low lword (%d1); %d2 = exponent */
330 bfffo %d0{#0,#32},%d1
331 add.w #32,%d1
332 move.w -(%a0),%d2
333 sub.w %d1,%d2
334 jcc 1f
335 | Beyond pathologically small, denormalize.
336 add.w %d2,%d1
337 clr.w %d2
3381: move.w %d2,(%a0)+
339 ext.l %d1
340 jeq fp_e2e_checkround
341 clr.l (4,%a0)
	/*
	 * Decide on the shift count in %d1, not on the exponent in %d2:
	 * the code below uses %d1 either as the remaining shift (count-32)
	 * or, negated, as the bfins bit offset (32-count), exactly like
	 * the matching sequence in fp_ne_small2.  Subtracting from %d2
	 * left %d1 at 32 or more, which shifts every bit out of %d0 or
	 * yields a wildly out-of-range bfins offset.
	 */
342 sub.w #32,%d1
343 jcs 1f
344 lsl.l %d1,%d0 | lower lword needs only to be shifted
345 move.l %d0,(%a0) | into the higher lword
346#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
347 clr.l %d0
348 move.b (-4,%a0),%d0
349 clr.b (-4,%a0)
350 neg.w %d1
351 add.w #32,%d1
352 bfins %d0,(%a0){%d1,#8}
353#endif
354 jra fp_e2e_checkround
3551: neg.w %d1 | lower lword is splitted between
356 bfins %d0,(%a0){%d1,#32} | higher and lower lword
357#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
358 jra fp_e2e_checkround
359#else
360 move.w %d1,%d2
361 jra fp_e2e_extra1
362 | These are extremely small numbers, that will mostly end up as zero
363 | anyway, so this is only important for correct rounding.
364fp_e2e_small3:
365 bfffo %d0{#24,#8},%d1
366 add.w #40,%d1
367 move.w -(%a0),%d2
368 sub.w %d1,%d2
369 jcc 1f
370 | Pathologically small, denormalize.
371 add.w %d2,%d1
372 clr.w %d2
3731: move.w %d2,(%a0)+
374 ext.l %d1
375 jeq fp_e2e_checkround
376 cmp.w #8,%d1
377 jcs 2f
3781: clr.b (-4,%a0)
379 sub.w #64,%d1
380 jcs 1f
381 add.w #24,%d1
382 lsl.l %d1,%d0
383 move.l %d0,(%a0)
384 jra fp_e2e_checkround
3851: neg.w %d1
386 bfins %d0,(%a0){%d1,#8}
387 jra fp_e2e_checkround
3882: lsl.l %d1,%d0
389 move.b %d0,(-4,%a0)
390 lsr.l #8,%d0
391 move.b %d0,(7,%a0)
392 jra fp_e2e_checkround
393#endif
	/* NOTE(review): no branch in this routine appears to target the
	   local label below and the code before it always jumps away, so
	   this sequence looks unreachable (it still treats %d2 as the
	   shift count) -- confirm before removing it. */
3941: move.l %d0,%d1 | lower lword is splitted between
395 lsl.l %d2,%d0 | higher and lower lword
396 move.l %d0,(%a0)
397 move.l %d1,%d0
398 neg.w %d2
399 add.w #32,%d2
400 lsr.l %d2,%d0
401 move.l %d0,-(%a0)
402 jra fp_e2e_checkround
403 | Infinities and NaNs
404fp_e2e_large:
	/* infinity = mantissa zero apart from the explicit bit */
405 move.l (%a0)+,%d0
406 jne 3f
4071: tst.l (%a0)
408 jne 4f
409 moveq #1,%d0
4102: subq.l #8,%a0
411 printf PCONV,"%p(",1,%a0
412 printx PCONV,%a0@
413 printf PCONV,")\n"
414 rts
415 | we have maybe a NaN, shift off the highest bit
4163: lsl.l #1,%d0
417 jeq 1b
418 | we have a NaN, clear the return value
4194: clrl %d0
420 jra 2b
421
422
423/*
424 * Normalization functions. Call these on the output of general
425 * FP operators, and before any conversion into the destination
426 * formats. fp_normalize_ext has always to be called first, the
427 * following conversion functions expect an already normalized
428 * number.
429 */
430
431 | fp_normalize_ext:
432 | normalize an extended in extended (unpacked) format, basically
433 | it does the same as fp_conv_ext2ext, additionally it also does
434 | the necessary postprocessing checks.
435 | args: %a0 (struct fp_ext *)
436 | NOTE: it does _not_ modify %a0/%a1 and the upper word of %d2
437
/*
 * fp_normalize_ext: normalize the struct fp_ext at %a0 in place and set
 * the status bits that result from doing so.
 *
 * Layout as accessed here: byte 0 = extra-precision bits (only with
 * CONFIG_M68KFPU_EMU_EXTRAPREC), byte 1 = sign, word at 2 = exponent,
 * lword at 4 = high mantissa, lword at 8 = low mantissa.
 *
 *  - exponent 0x7fff (fp_ne_large): if the high mantissa lword holds
 *    only the explicit bit it is cleared; an all-zero mantissa is then
 *    left alone (infinity).  Anything else is a NaN: bit 30 of the high
 *    mantissa lword is set and, if it was clear before,
 *    FPSR_EXC_SNAN is set.
 *  - high mantissa bit set: already normal.
 *  - otherwise the mantissa is shifted left until its top bit is set
 *    and the exponent is reduced by the shift count; if the exponent
 *    would drop below zero the shift is limited to the old exponent,
 *    the exponent is stored as 0 and (small1/small2 paths)
 *    FPSR_EXC_UNFL is set.
 *  - with CONFIG_M68KFPU_EMU_EXTRAPREC a non-zero extra byte is rounded
 *    into the mantissa according to FPD_RND and FPSR_EXC_INEX2 is set.
 *
 * %a0 is restored on every exit; %d0, %d1 and the lower word of %d2 are
 * used as scratch.
 */
438fp_normalize_ext:
439 printf PNORM,"ne: %p(",1,%a0
440 printx PNORM,%a0@
441 printf PNORM,"), "
442 move.l (%a0)+,%d0
443 cmp.w #0x7fff,%d0 | Inf / NaN?
444 jeq fp_ne_large
445 move.l (%a0),%d0
446 jpl fp_ne_small | zero / denorm?
447 | The high bit is set, so normalization is irrelevant.
448fp_ne_checkround:
449 subq.l #4,%a0
450#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
451 move.b (%a0),%d0
452 jne fp_ne_round
453#endif
454 printf PNORM,"%p(",1,%a0
455 printx PNORM,%a0@
456 printf PNORM,")\n"
457 rts
458#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
459fp_ne_round:
460 fp_set_sr FPSR_EXC_INEX2
461 clr.b (%a0)
462 move.w (FPD_RND,FPDATA),%d2
463 jne fp_ne_roundother | %d2 == 0, round to nearest
464 tst.b %d0 | test guard bit
465 jpl 9f | zero is closer
466 btst #0,(11,%a0) | test lsb bit
467 jne fp_ne_doroundup | round to infinity
468 lsl.b #1,%d0 | check low bits
469 jeq 9f | round to zero
470fp_ne_doroundup:
	/* 64-bit increment; on carry out the exponent is bumped by one
	   and the mantissa becomes 0x8000... */
471 addq.l #1,(8,%a0)
472 jcc 9f
473 addq.l #1,(4,%a0)
474 jcc 9f
475 addq.w #1,(2,%a0)
476 move.w #0x8000,(4,%a0)
4779: printf PNORM,"%p(",1,%a0
478 printx PNORM,%a0@
479 printf PNORM,")\n"
480 rts
481fp_ne_roundother:
482 subq.w #2,%d2
483 jcs 9b | %d2 < 2, round to zero
484 jhi 1f | %d2 > 2, round to +infinity
485 tst.b (1,%a0) | to -inf
486 jne fp_ne_doroundup | negative, round to infinity
487 jra 9b | positive, round to zero
4881: tst.b (1,%a0) | to +inf
489 jeq fp_ne_doroundup | positive, round to infinity
490 jra 9b | negative, round to zero
491#endif
492 | Zeros and subnormal numbers
493 | These are probably merely subnormal, rather than "denormalized"
494 | numbers, so we will try to make them normal again.
495fp_ne_small:
496 jne fp_ne_small1 | high lword zero?
497 move.l (4,%a0),%d0
498 jne fp_ne_small2
499#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
500 clr.l %d0
501 move.b (-4,%a0),%d0
502 jne fp_ne_small3
503#endif
504 | Genuine zero.
505 clr.w -(%a0)
506 subq.l #2,%a0
507 printf PNORM,"%p(",1,%a0
508 printx PNORM,%a0@
509 printf PNORM,")\n"
510 rts
511 | Subnormal.
512fp_ne_small1:
	/* %d1 = number of leading zero bits = shift needed; %d2 = exponent */
513 bfffo %d0{#0,#32},%d1
514 move.w -(%a0),%d2
515 sub.w %d1,%d2
516 jcc 1f
517 | Pathologically small, denormalize.
518 add.w %d2,%d1
519 clr.w %d2
520 fp_set_sr FPSR_EXC_UNFL
5211: move.w %d2,(%a0)+
522 move.w %d1,%d2
523 jeq fp_ne_checkround
524 | This is exactly the same 64-bit double shift as seen above.
525 lsl.l %d2,%d0
526 move.l %d0,(%a0)+
527 move.l (%a0),%d0
528 move.l %d0,%d1
529 lsl.l %d2,%d0
530 move.l %d0,(%a0)
531 neg.w %d2
532 and.w #0x1f,%d2
533 lsr.l %d2,%d1
534 or.l %d1,-(%a0)
535#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
536fp_ne_extra1:
537 clr.l %d0
538 move.b (-4,%a0),%d0
539 neg.w %d2
540 add.w #24,%d2
541 jcc 1f
542 clr.b (-4,%a0)
543 lsl.l %d2,%d0
544 or.l %d0,(4,%a0)
545 jra fp_ne_checkround
5461: addq.w #8,%d2
547 lsl.l %d2,%d0
548 move.b %d0,(-4,%a0)
549 lsr.l #8,%d0
550 or.l %d0,(4,%a0)
551#endif
552 jra fp_ne_checkround
553 | May or may not be subnormal, if so, only 32 bits to shift.
554fp_ne_small2:
	/* high lword is zero: shift needed is 32 + leading zeros of the
	   low lword (%d1); %d2 = exponent */
555 bfffo %d0{#0,#32},%d1
556 add.w #32,%d1
557 move.w -(%a0),%d2
558 sub.w %d1,%d2
559 jcc 1f
560 | Beyond pathologically small, denormalize.
561 add.w %d2,%d1
562 clr.w %d2
563 fp_set_sr FPSR_EXC_UNFL
5641: move.w %d2,(%a0)+
565 ext.l %d1
566 jeq fp_ne_checkround
567 clr.l (4,%a0)
	/* %d1 = shift - 32: non-negative means the low lword moves
	   entirely into the high lword, otherwise it straddles both */
568 sub.w #32,%d1
569 jcs 1f
570 lsl.l %d1,%d0 | lower lword needs only to be shifted
571 move.l %d0,(%a0) | into the higher lword
572#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
573 clr.l %d0
574 move.b (-4,%a0),%d0
575 clr.b (-4,%a0)
576 neg.w %d1
577 add.w #32,%d1
578 bfins %d0,(%a0){%d1,#8}
579#endif
580 jra fp_ne_checkround
5811: neg.w %d1 | lower lword is splitted between
582 bfins %d0,(%a0){%d1,#32} | higher and lower lword
583#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
584 jra fp_ne_checkround
585#else
586 move.w %d1,%d2
587 jra fp_ne_extra1
588 | These are extremely small numbers, that will mostly end up as zero
589 | anyway, so this is only important for correct rounding.
590fp_ne_small3:
591 bfffo %d0{#24,#8},%d1
592 add.w #40,%d1
593 move.w -(%a0),%d2
594 sub.w %d1,%d2
595 jcc 1f
596 | Pathologically small, denormalize.
597 add.w %d2,%d1
598 clr.w %d2
5991: move.w %d2,(%a0)+
600 ext.l %d1
601 jeq fp_ne_checkround
602 cmp.w #8,%d1
603 jcs 2f
6041: clr.b (-4,%a0)
605 sub.w #64,%d1
606 jcs 1f
607 add.w #24,%d1
608 lsl.l %d1,%d0
609 move.l %d0,(%a0)
610 jra fp_ne_checkround
6111: neg.w %d1
612 bfins %d0,(%a0){%d1,#8}
613 jra fp_ne_checkround
6142: lsl.l %d1,%d0
615 move.b %d0,(-4,%a0)
616 lsr.l #8,%d0
617 move.b %d0,(7,%a0)
618 jra fp_ne_checkround
619#endif
620 | Infinities and NaNs, again, same as above.
621fp_ne_large:
622 move.l (%a0)+,%d0
623 jne 3f
6241: tst.l (%a0)
625 jne 4f
6262: subq.l #8,%a0
627 printf PNORM,"%p(",1,%a0
628 printx PNORM,%a0@
629 printf PNORM,")\n"
630 rts
631 | we have maybe a NaN, shift off the highest bit
6323: move.l %d0,%d1
633 lsl.l #1,%d1
634 jne 4f
	/* only the explicit bit was set: clear it, then check the low lword */
635 clr.l (-4,%a0)
636 jra 1b
637 | we have a NaN, test if it is signaling
	/* bset reports the previous state of bit 30: already set means
	   nothing more to do */
6384: bset #30,%d0
639 jne 2b
640 fp_set_sr FPSR_EXC_SNAN
641 move.l %d0,(-4,%a0)
642 jra 2b
643
644 | These next two do rounding as per the IEEE standard.
645 | Values for the rounding modes appear to be:
646 | 0: Round to nearest
647 | 1: Round to zero
648 | 2: Round to -Infinity
649 | 3: Round to +Infinity
650 | Both functions expect that fp_normalize_ext was already
651 | called (so the extended argument is already normalized
652 | as far as possible). They are used when a different
653 | rounding precision is selected, and before converting
654 | into single/double.
655
656 | fp_normalize_double:
657 | normalize an extended with double (52-bit) precision
658 | args: %a0 (struct fp_ext *)
659
/*
 * fp_normalize_double: round the struct fp_ext at %a0 to double
 * precision in place.
 *
 *  - In range: the low 11 bits of the low mantissa lword are rounded
 *    off according to the mode read from FPD_RND; FPSR_EXC_INEX2 is
 *    set when those bits are non-zero.
 *  - Exponent below 0x4000-0x3ff (fp_nd_small): FPSR_EXC_UNFL is set,
 *    the exponent becomes 0x3c01 and the mantissa is shifted right by
 *    the amount of underflow, with shifted-out bits folded into a
 *    sticky bit, before rounding.
 *  - Exponent 0 (fp_nd_zero): a true zero is left alone; any other
 *    mantissa is replaced by zero with a sticky bit and rounded.
 *  - Exponent 0x43ff or above (fp_nd_large): FPSR_EXC_OVFL is set and
 *    the result is either infinity or the largest finite value
 *    (exponent 0x43fe, mantissa 0xffffffff fffff800), depending on
 *    rounding mode and sign.
 *  - Exponent 0x7fff (fp_nd_huge): returned untouched.
 *
 * %a0 is restored on every exit; %d0, %d1 and %d2 are used as scratch.
 * The upper word of %d2 carries the sign from the first lword and is
 * consulted for the directed rounding modes.
 */
660fp_normalize_double:
661 printf PNORM,"nd: %p(",1,%a0
662 printx PNORM,%a0@
663 printf PNORM,"), "
664 move.l (%a0)+,%d2
665 tst.w %d2
666 jeq fp_nd_zero | zero / denormalized
667 cmp.w #0x7fff,%d2
668 jeq fp_nd_huge | NaN / infinitive.
669 sub.w #0x4000-0x3ff,%d2 | will the exponent fit?
670 jcs fp_nd_small | too small.
671 cmp.w #0x7fe,%d2
672 jcc fp_nd_large | too big.
673 addq.l #4,%a0
674 move.l (%a0),%d0 | low lword of mantissa
675 | now, round off the low 11 bits.
	/* on entry here %a0 points at the low mantissa lword and %d0
	   holds the bits to be examined */
676fp_nd_round:
677 moveq #21,%d1
678 lsl.l %d1,%d0 | keep 11 low bits.
679 jne fp_nd_checkround | Are they non-zero?
680 | nothing to do here
6819: subq.l #8,%a0
682 printf PNORM,"%p(",1,%a0
683 printx PNORM,%a0@
684 printf PNORM,")\n"
685 rts
686 | Be careful with the X bit! It contains the lsb
687 | from the shift above, it is needed for round to nearest.
688fp_nd_checkround:
689 fp_set_sr FPSR_EXC_INEX2 | INEX2 bit
690 and.w #0xf800,(2,%a0) | clear bits 0-10
691 move.w (FPD_RND,FPDATA),%d2 | rounding mode
692 jne 2f | %d2 == 0, round to nearest
693 tst.l %d0 | test guard bit
694 jpl 9b | zero is closer
695 | here we test the X bit by adding it to %d2
696 clr.w %d2 | first set z bit, addx only clears it
697 addx.w %d2,%d2 | test lsb bit
698 | IEEE754-specified "round to even" behaviour. If the guard
699 | bit is set, then the number is odd, so rounding works like
700 | in grade-school arithmetic (i.e. 1.5 rounds to 2.0)
701 | Otherwise, an equal distance rounds towards zero, so as not
702 | to produce an odd number. This is strange, but it is what
703 | the standard says.
704 jne fp_nd_doroundup | round to infinity
705 lsl.l #1,%d0 | check low bits
706 jeq 9b | round to zero
707fp_nd_doroundup:
708 | round (the mantissa, that is) towards infinity
709 add.l #0x800,(%a0)
710 jcc 9b | no overflow, good.
711 addq.l #1,-(%a0) | extend to high lword
712 jcc 1f | no overflow, good.
713 | Yow! we have managed to overflow the mantissa. Since this
714 | only happens when %d1 was 0xfffff800, it is now zero, so
715 | reset the high bit, and increment the exponent.
716 move.w #0x8000,(%a0)
717 addq.w #1,-(%a0)
718 cmp.w #0x43ff,(%a0)+ | exponent now overflown?
719 jeq fp_nd_large | yes, so make it infinity.
7201: subq.l #4,%a0
721 printf PNORM,"%p(",1,%a0
722 printx PNORM,%a0@
723 printf PNORM,")\n"
724 rts
7252: subq.w #2,%d2
726 jcs 9b | %d2 < 2, round to zero
727 jhi 3f | %d2 > 2, round to +infinity
728 | Round to +Inf or -Inf. High word of %d2 contains the
729 | sign of the number, by the way.
730 swap %d2 | to -inf
731 tst.b %d2
732 jne fp_nd_doroundup | negative, round to infinity
733 jra 9b | positive, round to zero
7343: swap %d2 | to +inf
735 tst.b %d2
736 jeq fp_nd_doroundup | positive, round to infinity
737 jra 9b | negative, round to zero
738 | Exponent underflow. Try to make a denormal, and set it to
739 | the smallest possible fraction if this fails.
740fp_nd_small:
741 fp_set_sr FPSR_EXC_UNFL | set UNFL bit
742 move.w #0x3c01,(-2,%a0) | 2**-1022
743 neg.w %d2 | degree of underflow
744 cmp.w #32,%d2 | single or double shift?
745 jcc 1f
746 | Again, another 64-bit double shift.
747 move.l (%a0),%d0
748 move.l %d0,%d1
749 lsr.l %d2,%d0
750 move.l %d0,(%a0)+
751 move.l (%a0),%d0
752 lsr.l %d2,%d0
753 neg.w %d2
754 add.w #32,%d2
755 lsl.l %d2,%d1
756 or.l %d1,%d0
757 move.l (%a0),%d1
758 move.l %d0,(%a0)
759 | Check to see if we shifted off any significant bits
760 lsl.l %d2,%d1
761 jeq fp_nd_round | Nope, round.
762 bset #0,%d0 | Yes, so set the "sticky bit".
763 jra fp_nd_round | Now, round.
764 | Another 64-bit single shift and store
7651: sub.w #32,%d2
766 cmp.w #32,%d2 | Do we really need to shift?
767 jcc 2f | No, the number is too small.
768 move.l (%a0),%d0
769 clr.l (%a0)+
770 move.l %d0,%d1
771 lsr.l %d2,%d0
772 neg.w %d2
773 add.w #32,%d2
774 | Again, check to see if we shifted off any significant bits.
775 tst.l (%a0)
776 jeq 1f
777 bset #0,%d0 | Sticky bit.
7781: move.l %d0,(%a0)
779 lsl.l %d2,%d1
780 jeq fp_nd_round
781 bset #0,%d0
782 jra fp_nd_round
783 | Sorry, the number is just too small.
7842: clr.l (%a0)+
785 clr.l (%a0)
786 moveq #1,%d0 | Smallest possible fraction,
787 jra fp_nd_round | round as desired.
788 | zero and denormalized
789fp_nd_zero:
790 tst.l (%a0)+
791 jne 1f
792 tst.l (%a0)
793 jne 1f
794 subq.l #8,%a0
795 printf PNORM,"%p(",1,%a0
796 printx PNORM,%a0@
797 printf PNORM,")\n"
798 rts | zero. nothing to do.
799 | These are not merely subnormal numbers, but true denormals,
800 | i.e. pathologically small (exponent is 2**-16383) numbers.
801 | It is clearly impossible for even a normal extended number
802 | with that exponent to fit into double precision, so just
803 | write these ones off as "too darn small".
8041: fp_set_sr FPSR_EXC_UNFL | Set UNFL bit
805 clr.l (%a0)
806 clr.l -(%a0)
807 move.w #0x3c01,-(%a0) | i.e. 2**-1022
	/* step %a0 back to the low mantissa lword, as fp_nd_round expects */
808 addq.l #6,%a0
809 moveq #1,%d0
810 jra fp_nd_round | round.
811 | Exponent overflow. Just call it infinity.
812fp_nd_large:
	/* any of the 11 bits that do not fit in a double set? then the
	   result is inexact as well */
813 move.w #0x7ff,%d0
814 and.w (6,%a0),%d0
815 jeq 1f
816 fp_set_sr FPSR_EXC_INEX2
8171: fp_set_sr FPSR_EXC_OVFL
818 move.w (FPD_RND,FPDATA),%d2
819 jne 3f | %d2 = 0 round to nearest
8201: move.w #0x7fff,(-2,%a0)
821 clr.l (%a0)+
822 clr.l (%a0)
8232: subq.l #8,%a0
824 printf PNORM,"%p(",1,%a0
825 printx PNORM,%a0@
826 printf PNORM,")\n"
827 rts
8283: subq.w #2,%d2
829 jcs 5f | %d2 < 2, round to zero
830 jhi 4f | %d2 > 2, round to +infinity
831 tst.b (-3,%a0) | to -inf
832 jne 1b
833 jra 5f
8344: tst.b (-3,%a0) | to +inf
835 jeq 1b
	/* largest finite double: exponent 0x43fe, mantissa all ones down
	   to bit 11 of the low lword */
8365: move.w #0x43fe,(-2,%a0)
837 moveq #-1,%d0
838 move.l %d0,(%a0)+
839 move.w #0xf800,%d0
840 move.l %d0,(%a0)
841 jra 2b
842 | Infinities or NaNs
843fp_nd_huge:
844 subq.l #4,%a0
845 printf PNORM,"%p(",1,%a0
846 printx PNORM,%a0@
847 printf PNORM,")\n"
848 rts
849
850 | fp_normalize_single:
851 | normalize an extended with single (23-bit) precision
852 | args: %a0 (struct fp_ext *)
853
/*
 * fp_normalize_single: round the struct fp_ext at %a0 to single
 * precision in place.
 *
 *  - In range: a non-zero low mantissa lword is folded into a sticky
 *    bit and cleared, then the low 8 bits of the high mantissa lword
 *    are rounded off according to the mode read from FPD_RND;
 *    FPSR_EXC_INEX2 is set when anything was discarded.
 *  - Exponent below 0x4000-0x7f (fp_ns_small): FPSR_EXC_UNFL is set,
 *    the exponent becomes 0x3f81 and the high mantissa lword is shifted
 *    right by the amount of underflow (or replaced by zero plus a
 *    sticky bit when the underflow is 32 or more) before rounding.
 *  - Exponent 0 (fp_ns_zero): a true zero is left alone; any other
 *    mantissa is replaced by zero with a sticky bit and rounded.
 *  - Exponent 0x407f or above (fp_ns_large): FPSR_EXC_OVFL is set and
 *    the result is either infinity or the largest finite value
 *    (exponent 0x407e, mantissa 0xffffff00 00000000), depending on
 *    rounding mode and sign.
 *  - Exponent 0x7fff (fp_ns_huge): returned untouched.
 *
 * %a0 is restored on every exit; %d0, %d1 and the lower word of %d2 are
 * used as scratch.
 */
854fp_normalize_single:
855 printf PNORM,"ns: %p(",1,%a0
856 printx PNORM,%a0@
857 printf PNORM,") "
858 addq.l #2,%a0
859 move.w (%a0)+,%d2
860 jeq fp_ns_zero | zero / denormalized
861 cmp.w #0x7fff,%d2
862 jeq fp_ns_huge | NaN / infinitive.
863 sub.w #0x4000-0x7f,%d2 | will the exponent fit?
864 jcs fp_ns_small | too small.
865 cmp.w #0xfe,%d2
866 jcc fp_ns_large | too big.
867 move.l (%a0)+,%d0 | get high lword of mantissa
	/* on entry here %a0 points at the low mantissa lword and %d0
	   holds the high mantissa lword (plus any sticky bit) */
868fp_ns_round:
869 tst.l (%a0) | check the low lword
870 jeq 1f
871 | Set a sticky bit if it is non-zero. This should only
872 | affect the rounding in what would otherwise be equal-
873 | distance situations, which is what we want it to do.
874 bset #0,%d0
8751: clr.l (%a0) | zap it from memory.
876 | now, round off the low 8 bits of the hi lword.
877 tst.b %d0 | 8 low bits.
878 jne fp_ns_checkround | Are they non-zero?
879 | nothing to do here
880 subq.l #8,%a0
881 printf PNORM,"%p(",1,%a0
882 printx PNORM,%a0@
883 printf PNORM,")\n"
884 rts
885fp_ns_checkround:
886 fp_set_sr FPSR_EXC_INEX2 | INEX2 bit
887 clr.b -(%a0) | clear low byte of high lword
	/* %a0 now points at the high mantissa lword */
888 subq.l #3,%a0
889 move.w (FPD_RND,FPDATA),%d2 | rounding mode
890 jne 2f | %d2 == 0, round to nearest
891 tst.b %d0 | test guard bit
892 jpl 9f | zero is closer
893 btst #8,%d0 | test lsb bit
894 | round to even behaviour, see above.
895 jne fp_ns_doroundup | round to infinity
896 lsl.b #1,%d0 | check low bits
897 jeq 9f | round to zero
898fp_ns_doroundup:
899 | round (the mantissa, that is) towards infinity
900 add.l #0x100,(%a0)
901 jcc 9f | no overflow, good.
902 | Overflow. This means that the %d1 was 0xffffff00, so it
903 | is now zero. We will set the mantissa to reflect this, and
904 | increment the exponent (checking for overflow there too)
905 move.w #0x8000,(%a0)
906 addq.w #1,-(%a0)
907 cmp.w #0x407f,(%a0)+ | exponent now overflown?
908 jeq fp_ns_large | yes, so make it infinity.
9099: subq.l #4,%a0
910 printf PNORM,"%p(",1,%a0
911 printx PNORM,%a0@
912 printf PNORM,")\n"
913 rts
914 | check nondefault rounding modes
9152: subq.w #2,%d2
916 jcs 9b | %d2 < 2, round to zero
917 jhi 3f | %d2 > 2, round to +infinity
918 tst.b (-3,%a0) | to -inf
919 jne fp_ns_doroundup | negative, round to infinity
920 jra 9b | positive, round to zero
9213: tst.b (-3,%a0) | to +inf
922 jeq fp_ns_doroundup | positive, round to infinity
923 jra 9b | negative, round to zero
924 | Exponent underflow. Try to make a denormal, and set it to
925 | the smallest possible fraction if this fails.
926fp_ns_small:
927 fp_set_sr FPSR_EXC_UNFL | set UNFL bit
928 move.w #0x3f81,(-2,%a0) | 2**-126
929 neg.w %d2 | degree of underflow
930 cmp.w #32,%d2 | single or double shift?
931 jcc 2f
932 | a 32-bit shift.
933 move.l (%a0),%d0
934 move.l %d0,%d1
935 lsr.l %d2,%d0
936 move.l %d0,(%a0)+
937 | Check to see if we shifted off any significant bits.
938 neg.w %d2
939 add.w #32,%d2
940 lsl.l %d2,%d1
941 jeq 1f
942 bset #0,%d0 | Sticky bit.
943 | Check the lower lword
9441: tst.l (%a0)
945 jeq fp_ns_round
	/* NOTE(review): this clr has no size suffix, unlike every other
	   clr in the routine; fp_ns_round does a clr.l on the same
	   location afterwards -- confirm the default size is acceptable. */
946 clr (%a0)
947 bset #0,%d0 | Sticky bit.
948 jra fp_ns_round
949 | Sorry, the number is just too small.
9502: clr.l (%a0)+
951 clr.l (%a0)
952 moveq #1,%d0 | Smallest possible fraction,
953 jra fp_ns_round | round as desired.
954 | Exponent overflow. Just call it infinity.
955fp_ns_large:
	/* low byte of the high mantissa lword non-zero? then the result
	   is inexact as well */
956 tst.b (3,%a0)
957 jeq 1f
958 fp_set_sr FPSR_EXC_INEX2
9591: fp_set_sr FPSR_EXC_OVFL
960 move.w (FPD_RND,FPDATA),%d2
961 jne 3f | %d2 = 0 round to nearest
9621: move.w #0x7fff,(-2,%a0)
963 clr.l (%a0)+
964 clr.l (%a0)
9652: subq.l #8,%a0
966 printf PNORM,"%p(",1,%a0
967 printx PNORM,%a0@
968 printf PNORM,")\n"
969 rts
9703: subq.w #2,%d2
971 jcs 5f | %d2 < 2, round to zero
972 jhi 4f | %d2 > 2, round to +infinity
973 tst.b (-3,%a0) | to -inf
974 jne 1b
975 jra 5f
9764: tst.b (-3,%a0) | to +inf
977 jeq 1b
	/* largest finite single: exponent 0x407e, 24 mantissa bits set */
9785: move.w #0x407e,(-2,%a0)
979 move.l #0xffffff00,(%a0)+
980 clr.l (%a0)
981 jra 2b
982 | zero and denormalized
983fp_ns_zero:
984 tst.l (%a0)+
985 jne 1f
986 tst.l (%a0)
987 jne 1f
988 subq.l #8,%a0
989 printf PNORM,"%p(",1,%a0
990 printx PNORM,%a0@
991 printf PNORM,")\n"
992 rts | zero. nothing to do.
993 | These are not merely subnormal numbers, but true denormals,
994 | i.e. pathologically small (exponent is 2**-16383) numbers.
995 | It is clearly impossible for even a normal extended number
996 | with that exponent to fit into single precision, so just
997 | write these ones off as "too darn small".
9981: fp_set_sr FPSR_EXC_UNFL | Set UNFL bit
999 clr.l (%a0)
1000 clr.l -(%a0)
1001 move.w #0x3f81,-(%a0) | i.e. 2**-126
	/* step %a0 back to the low mantissa lword, as fp_ns_round expects */
1002 addq.l #6,%a0
1003 moveq #1,%d0
1004 jra fp_ns_round | round.
1005 | Infinities or NaNs
1006fp_ns_huge:
1007 subq.l #4,%a0
1008 printf PNORM,"%p(",1,%a0
1009 printx PNORM,%a0@
1010 printf PNORM,")\n"
1011 rts
1012
1013 | fp_normalize_single_fast:
1014 | normalize an extended with single (23-bit) precision
1015 | this is only used by fsgldiv/fsglmul, where the
1016 | operand is not completely normalized.
1017 | args: %a0 (struct fp_ext *)
1018
/*
 * fp_normalize_single_fast: round the struct fp_ext at %a0 to single
 * precision in place, without the zero / underflow / overflow range
 * checks that fp_normalize_single performs on entry.
 *
 * Only exponent 0x7fff is special-cased up front (returned untouched).
 * Otherwise a non-zero low mantissa lword is folded into a sticky bit
 * and cleared, and the low 8 bits of the high mantissa lword are rounded
 * off according to the mode read from FPD_RND (FPSR_EXC_INEX2 is set
 * when anything was discarded).  Overflow is only detected when the
 * round-up carries out of the mantissa and the exponent reaches 0x407f;
 * fp_nsf_large then sets FPSR_EXC_OVFL and stores infinity or the
 * largest finite value, depending on rounding mode and sign.
 *
 * %a0 is restored on every exit; %d0 and the lower word of %d2 are used
 * as scratch.
 */
1019fp_normalize_single_fast:
1020 printf PNORM,"nsf: %p(",1,%a0
1021 printx PNORM,%a0@
1022 printf PNORM,") "
1023 addq.l #2,%a0
1024 move.w (%a0)+,%d2
1025 cmp.w #0x7fff,%d2
1026 jeq fp_nsf_huge | NaN / infinitive.
1027 move.l (%a0)+,%d0 | get high lword of mantissa
1028fp_nsf_round:
1029 tst.l (%a0) | check the low lword
1030 jeq 1f
1031 | Set a sticky bit if it is non-zero. This should only
1032 | affect the rounding in what would otherwise be equal-
1033 | distance situations, which is what we want it to do.
1034 bset #0,%d0
10351: clr.l (%a0) | zap it from memory.
1036 | now, round off the low 8 bits of the hi lword.
1037 tst.b %d0 | 8 low bits.
1038 jne fp_nsf_checkround | Are they non-zero?
1039 | nothing to do here
1040 subq.l #8,%a0
1041 printf PNORM,"%p(",1,%a0
1042 printx PNORM,%a0@
1043 printf PNORM,")\n"
1044 rts
1045fp_nsf_checkround:
1046 fp_set_sr FPSR_EXC_INEX2 | INEX2 bit
1047 clr.b -(%a0) | clear low byte of high lword
	/* %a0 now points at the high mantissa lword */
1048 subq.l #3,%a0
1049 move.w (FPD_RND,FPDATA),%d2 | rounding mode
1050 jne 2f | %d2 == 0, round to nearest
1051 tst.b %d0 | test guard bit
1052 jpl 9f | zero is closer
1053 btst #8,%d0 | test lsb bit
1054 | round to even behaviour, see above.
1055 jne fp_nsf_doroundup | round to infinity
1056 lsl.b #1,%d0 | check low bits
1057 jeq 9f | round to zero
1058fp_nsf_doroundup:
1059 | round (the mantissa, that is) towards infinity
1060 add.l #0x100,(%a0)
1061 jcc 9f | no overflow, good.
1062 | Overflow. This means that the %d1 was 0xffffff00, so it
1063 | is now zero. We will set the mantissa to reflect this, and
1064 | increment the exponent (checking for overflow there too)
1065 move.w #0x8000,(%a0)
1066 addq.w #1,-(%a0)
1067 cmp.w #0x407f,(%a0)+ | exponent now overflown?
1068 jeq fp_nsf_large | yes, so make it infinity.
10699: subq.l #4,%a0
1070 printf PNORM,"%p(",1,%a0
1071 printx PNORM,%a0@
1072 printf PNORM,")\n"
1073 rts
1074 | check nondefault rounding modes
10752: subq.w #2,%d2
1076 jcs 9b | %d2 < 2, round to zero
1077 jhi 3f | %d2 > 2, round to +infinity
1078 tst.b (-3,%a0) | to -inf
1079 jne fp_nsf_doroundup | negative, round to infinity
1080 jra 9b | positive, round to zero
10813: tst.b (-3,%a0) | to +inf
1082 jeq fp_nsf_doroundup | positive, round to infinity
1083 jra 9b | negative, round to zero
1084 | Exponent overflow. Just call it infinity.
1085fp_nsf_large:
1086 tst.b (3,%a0)
1087 jeq 1f
1088 fp_set_sr FPSR_EXC_INEX2
10891: fp_set_sr FPSR_EXC_OVFL
1090 move.w (FPD_RND,FPDATA),%d2
1091 jne 3f | %d2 = 0 round to nearest
10921: move.w #0x7fff,(-2,%a0)
1093 clr.l (%a0)+
1094 clr.l (%a0)
10952: subq.l #8,%a0
1096 printf PNORM,"%p(",1,%a0
1097 printx PNORM,%a0@
1098 printf PNORM,")\n"
1099 rts
11003: subq.w #2,%d2
1101 jcs 5f | %d2 < 2, round to zero
1102 jhi 4f | %d2 > 2, round to +infinity
1103 tst.b (-3,%a0) | to -inf
1104 jne 1b
1105 jra 5f
11064: tst.b (-3,%a0) | to +inf
1107 jeq 1b
	/* largest finite single: exponent 0x407e, 24 mantissa bits set */
11085: move.w #0x407e,(-2,%a0)
1109 move.l #0xffffff00,(%a0)+
1110 clr.l (%a0)
1111 jra 2b
1112 | Infinities or NaNs
1113fp_nsf_huge:
1114 subq.l #4,%a0
1115 printf PNORM,"%p(",1,%a0
1116 printx PNORM,%a0@
1117 printf PNORM,")\n"
1118 rts
1119
1120 | conv_ext2int (macro):
1121 | Generates a subroutine that converts an extended value to an
1122 | integer of a given size, again, with the appropriate type of
1123 | rounding.
1124
1125 | Macro arguments:
1126 | s: size, as given in an assembly instruction.
1127 | b: number of bits in that size.
1128
1129 | Subroutine arguments:
1130 | %a0: source (struct fp_ext *)
1131
1132 | Returns the integer in %d0 (like it should)
1133
| conv_ext2int s,b expands to the body of a conversion routine:
|  - %a0 points at the extended source operand on entry,
|  - the \b-bit integer result is returned in %d0,
|  - INEX2 is set when the value had to be rounded, OPERR when it
|    does not fit (result is then MAXINT, or MAXINT+1 if negative).
| While the mantissa is examined %a0 stays on its high lword, so
| (-4,%a0) addresses the sign and (4,%a0) the low lword.
1134.macro conv_ext2int s,b
1135 .set inf,(1<<(\b-1))-1 | i.e. MAXINT
1136 printf PCONV,"e2i%d: %p(",2,#\b,%a0
1137 printx PCONV,%a0@
1138 printf PCONV,") "
1139 addq.l #2,%a0
1140 move.w (%a0)+,%d2 | exponent
1141 jeq fp_e2i_zero\b | zero / denorm (== 0, here)
1142 cmp.w #0x7fff,%d2
1143 jeq fp_e2i_huge\b | Inf / NaN
1144 sub.w #0x3ffe,%d2 | %d2 = number of integer bits
1145 jcs fp_e2i_small\b | magnitude below 0.5
1146 cmp.w #\b,%d2
1147 jhi fp_e2i_large\b | needs more than \b bits
1148 move.l (%a0),%d0
1149 move.l %d0,%d1
1150 lsl.l %d2,%d1 | %d1 = fraction bits of the high lword
1151 jne fp_e2i_round\b
1152 tst.l (4,%a0) | fraction bits in the low lword?
1153 jne fp_e2i_round\b
1154 neg.w %d2
1155 add.w #32,%d2
1156 lsr.l %d2,%d0 | exact: keep only the integer bits
11579: tst.w (-4,%a0) | apply the sign and range-check
1158 jne 1f
1159 tst.\s %d0
1160 jmi fp_e2i_large\b
1161 printf PCONV,"-> %p\n",1,%d0
1162 rts
11631: neg.\s %d0
1164 jeq 1f
1165 jpl fp_e2i_large\b
11661: printf PCONV,"-> %p\n",1,%d0
1167 rts
1168fp_e2i_round\b:
1169 fp_set_sr FPSR_EXC_INEX2 | INEX2 bit
1170 neg.w %d2
1171 add.w #32,%d2
1172 .if \b>16
1173 jeq 5f | shift count 0: guard bit is in the low lword
1174 .endif
1175 lsr.l %d2,%d0
1176 move.w (FPD_RND,FPDATA),%d2 | rounding mode
1177 jne 2f | %d2 == 0, round to nearest
1178 tst.l %d1 | test guard bit
1179 jpl 9b | zero is closer
1180 btst %d2,%d0 | test lsb bit (%d2 still 0)
1181 jne fp_e2i_doroundup\b
1182 lsl.l #1,%d1 | check low bits
1183 jne fp_e2i_doroundup\b
1184 tst.l (4,%a0)
1185 jeq 9b
1186fp_e2i_doroundup\b:
1187 addq.l #1,%d0
1188 jra 9b
1189 | check nondefault rounding modes
11902: subq.w #2,%d2
1191 jcs 9b | %d2 < 2, round to zero
1192 jhi 3f | %d2 > 2, round to +infinity
1193 tst.w (-4,%a0) | to -inf
1194 jne fp_e2i_doroundup\b | negative, round to infinity
1195 jra 9b | positive, round to zero
11963: tst.w (-4,%a0) | to +inf
1197 jeq fp_e2i_doroundup\b | positive, round to infinity
1198 jra 9b | negative, round to zero
1199 | all 32 bits of %d0 are integer bits here, so only values
1200 | that round to -2**31 can still fit; the guard bit is in
1201 | the lower lword. everything else ends up as overflow anyway.
1202 .if \b>16
12035: move.w (FPD_RND,FPDATA),%d2 | rounding mode
1204 jne 2b | %d2 == 0, round to nearest
1205 move.l (4,%a0),%d1 | test guard bit
1206 jpl 9b | zero is closer
1207 lsl.l #1,%d1 | check low bits
1208 jne fp_e2i_doroundup\b
1209 jra 9b
1210 .endif
1211fp_e2i_zero\b:
1212 clr.l %d0
1213 tst.l (%a0)+
1214 jne 1f
1215 tst.l (%a0)
1216 jeq 3f | true zero: return 0 without touching the status
12171: subq.l #4,%a0 | denormal: %a0 back on the high lword
1218 fp_clr_sr FPSR_EXC_UNFL | fp_normalize_ext has set this bit
1219fp_e2i_small\b:
1220 fp_set_sr FPSR_EXC_INEX2
1221 clr.l %d0
1222 move.w (FPD_RND,FPDATA),%d2 | rounding mode
1223 subq.w #2,%d2
1224 jcs 3f | %d2 < 2, round to nearest/zero
1225 jhi 2f | %d2 > 2, round to +infinity
1226 tst.w (-4,%a0) | to -inf
1227 jeq 3f
1228 subq.\s #1,%d0
1229 jra 3f
12302: tst.w (-4,%a0) | to +inf
1231 jne 3f
1232 addq.\s #1,%d0
12333: printf PCONV,"-> %p\n",1,%d0
1234 rts
1235fp_e2i_large\b:
1236 fp_set_sr FPSR_EXC_OPERR
1237 move.\s #inf,%d0
1238 tst.w (-4,%a0)
1239 jeq 1f
1240 addq.\s #1,%d0 | negative: MAXINT+1 wraps to MININT
12411: printf PCONV,"-> %p\n",1,%d0
1242 rts
1243fp_e2i_huge\b:
1244 move.\s (%a0),%d0 | NaN result: top bits of the mantissa
1245 tst.l (%a0) | high lword of the mantissa
1246 jne 1f
1247 tst.l (4,%a0) | low lword (was a second test of (%a0),
 | which treated such NaNs as infinity)
1248 jeq fp_e2i_large\b | mantissa all zero: infinity
1249 | fp_normalize_ext has set this bit already
1250 | and made the number nonsignaling
12511: fp_tst_sr FPSR_EXC_SNAN
1252 jne 1f
1253 fp_set_sr FPSR_EXC_OPERR
12541: printf PCONV,"-> %p\n",1,%d0
1255 rts
1256.endm
1257
| Integer conversion entry points: each label is followed by one
| expansion of conv_ext2int for the given operand size / bit count.
| %a0 = source (struct fp_ext *), result in %d0.
1258fp_conv_ext2long:
1259 conv_ext2int l,32
1260
1261fp_conv_ext2short:
1262 conv_ext2int w,16
1263
1264fp_conv_ext2byte:
1265 conv_ext2int b,8
1266
| fp_conv_ext2double:
| Round the extended operand at %a0 with fp_normalize_double, pack it
| into two lwords (sign, 11-bit exponent, 52-bit fraction) and store
| them through putuser at (%a1) and (%a1)+4.
| A fault in putuser/getuser goes to fp_err_ua2.
1267fp_conv_ext2double:
1268 jsr fp_normalize_double
1269 printf PCONV,"e2d: %p(",1,%a0
1270 printx PCONV,%a0@
1271 printf PCONV,"), "
1272 move.l (%a0)+,%d2 | first lword; exponent is its low word
1273 cmp.w #0x7fff,%d2
1274 jne 1f
1275 move.w #0x7ff,%d2 | Inf / NaN: all-ones double exponent
1276 move.l (%a0)+,%d0
1277 jra 2f
12781: sub.w #0x3fff-0x3ff,%d2 | rebias the exponent for double
1279 move.l (%a0)+,%d0 | high lword of mantissa
1280 jmi 2f | top mantissa bit set: keep the exponent
1281 clr.w %d2 | otherwise store exponent 0 (zero / denormal)
12822: lsl.w #5,%d2 | the three shifts move the 11 exponent bits
1283 lsl.l #7,%d2 | to bits 30..20 and bit 16 of the first
1284 lsl.l #8,%d2 | lword (presumably the sign) to bit 31
1285 move.l %d0,%d1
1286 lsl.l #1,%d0 | drop the explicit integer bit
1287 lsr.l #4,%d0
1288 lsr.l #8,%d0 | top 20 fraction bits now in bits 19..0
1289 or.l %d2,%d0
1290 putuser.l %d0,(%a1)+,fp_err_ua2,%a1
1291 moveq #21,%d0
1292 lsl.l %d0,%d1 | low 11 bits of the high lword to the top
1293 move.l (%a0),%d0
1294 lsr.l #4,%d0
1295 lsr.l #7,%d0 | top 21 bits of the low lword
1296 or.l %d1,%d0
1297 putuser.l %d0,(%a1),fp_err_ua2,%a1
1298#ifdef FPU_EMU_DEBUG
1299 getuser.l %a1@(-4),%d0,fp_err_ua2,%a1
1300 getuser.l %a1@(0),%d1,fp_err_ua2,%a1
1301 printf PCONV,"%p(%08x%08x)\n",3,%a1,%d0,%d1
1302#endif
1303 rts
1304
| fp_conv_ext2single:
| Round the extended operand at %a0 with fp_normalize_single and
| return it packed as a 32-bit single (sign, 8-bit exponent, 23-bit
| fraction) in %d0. Nothing is written to memory here.
1305fp_conv_ext2single:
1306 jsr fp_normalize_single
1307 printf PCONV,"e2s: %p(",1,%a0
1308 printx PCONV,%a0@
1309 printf PCONV,"), "
1310 move.l (%a0)+,%d1 | first lword; exponent is its low word
1311 cmp.w #0x7fff,%d1
1312 jne 1f
1313 move.w #0xff,%d1 | Inf / NaN: all-ones single exponent
1314 move.l (%a0)+,%d0
1315 jra 2f
13161: sub.w #0x3fff-0x7f,%d1 | rebias the exponent for single
1317 move.l (%a0)+,%d0 | high lword of mantissa
1318 jmi 2f | top mantissa bit set: keep the exponent
1319 clr.w %d1 | otherwise store exponent 0 (zero / denormal)
13202: lsl.w #8,%d1 | the three shifts move the 8 exponent bits
1321 lsl.l #7,%d1 | to bits 30..23 and bit 16 of the first
1322 lsl.l #8,%d1 | lword (presumably the sign) to bit 31
1323 bclr #31,%d0 | drop the explicit integer bit
1324 lsr.l #8,%d0 | 23 fraction bits in bits 22..0
1325 or.l %d1,%d0
1326 printf PCONV,"%08x\n",1,%d0
1327 rts
1328
1329 | special return addresses for instr that
1330 | encode the rounding precision in the opcode
1331 | (e.g. fsmove,fdmove)
1332
| Each of these entry points drops 8 bytes from the stack, normalizes
| the result at %a0 as an extended value, rounds it to the precision
| named in the label and continues at fp_finaltest.
| NOTE(review): what the 8 discarded stack bytes hold is not visible
| here - confirm against the callers.
1333fp_finalrounding_single:
1334 addq.l #8,%sp
1335 jsr fp_normalize_ext
1336 jsr fp_normalize_single
1337 jra fp_finaltest
1338
1339fp_finalrounding_single_fast:
1340 addq.l #8,%sp
1341 jsr fp_normalize_ext
1342 jsr fp_normalize_single_fast
1343 jra fp_finaltest
1344
1345fp_finalrounding_double:
1346 addq.l #8,%sp
1347 jsr fp_normalize_ext
1348 jsr fp_normalize_double
1349 jra fp_finaltest
1350
1351 | fp_finaltest:
1352 | set the emulated status register based on the outcome of an
1353 | emulated instruction.
1354
| fp_finalrounding:
| Drop 8 bytes from the stack, normalize the result at %a0 and round
| it to the precision selected by (FPD_PREC,FPDATA): 0 leaves it
| extended, 1 rounds to single, anything else rounds to double.
| Falls through into fp_finaltest and fp_final.
1355fp_finalrounding:
1356 addq.l #8,%sp
1357| printf ,"f: %p\n",1,%a0
1358 jsr fp_normalize_ext
1359 move.w (FPD_PREC,FPDATA),%d0
1360 subq.w #1,%d0
1361 jcs fp_finaltest | precision 0: keep extended
1362 jne 1f
1363 jsr fp_normalize_single | precision 1
1364 jra 2f
13651: jsr fp_normalize_double | any other precision value
13662:| printf ,"f: %p\n",1,%a0
1367fp_finaltest:
1368 | First, we do some of the obvious tests for the exception
1369 | status byte and condition code bytes of fp_sr here, so that
1370 | they do not have to be handled individually by every
1371 | emulated instruction.
1372 clr.l %d0 | %d0 collects the condition code bits
1373 addq.l #1,%a0
1374 tst.b (%a0)+ | sign
1375 jeq 1f
1376 bset #FPSR_CC_NEG-24,%d0 | N bit
13771: cmp.w #0x7fff,(%a0)+ | exponent
1378 jeq 2f
1379 | test for zero
1380 moveq #FPSR_CC_Z-24,%d1
1381 tst.l (%a0)+
1382 jne 9f
1383 tst.l (%a0)
1384 jne 9f
1385 jra 8f
1386 | infinity and NaN
13872: moveq #FPSR_CC_NAN-24,%d1
1388 move.l (%a0)+,%d2
1389 lsl.l #1,%d2 | ignore high bit
1390 jne 8f
1391 tst.l (%a0)
1392 jne 8f
1393 moveq #FPSR_CC_INF-24,%d1 | mantissa is zero: infinity
13948: bset %d1,%d0
13959: move.b %d0,(FPD_FPSR+0,FPDATA) | set condition test result
1396 | move instructions enter here
1397 | Here, we test things in the exception status byte, and set
1398 | other things in the accrued exception byte accordingly.
1399 | Emulated instructions can set various things in the former,
1400 | as defined in fp_emu.h.
1401fp_final:
1402 move.l (FPD_FPSR,FPDATA),%d0
1403#if 0
1404 btst #FPSR_EXC_SNAN,%d0 | EXC_SNAN
1405 jne 1f
1406 btst #FPSR_EXC_OPERR,%d0 | EXC_OPERR
1407 jeq 2f
14081: bset #FPSR_AEXC_IOP,%d0 | set IOP bit
14092: btst #FPSR_EXC_OVFL,%d0 | EXC_OVFL
1410 jeq 1f
1411 bset #FPSR_AEXC_OVFL,%d0 | set OVFL bit
14121: btst #FPSR_EXC_UNFL,%d0 | EXC_UNFL
1413 jeq 1f
1414 btst #FPSR_EXC_INEX2,%d0 | EXC_INEX2
1415 jeq 1f
1416 bset #FPSR_AEXC_UNFL,%d0 | set UNFL bit
14171: btst #FPSR_EXC_DZ,%d0 | EXC_DZ
1418 jeq 1f
1419 bset #FPSR_AEXC_DZ,%d0 | set DZ bit
14201: btst #FPSR_EXC_OVFL,%d0 | EXC_OVFL
1421 jne 1f
1422 btst #FPSR_EXC_INEX2,%d0 | EXC_INEX2
1423 jne 1f
1424 btst #FPSR_EXC_INEX1,%d0 | EXC_INEX1
1425 jeq 2f
14261: bset #FPSR_AEXC_INEX,%d0 | set INEX bit
14272: move.l %d0,(FPD_FPSR,FPDATA)
1428#else
1429 | same as above, greatly optimized, but untested (yet)
 | The shifts line each exception status bit up with the accrued
 | bit it feeds, so the whole byte is built with and/or.
 | NOTE(review): this relies on the numeric FPSR bit positions,
 | which are defined outside this chunk - confirm in fp_emu.h.
1430 move.l %d0,%d2 | %d2 keeps the original FPSR
1431 lsr.l #5,%d0
1432 move.l %d0,%d1
1433 lsr.l #4,%d1
1434 or.l %d0,%d1
1435 and.b #0x08,%d1 | bit 3 of (FPSR >> 5) | (FPSR >> 9)
1436 move.l %d2,%d0
1437 lsr.l #6,%d0
1438 or.l %d1,%d0
1439 move.l %d2,%d1
1440 lsr.l #4,%d1
1441 or.b #0xdf,%d1 | mask that can only clear bit 5
1442 and.b %d1,%d0
1443 move.l %d2,%d1
1444 lsr.l #7,%d1
1445 and.b #0x80,%d1 | bit 7 of (FPSR >> 7)
1446 or.b %d1,%d0
1447 and.b #0xf8,%d0 | only bits 7..3 are merged
1448 or.b %d0,%d2 | or them into the low byte of the FPSR
1449 move.l %d2,(FPD_FPSR,FPDATA)
1450#endif
1451 move.b (FPD_FPSR+2,FPDATA),%d0
1452 and.b (FPD_FPCR+2,FPDATA),%d0 | any status bit also set in FPCR?
1453 jeq 1f
1454 printf ,"send signal!!!\n" | only reported; no signal is sent here
14551: jra fp_end
diff --git a/arch/m68k/math-emu/multi_arith.h b/arch/m68k/math-emu/multi_arith.h
new file mode 100644
index 000000000000..02251e5afd89
--- /dev/null
+++ b/arch/m68k/math-emu/multi_arith.h
@@ -0,0 +1,819 @@
1/* multi_arith.h: multi-precision integer arithmetic functions, needed
2 to do extended-precision floating point.
3
4 (c) 1998 David Huggins-Daines.
5
6 Somewhat based on arch/alpha/math-emu/ieee-math.c, which is (c)
7 David Mosberger-Tang.
8
9 You may copy, modify, and redistribute this file under the terms of
10 the GNU General Public License, version 2, or any later version, at
11 your convenience. */
12
13/* Note:
14
15 These are not general multi-precision math routines. Rather, they
16 implement the subset of integer arithmetic that we need in order to
17 multiply, divide, and normalize 128-bit unsigned mantissae. */
18
19#ifndef MULTI_ARITH_H
20#define MULTI_ARITH_H
21
22#if 0 /* old code... */
23
24/* Unsigned only, because we don't need signs to multiply and divide. */
25typedef unsigned int int128[4];
26
27/* Word order */
28enum {
29 MSW128,
30 NMSW128,
31 NLSW128,
32 LSW128
33};
34
35/* big-endian */
36#define LO_WORD(ll) (((unsigned int *) &ll)[1])
37#define HI_WORD(ll) (((unsigned int *) &ll)[0])
38
39/* Convenience functions to stuff various integer values into int128s */
40
41static inline void zero128(int128 a)
42{
43 a[LSW128] = a[NLSW128] = a[NMSW128] = a[MSW128] = 0;
44}
45
46/* Human-readable word order in the arguments */
47static inline void set128(unsigned int i3, unsigned int i2, unsigned int i1,
48 unsigned int i0, int128 a)
49{
50 a[LSW128] = i0;
51 a[NLSW128] = i1;
52 a[NMSW128] = i2;
53 a[MSW128] = i3;
54}
55
56/* Convenience functions (for testing as well) */
57static inline void int64_to_128(unsigned long long src, int128 dest)
58{
59 dest[LSW128] = (unsigned int) src;
60 dest[NLSW128] = src >> 32;
61 dest[NMSW128] = dest[MSW128] = 0;
62}
63
64static inline void int128_to_64(const int128 src, unsigned long long *dest)
65{
66 *dest = src[LSW128] | (long long) src[NLSW128] << 32;
67}
68
69static inline void put_i128(const int128 a)
70{
71 printk("%08x %08x %08x %08x\n", a[MSW128], a[NMSW128],
72 a[NLSW128], a[LSW128]);
73}
74
75/* Internal shifters:
76
77 Note that these are only good for 0 < count < 32.
78 */
79
80static inline void _lsl128(unsigned int count, int128 a)
81{
82 a[MSW128] = (a[MSW128] << count) | (a[NMSW128] >> (32 - count));
83 a[NMSW128] = (a[NMSW128] << count) | (a[NLSW128] >> (32 - count));
84 a[NLSW128] = (a[NLSW128] << count) | (a[LSW128] >> (32 - count));
85 a[LSW128] <<= count;
86}
87
88static inline void _lsr128(unsigned int count, int128 a)
89{
90 a[LSW128] = (a[LSW128] >> count) | (a[NLSW128] << (32 - count));
91 a[NLSW128] = (a[NLSW128] >> count) | (a[NMSW128] << (32 - count));
92 a[NMSW128] = (a[NMSW128] >> count) | (a[MSW128] << (32 - count));
93 a[MSW128] >>= count;
94}
95
96/* Should be faster, one would hope */
97
98static inline void lslone128(int128 a)
99{
100 asm volatile ("lsl.l #1,%0\n"
101 "roxl.l #1,%1\n"
102 "roxl.l #1,%2\n"
103 "roxl.l #1,%3\n"
104 :
105 "=d" (a[LSW128]),
106 "=d"(a[NLSW128]),
107 "=d"(a[NMSW128]),
108 "=d"(a[MSW128])
109 :
110 "0"(a[LSW128]),
111 "1"(a[NLSW128]),
112 "2"(a[NMSW128]),
113 "3"(a[MSW128]));
114}
115
116static inline void lsrone128(int128 a)
117{
118 asm volatile ("lsr.l #1,%0\n"
119 "roxr.l #1,%1\n"
120 "roxr.l #1,%2\n"
121 "roxr.l #1,%3\n"
122 :
123 "=d" (a[MSW128]),
124 "=d"(a[NMSW128]),
125 "=d"(a[NLSW128]),
126 "=d"(a[LSW128])
127 :
128 "0"(a[MSW128]),
129 "1"(a[NMSW128]),
130 "2"(a[NLSW128]),
131 "3"(a[LSW128]));
132}
133
134/* Generalized 128-bit shifters:
135
136 These bit-shift to a multiple of 32, then move whole longwords. */
137
138static inline void lsl128(unsigned int count, int128 a)
139{
140 int wordcount, i;
141
142 if (count % 32)
143 _lsl128(count % 32, a);
144
145 if (0 == (wordcount = count / 32))
146 return;
147
148 /* argh, gak, endian-sensitive */
149 for (i = 0; i < 4 - wordcount; i++) {
150 a[i] = a[i + wordcount];
151 }
152 for (i = 3; i >= 4 - wordcount; --i) {
153 a[i] = 0;
154 }
155}
156
157static inline void lsr128(unsigned int count, int128 a)
158{
159 int wordcount, i;
160
161 if (count % 32)
162 _lsr128(count % 32, a);
163
164 if (0 == (wordcount = count / 32))
165 return;
166
167 for (i = 3; i >= wordcount; --i) {
168 a[i] = a[i - wordcount];
169 }
170 for (i = 0; i < wordcount; i++) {
171 a[i] = 0;
172 }
173}
174
/* OR the 32-bit value a into the least-significant word of b and
   return the updated word (the original fell off the end of a
   non-void function). */
175static inline int orl128(int a, int128 b)
176{
177 return b[LSW128] |= a;
178}
179
180static inline int btsthi128(const int128 a)
181{
182 return a[MSW128] & 0x80000000;
183}
184
185/* test bits (numbered from 0 = LSB) up to and including "top" */
186static inline int bftestlo128(int top, const int128 a)
187{
188 int r = 0;
189
190 if (top > 31)
191 r |= a[LSW128];
192 if (top > 63)
193 r |= a[NLSW128];
194 if (top > 95)
195 r |= a[NMSW128];
196
197 r |= a[3 - (top / 32)] & ((1 << (top % 32 + 1)) - 1);
198
199 return (r != 0);
200}
201
202/* Aargh. We need these because GCC is broken */
203/* FIXME: do them in assembly, for goodness' sake! */
204static inline void mask64(int pos, unsigned long long *mask)
205{
206 *mask = 0;
207
208 if (pos < 32) {
209 LO_WORD(*mask) = (1 << pos) - 1;
210 return;
211 }
212 LO_WORD(*mask) = -1;
213 HI_WORD(*mask) = (1 << (pos - 32)) - 1;
214}
215
216static inline void bset64(int pos, unsigned long long *dest)
217{
218 /* This conditional will be optimized away. Thanks, GCC! */
219 if (pos < 32)
220 asm volatile ("bset %1,%0":"=m"
221 (LO_WORD(*dest)):"id"(pos));
222 else
223 asm volatile ("bset %1,%0":"=m"
224 (HI_WORD(*dest)):"id"(pos - 32));
225}
226
227static inline int btst64(int pos, unsigned long long dest)
228{
229 if (pos < 32)
230 return (0 != (LO_WORD(dest) & (1 << pos)));
231 else
232 return (0 != (HI_WORD(dest) & (1 << (pos - 32))));
233}
234
/* Shift the 64-bit value at dest left by count bits, one 32-bit
   half at a time. Counts of 64 or more are not handled. */
235static inline void lsl64(int count, unsigned long long *dest)
236{
237 if (count < 32) {
238 HI_WORD(*dest) = (HI_WORD(*dest) << count) /* bits leaving the low word enter here; */
239 | (count ? LO_WORD(*dest) >> (32 - count) : 0); /* count 0 would shift by 32 */
240 LO_WORD(*dest) <<= count;
241 return;
242 }
243 count -= 32;
244 HI_WORD(*dest) = LO_WORD(*dest) << count;
245 LO_WORD(*dest) = 0;
246}
247
248static inline void lsr64(int count, unsigned long long *dest)
249{
250 if (count < 32) {
251 LO_WORD(*dest) = (LO_WORD(*dest) >> count)
252 | (HI_WORD(*dest) << (32 - count));
253 HI_WORD(*dest) >>= count;
254 return;
255 }
256 count -= 32;
257 LO_WORD(*dest) = HI_WORD(*dest) >> count;
258 HI_WORD(*dest) = 0;
259}
260#endif
261
/*
 * Shift the 64-bit mantissa of reg right by cnt bits and add cnt to
 * the exponent. The first eight bits shifted out are kept in
 * reg->lowmant; if any bit below those is lost, bit 0 of lowmant is
 * set as a sticky bit. The previous lowmant is overwritten.
 *
 * No C shift uses a count of 32 (undefined behaviour): cnt == 0 is
 * guarded explicitly and cnt == 32 is handled by the bit-field case,
 * which computes the same result.
 */
262static inline void fp_denormalize(struct fp_ext *reg, unsigned int cnt)
263{
264 reg->exp += cnt;
265
266 switch (cnt) {
267 case 0 ... 8:
268 reg->lowmant = reg->mant.m32[1] << (8 - cnt);
269 reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
270 (cnt ? reg->mant.m32[0] << (32 - cnt) : 0);
271 reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
272 break;
273 case 9 ... 31:
274 reg->lowmant = reg->mant.m32[1] >> (cnt - 8);
275 if (reg->mant.m32[1] << (40 - cnt))
276 reg->lowmant |= 1;
277 reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
278 (reg->mant.m32[0] << (32 - cnt));
279 reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
280 break;
281 case 32 ... 39:
 /* the 8-bit field starts 64 - cnt bits into the mantissa and
    may straddle both 32-bit halves */
282 asm volatile ("bfextu %1{%2,#8},%0" : "=d" (reg->lowmant)
283 : "m" (reg->mant.m32[0]), "d" (64 - cnt));
284 if (reg->mant.m32[1] << (40 - cnt))
285 reg->lowmant |= 1;
286 reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
287 reg->mant.m32[0] = 0;
288 break;
289 case 40 ... 71:
290 reg->lowmant = reg->mant.m32[0] >> (cnt - 40);
291 if ((reg->mant.m32[0] << (72 - cnt)) || reg->mant.m32[1])
292 reg->lowmant |= 1;
293 reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
294 reg->mant.m32[0] = 0;
295 break;
296 default:
 /* everything is shifted out; only the sticky bit survives */
297 reg->lowmant = reg->mant.m32[0] || reg->mant.m32[1];
298 reg->mant.m32[0] = 0;
299 reg->mant.m32[1] = 0;
300 break;
301 }
302}
303
/*
 * Shift the mantissa of reg left until its top bit is set and return
 * the shift count (bfffo gives the number of leading zero bits of a
 * word). The exponent is not touched. An all-zero mantissa stays
 * zero and 64 is returned.
 *
 * The conditionals avoid C shifts by 32 when the mantissa is already
 * normalized (shift == 0) or completely zero (bfffo returns 32).
 */
304static inline int fp_overnormalize(struct fp_ext *reg)
305{
306 int shift;
307
308 if (reg->mant.m32[0]) {
309 asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[0]));
310 reg->mant.m32[0] = (reg->mant.m32[0] << shift) | (shift ? reg->mant.m32[1] >> (32 - shift) : 0);
311 reg->mant.m32[1] = (reg->mant.m32[1] << shift);
312 } else {
313 asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[1]));
314 reg->mant.m32[0] = shift < 32 ? reg->mant.m32[1] << shift : 0;
315 reg->mant.m32[1] = 0;
316 shift += 32;
317 }
318
319 return shift;
320}
321
/*
 * dest += src over the 72-bit quantity formed by mant (64 bits) and
 * lowmant (8 bits). Returns the carry out of the top word (0 or 1).
 */
322static inline int fp_addmant(struct fp_ext *dest, struct fp_ext *src)
323{
324 int carry;
325
326 /* The carry travels between the separate asm statements in the X
    flag, so we assume gcc only inserts move and clr instructions
    between them. */
327 asm volatile ("add.b %1,%0" : "=d,g" (dest->lowmant)
328 : "g,d" (src->lowmant), "0,0" (dest->lowmant));
329 asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[1])
330 : "d" (src->mant.m32[1]), "0" (dest->mant.m32[1]));
331 asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[0])
332 : "d" (src->mant.m32[0]), "0" (dest->mant.m32[0]));
 /* 0 + 0 + X: materialize the final carry as an integer */
333 asm volatile ("addx.l %0,%0" : "=d" (carry) : "0" (0));
334
335 return carry;
336}
337
/*
 * Fold a carry out of the mantissa back in: bump the exponent and
 * shift the mantissa right by one with the top bit set.
 * Returns 1 on success. If the exponent reaches 0x7fff the mantissa
 * is cleared, OVFL (and INEX2 if the mantissa was non-zero) is set
 * and 0 is returned.
 */
338static inline int fp_addcarry(struct fp_ext *reg)
339{
340 if (++reg->exp == 0x7fff) {
341 if (reg->mant.m64)
342 fp_set_sr(FPSR_EXC_INEX2);
343 reg->mant.m64 = 0;
344 fp_set_sr(FPSR_EXC_OVFL);
345 return 0;
346 }
 /* the bit leaving m32[1] becomes the top bit of lowmant; whatever
    was in lowmant before collapses into the sticky bit. This must
    happen before m32[1] is shifted. */
347 reg->lowmant = (reg->mant.m32[1] << 7) | (reg->lowmant ? 1 : 0);
348 reg->mant.m32[1] = (reg->mant.m32[1] >> 1) |
349 (reg->mant.m32[0] << 31);
350 reg->mant.m32[0] = (reg->mant.m32[0] >> 1) | 0x80000000;
351
352 return 1;
353}
354
/*
 * dest = src1 - src2 over the 72-bit quantity formed by mant and
 * lowmant. The final borrow is not returned.
 */
355static inline void fp_submant(struct fp_ext *dest, struct fp_ext *src1,
356 struct fp_ext *src2)
357{
358 /* The borrow travels between the separate asm statements in the X
    flag, so we assume gcc only inserts move and clr instructions
    between them. */
359 asm volatile ("sub.b %1,%0" : "=d,g" (dest->lowmant)
360 : "g,d" (src2->lowmant), "0,0" (src1->lowmant));
361 asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[1])
362 : "d" (src2->mant.m32[1]), "0" (src1->mant.m32[1]));
363 asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[0])
364 : "d" (src2->mant.m32[0]), "0" (src1->mant.m32[0]));
365}
366
/* 32x32 -> 64 bit unsigned multiply: desth:destl = src1 * src2.
   src1 is constrained to "dm" (data register or memory, as in
   fp_div64) because mulu.l does not accept an address register,
   which the previous "g" constraint allowed gcc to pick. */
367#define fp_mul64(desth, destl, src1, src2) ({ \
368 asm ("mulu.l %2,%1:%0" : "=d" (destl), "=d" (desth) \
369 : "dm" (src1), "0" (src2)); \
370})
/* 64 / 32 bit unsigned divide: srch:srcl divided by div gives the
   32-bit quotient in quot and the remainder in rem. */
371#define fp_div64(quot, rem, srch, srcl, div) \
372 asm ("divu.l %2,%1:%0" : "=d" (quot), "=d" (rem) \
373 : "dm" (div), "1" (srch), "0" (srcl))
/* 64-bit add on word pairs: dest1:dest2 += src1:src2 (dest1/src1 are
   the high words). The carry from the low add reaches the addx
   through the X flag. */
374#define fp_add64(dest1, dest2, src1, src2) ({ \
375 asm ("add.l %1,%0" : "=d,dm" (dest2) \
376 : "dm,d" (src2), "0,0" (dest2)); \
377 asm ("addx.l %1,%0" : "=d" (dest1) \
378 : "d" (src1), "0" (dest1)); \
379})
/* Add the 64-bit value src (a union with m32[0..1]) into the lower
   64 bits of the upper 96 bits of *dest and propagate the carry into
   dest->m32[0]; dest->m32[3] is not touched.
   The macro now uses its src argument; it used to ignore it and read
   a variable named temp from the caller's scope. */
380#define fp_addx96(dest, src) ({ \
381 /* we assume here, gcc only insert move and a clr instr */ \
382 asm volatile ("add.l %1,%0" : "=d,g" ((dest)->m32[2]) \
383 : "g,d" ((src).m32[1]), "0,0" ((dest)->m32[2])); \
384 asm volatile ("addx.l %1,%0" : "=d" ((dest)->m32[1]) \
385 : "d" ((src).m32[0]), "0" ((dest)->m32[1])); \
386 asm volatile ("addx.l %1,%0" : "=d" ((dest)->m32[0]) \
387 : "d" (0), "0" ((dest)->m32[0])); \
388})
/* 64-bit subtract on unions with m32[0..1]: dest -= src. The borrow
   from the low word reaches the subx through the X flag. */
389#define fp_sub64(dest, src) ({ \
390 asm ("sub.l %1,%0" : "=d,dm" (dest.m32[1]) \
391 : "dm,d" (src.m32[1]), "0,0" (dest.m32[1])); \
392 asm ("subx.l %1,%0" : "=d" (dest.m32[0]) \
393 : "d" (src.m32[0]), "0" (dest.m32[0])); \
394})
/* 96-bit subtract: dest.m32[0..2] -= srch:srcm:srcl. The value of
   the macro is non-zero (scs) when the subtraction borrowed, i.e.
   when dest was smaller than the subtrahend. */
395#define fp_sub96c(dest, srch, srcm, srcl) ({ \
396 char carry; \
397 asm ("sub.l %1,%0" : "=d,dm" (dest.m32[2]) \
398 : "dm,d" (srcl), "0,0" (dest.m32[2])); \
399 asm ("subx.l %1,%0" : "=d" (dest.m32[1]) \
400 : "d" (srcm), "0" (dest.m32[1])); \
401 asm ("subx.l %2,%1; scs %0" : "=d" (carry), "=d" (dest.m32[0]) \
402 : "d" (srch), "1" (dest.m32[0])); \
403 carry; \
404})
405
/*
 * Multiply the two 64-bit mantissas into the 128-bit product *dest,
 * built from the four 32x32 partial products. src1 and src2 are
 * only read.
 */
406static inline void fp_multiplymant(union fp_mant128 *dest, struct fp_ext *src1,
407 struct fp_ext *src2)
408{
409 union fp_mant64 temp;
410
 /* high*high fills the top 64 bits, low*low the bottom 64 bits */
411 fp_mul64(dest->m32[0], dest->m32[1], src1->mant.m32[0], src2->mant.m32[0]);
412 fp_mul64(dest->m32[2], dest->m32[3], src1->mant.m32[1], src2->mant.m32[1]);
413
 /* the two cross products are added into the middle 64 bits */
414 fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[0], src2->mant.m32[1]);
415 fp_addx96(dest, temp);
416
417 fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[1], src2->mant.m32[0]);
418 fp_addx96(dest, temp);
419}
420
/*
 * Divide the mantissa of src by the mantissa of div and store the
 * quotient in *dest: dest->m32[0] receives the integer part (0 or 1)
 * and m32[1..3] receive 32 quotient bits each.
 * src->mant is used as the running remainder and is modified.
 */
421static inline void fp_dividemant(union fp_mant128 *dest, struct fp_ext *src,
422 struct fp_ext *div)
423{
424 union fp_mant128 tmp;
425 union fp_mant64 tmp64;
426 unsigned long *mantp = dest->m32;
427 unsigned long fix, rem, first, dummy;
428 int i;
429
430 /* the algorithm below requires src to be smaller than div,
431 but both have the high bit set */
432 if (src->mant.m64 >= div->mant.m64) {
433 fp_sub64(src->mant, div->mant);
434 *mantp = 1;
435 } else
436 *mantp = 0;
437 mantp++;
438
439 /* basic idea behind this algorithm: we can't divide two 64bit numbers
440 (AB/CD) directly, but we can calculate AB/C0, but this means this
441 quotient is off by C0/CD, so we have to multiply the first result
442 to fix the result, after that we have nearly the correct result
443 and only a few corrections are needed. */
444
445 /* C0/CD can be precalculated, but it's an 64bit division again, but
446 we can make it a bit easier, by dividing first through C so we get
447 10/1D and now only a single shift and the value fits into 32bit. */
448 fix = 0x80000000;
449 dummy = div->mant.m32[1] / div->mant.m32[0] + 1;
450 dummy = (dummy >> 1) | fix;
451 fp_div64(fix, dummy, fix, 0, dummy);
452 fix--;
453
 /* one pass per 32-bit quotient word */
454 for (i = 0; i < 3; i++, mantp++) {
455 if (src->mant.m32[0] == div->mant.m32[0]) {
456 fp_div64(first, rem, 0, src->mant.m32[1], div->mant.m32[0]);
457
458 fp_mul64(*mantp, dummy, first, fix);
459 *mantp += fix;
460 } else {
461 fp_div64(first, rem, src->mant.m32[0], src->mant.m32[1], div->mant.m32[0]);
462
463 fp_mul64(*mantp, dummy, first, fix);
464 }
465
 /* remainder for the estimated quotient word *mantp */
466 fp_mul64(tmp.m32[0], tmp.m32[1], div->mant.m32[0], first - *mantp);
467 fp_add64(tmp.m32[0], tmp.m32[1], 0, rem);
468 tmp.m32[2] = 0;
469
470 fp_mul64(tmp64.m32[0], tmp64.m32[1], *mantp, div->mant.m32[1]);
471 fp_sub96c(tmp, 0, tmp64.m32[0], tmp64.m32[1]);
472
473 src->mant.m32[0] = tmp.m32[1];
474 src->mant.m32[1] = tmp.m32[2];
475
 /* correction: while div still fits into the remainder,
    subtract it and bump the quotient word */
476 while (!fp_sub96c(tmp, 0, div->mant.m32[0], div->mant.m32[1])) {
477 src->mant.m32[0] = tmp.m32[1];
478 src->mant.m32[1] = tmp.m32[2];
479 *mantp += 1;
480 }
481 }
482}
483
484#if 0
485static inline unsigned int fp_fls128(union fp_mant128 *src)
486{
487 unsigned long data;
488 unsigned int res, off;
489
490 if ((data = src->m32[0]))
491 off = 0;
492 else if ((data = src->m32[1]))
493 off = 32;
494 else if ((data = src->m32[2]))
495 off = 64;
496 else if ((data = src->m32[3]))
497 off = 96;
498 else
499 return 128;
500
501 asm ("bfffo %1{#0,#32},%0" : "=d" (res) : "dm" (data));
502 return res + off;
503}
504
505static inline void fp_shiftmant128(union fp_mant128 *src, int shift)
506{
507 unsigned long sticky;
508
509 switch (shift) {
510 case 0:
511 return;
512 case 1:
513 asm volatile ("lsl.l #1,%0"
514 : "=d" (src->m32[3]) : "0" (src->m32[3]));
515 asm volatile ("roxl.l #1,%0"
516 : "=d" (src->m32[2]) : "0" (src->m32[2]));
517 asm volatile ("roxl.l #1,%0"
518 : "=d" (src->m32[1]) : "0" (src->m32[1]));
519 asm volatile ("roxl.l #1,%0"
520 : "=d" (src->m32[0]) : "0" (src->m32[0]));
521 return;
522 case 2 ... 31:
523 src->m32[0] = (src->m32[0] << shift) | (src->m32[1] >> (32 - shift));
524 src->m32[1] = (src->m32[1] << shift) | (src->m32[2] >> (32 - shift));
525 src->m32[2] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
526 src->m32[3] = (src->m32[3] << shift);
527 return;
528 case 32 ... 63:
529 shift -= 32;
530 src->m32[0] = (src->m32[1] << shift) | (src->m32[2] >> (32 - shift));
531 src->m32[1] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
532 src->m32[2] = (src->m32[3] << shift);
533 src->m32[3] = 0;
534 return;
535 case 64 ... 95:
536 shift -= 64;
537 src->m32[0] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
538 src->m32[1] = (src->m32[3] << shift);
539 src->m32[2] = src->m32[3] = 0;
540 return;
541 case 96 ... 127:
542 shift -= 96;
543 src->m32[0] = (src->m32[3] << shift);
544 src->m32[1] = src->m32[2] = src->m32[3] = 0;
545 return;
546 case -31 ... -1:
547 shift = -shift;
548 sticky = 0;
549 if (src->m32[3] << (32 - shift))
550 sticky = 1;
551 src->m32[3] = (src->m32[3] >> shift) | (src->m32[2] << (32 - shift)) | sticky;
552 src->m32[2] = (src->m32[2] >> shift) | (src->m32[1] << (32 - shift));
553 src->m32[1] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift));
554 src->m32[0] = (src->m32[0] >> shift);
555 return;
556 case -63 ... -32:
557 shift = -shift - 32;
558 sticky = 0;
559 if ((src->m32[2] << (32 - shift)) || src->m32[3])
560 sticky = 1;
561 src->m32[3] = (src->m32[2] >> shift) | (src->m32[1] << (32 - shift)) | sticky;
562 src->m32[2] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift));
563 src->m32[1] = (src->m32[0] >> shift);
564 src->m32[0] = 0;
565 return;
566 case -95 ... -64:
567 shift = -shift - 64;
568 sticky = 0;
569 if ((src->m32[1] << (32 - shift)) || src->m32[2] || src->m32[3])
570 sticky = 1;
571 src->m32[3] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift)) | sticky;
572 src->m32[2] = (src->m32[0] >> shift);
573 src->m32[1] = src->m32[0] = 0;
574 return;
575 case -127 ... -96:
576 shift = -shift - 96;
577 sticky = 0;
578 if ((src->m32[0] << (32 - shift)) || src->m32[1] || src->m32[2] || src->m32[3])
579 sticky = 1;
580 src->m32[3] = (src->m32[0] >> shift) | sticky;
581 src->m32[2] = src->m32[1] = src->m32[0] = 0;
582 return;
583 }
584
585 if (shift < 0 && (src->m32[0] || src->m32[1] || src->m32[2] || src->m32[3]))
586 src->m32[3] = 1;
587 else
588 src->m32[3] = 0;
589 src->m32[2] = 0;
590 src->m32[1] = 0;
591 src->m32[0] = 0;
592}
593#endif
594
/*
 * Store the top bits of the 128-bit value *src, shifted left by
 * shift, into dest: 64 bits go to dest->mant, the next 8 bits to
 * dest->lowmant, and bit 0 of lowmant is set as a sticky bit if any
 * lower bit is non-zero.
 * Only shift values 0, 1, 31 and 32 are handled; any other value
 * leaves dest unchanged.
 */
595static inline void fp_putmant128(struct fp_ext *dest, union fp_mant128 *src,
596 int shift)
597{
598 unsigned long tmp;
599
600 switch (shift) {
601 case 0:
602 dest->mant.m64 = src->m64[0];
603 dest->lowmant = src->m32[2] >> 24;
604 if (src->m32[3] || (src->m32[2] << 8))
605 dest->lowmant |= 1;
606 break;
607 case 1:
 /* 96-bit left shift by one; the bit moving between words
    is carried in the X flag across the asm statements */
608 asm volatile ("lsl.l #1,%0"
609 : "=d" (tmp) : "0" (src->m32[2]));
610 asm volatile ("roxl.l #1,%0"
611 : "=d" (dest->mant.m32[1]) : "0" (src->m32[1]));
612 asm volatile ("roxl.l #1,%0"
613 : "=d" (dest->mant.m32[0]) : "0" (src->m32[0]));
614 dest->lowmant = tmp >> 24;
615 if (src->m32[3] || (tmp << 8))
616 dest->lowmant |= 1;
617 break;
618 case 31:
 /* a left shift by 31 is done as a right shift by one of
    words 0..3, keeping the results of words 1..3 */
 /* NOTE(review): the first asm shifts input operand %1 in
    place without declaring it as an output - confirm that
    this is safe. */
619 asm volatile ("lsr.l #1,%1; roxr.l #1,%0"
620 : "=d" (dest->mant.m32[0])
621 : "d" (src->m32[0]), "0" (src->m32[1]));
622 asm volatile ("roxr.l #1,%0"
623 : "=d" (dest->mant.m32[1]) : "0" (src->m32[2]));
624 asm volatile ("roxr.l #1,%0"
625 : "=d" (tmp) : "0" (src->m32[3]));
626 dest->lowmant = tmp >> 24;
627 if (src->m32[3] << 7)
628 dest->lowmant |= 1;
629 break;
630 case 32:
631 dest->mant.m32[0] = src->m32[1];
632 dest->mant.m32[1] = src->m32[2];
633 dest->lowmant = src->m32[3] >> 24;
634 if (src->m32[3] << 8)
635 dest->lowmant |= 1;
636 break;
637 }
638}
639
640#if 0 /* old code... */
641static inline int fls(unsigned int a)
642{
643 int r;
644
645 asm volatile ("bfffo %1{#0,#32},%0"
646 : "=d" (r) : "md" (a));
647 return r;
648}
649
650/* fls = "find last set" (cf. ffs(3)) */
651static inline int fls128(const int128 a)
652{
653 if (a[MSW128])
654 return fls(a[MSW128]);
655 if (a[NMSW128])
656 return fls(a[NMSW128]) + 32;
657 /* XXX: it probably never gets beyond this point in actual
658 use, but that's indicative of a more general problem in the
659 algorithm (i.e. as per the actual 68881 implementation, we
660 really only need at most 67 bits of precision [plus
661 overflow]) so I'm not going to fix it. */
662 if (a[NLSW128])
663 return fls(a[NLSW128]) + 64;
664 if (a[LSW128])
665 return fls(a[LSW128]) + 96;
666 else
667 return -1;
668}
669
670static inline int zerop128(const int128 a)
671{
672 return !(a[LSW128] | a[NLSW128] | a[NMSW128] | a[MSW128]);
673}
674
675static inline int nonzerop128(const int128 a)
676{
677 return (a[LSW128] | a[NLSW128] | a[NMSW128] | a[MSW128]);
678}
679
680/* Addition and subtraction */
681/* Do these in "pure" assembly, because "extended" asm is unmanageable
682 here */
/* b += a on 128-bit values (unsigned, wraps on overflow).
   The carry out of each word is derived from the wrapped sum:
   with a carry in, the word overflowed iff sum <= a, otherwise iff
   sum < a. The previous test wrapped and lost the carry when a word
   of b was 0xffffffff and a carry came in. */
683static inline void add128(const int128 a, int128 b)
684{
685 /* rotating carry flags */
686 unsigned int carry[2];
687
688 carry[0] = a[LSW128] > (0xffffffff - b[LSW128]);
689 b[LSW128] += a[LSW128];
690
691 b[NLSW128] = a[NLSW128] + b[NLSW128] + carry[0];
692 carry[1] = carry[0] ? b[NLSW128] <= a[NLSW128] : b[NLSW128] < a[NLSW128];
693
694 b[NMSW128] = a[NMSW128] + b[NMSW128] + carry[1];
695 carry[0] = carry[1] ? b[NMSW128] <= a[NMSW128] : b[NMSW128] < a[NMSW128];
696
697 b[MSW128] = a[MSW128] + b[MSW128] + carry[0];
698}
699
700/* Note: assembler semantics: "b -= a" */
/* b -= a on 128-bit values (unsigned, wraps on underflow).
   With a borrow in, a word borrows iff b <= a, otherwise iff b < a.
   The previous test "b < a + borrow" wrapped and lost the borrow
   when a word of a was 0xffffffff and a borrow came in. */
701static inline void sub128(const int128 a, int128 b)
702{
703 /* rotating borrow flags */
704 unsigned int borrow[2];
705
706 borrow[0] = b[LSW128] < a[LSW128];
707 b[LSW128] -= a[LSW128];
708
709 borrow[1] = borrow[0] ? b[NLSW128] <= a[NLSW128] : b[NLSW128] < a[NLSW128];
710 b[NLSW128] = b[NLSW128] - a[NLSW128] - borrow[0];
711
712 borrow[0] = borrow[1] ? b[NMSW128] <= a[NMSW128] : b[NMSW128] < a[NMSW128];
713 b[NMSW128] = b[NMSW128] - a[NMSW128] - borrow[1];
714
715 b[MSW128] = b[MSW128] - a[MSW128] - borrow[0];
716}
717
718/* Poor man's 64-bit expanding multiply */
719static inline void mul64(unsigned long long a, unsigned long long b, int128 c)
720{
721 unsigned long long acc;
722 int128 acc128;
723
724 zero128(acc128);
725 zero128(c);
726
727 /* first the low words */
728 if (LO_WORD(a) && LO_WORD(b)) {
729 acc = (long long) LO_WORD(a) * LO_WORD(b);
730 c[NLSW128] = HI_WORD(acc);
731 c[LSW128] = LO_WORD(acc);
732 }
733 /* Next the high words */
734 if (HI_WORD(a) && HI_WORD(b)) {
735 acc = (long long) HI_WORD(a) * HI_WORD(b);
736 c[MSW128] = HI_WORD(acc);
737 c[NMSW128] = LO_WORD(acc);
738 }
739 /* The middle words */
740 if (LO_WORD(a) && HI_WORD(b)) {
741 acc = (long long) LO_WORD(a) * HI_WORD(b);
742 acc128[NMSW128] = HI_WORD(acc);
743 acc128[NLSW128] = LO_WORD(acc);
744 add128(acc128, c);
745 }
746 /* The first and last words */
747 if (HI_WORD(a) && LO_WORD(b)) {
748 acc = (long long) HI_WORD(a) * LO_WORD(b);
749 acc128[NMSW128] = HI_WORD(acc);
750 acc128[NLSW128] = LO_WORD(acc);
751 add128(acc128, c);
752 }
753}
754
755/* Note: unsigned */
/* Unsigned 128-bit compare: returns a negative value if a < b, zero
   if a == b and a positive value if a > b.
   The least-significant words are now compared like the others; the
   previous "(signed) a - b" gave the wrong sign when they differed
   by 2^31 or more. */
756static inline int cmp128(int128 a, int128 b)
757{
758 if (a[MSW128] < b[MSW128])
759 return -1;
760 if (a[MSW128] > b[MSW128])
761 return 1;
762 if (a[NMSW128] < b[NMSW128])
763 return -1;
764 if (a[NMSW128] > b[NMSW128])
765 return 1;
766 if (a[NLSW128] < b[NLSW128])
767 return -1;
768 if (a[NLSW128] > b[NLSW128])
769 return 1;
770
771 return (a[LSW128] > b[LSW128]) - (a[LSW128] < b[LSW128]);
772}
773
774inline void div128(int128 a, int128 b, int128 c)
775{
776 int128 mask;
777
778 /* Algorithm:
779
780 Shift the divisor until it's at least as big as the
781 dividend, keeping track of the position to which we've
782 shifted it, i.e. the power of 2 which we've multiplied it
783 by.
784
785 Then, for this power of 2 (the mask), and every one smaller
786 than it, subtract the mask from the dividend and add it to
787 the quotient until the dividend is smaller than the raised
788 divisor. At this point, divide the dividend and the mask
789 by 2 (i.e. shift one place to the right). Lather, rinse,
790 and repeat, until there are no more powers of 2 left. */
791
792 /* FIXME: needless to say, there's room for improvement here too. */
793
794 /* Shift up */
795 /* XXX: since it just has to be "at least as big", we can
796 probably eliminate this horribly wasteful loop. I will
797 have to prove this first, though */
798 set128(0, 0, 0, 1, mask);
799 while (cmp128(b, a) < 0 && !btsthi128(b)) {
800 lslone128(b);
801 lslone128(mask);
802 }
803
804 /* Shift down */
805 zero128(c);
806 do {
807 if (cmp128(a, b) >= 0) {
808 sub128(b, a);
809 add128(mask, c);
810 }
811 lsrone128(mask);
812 lsrone128(b);
813 } while (nonzerop128(mask));
814
815 /* The remainder is in a... */
816}
817#endif
818
819#endif /* MULTI_ARITH_H */