about | summary | refs | log | tree | commit | diff | stats
path: root/arch/m68k/math-emu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/m68k/math-emu')
-rw-r--r-- arch/m68k/math-emu/multi_arith.h 530
1 files changed, 0 insertions, 530 deletions
diff --git a/arch/m68k/math-emu/multi_arith.h b/arch/m68k/math-emu/multi_arith.h
index 4ad0ca918e2e..4b5eb3d85638 100644
--- a/arch/m68k/math-emu/multi_arith.h
+++ b/arch/m68k/math-emu/multi_arith.h
@@ -19,246 +19,6 @@
19#ifndef MULTI_ARITH_H 19#ifndef MULTI_ARITH_H
20#define MULTI_ARITH_H 20#define MULTI_ARITH_H
21 21
22#if 0 /* old code... */
23
/*
 * 128-bit quantity stored as four unsigned words.  Signs are not
 * needed for multiplication and division, so the type is unsigned only.
 */
typedef unsigned int int128[4];

/*
 * Index of each word inside an int128, most significant word first.
 */
enum {
	MSW128  = 0,	/* most significant word */
	NMSW128 = 1,	/* next-most significant word */
	NLSW128 = 2,	/* next-least significant word */
	LSW128  = 3	/* least significant word */
};
34
/*
 * Access the two unsigned-int halves of a 64-bit lvalue.
 * Big-endian layout: the high half is stored first, so HI_WORD is
 * element 0 and LO_WORD is element 1.  The argument is parenthesized
 * so that any lvalue expression can be passed safely.
 */
#define LO_WORD(ll) (((unsigned int *) &(ll))[1])
#define HI_WORD(ll) (((unsigned int *) &(ll))[0])
38
39/* Convenience functions to stuff various integer values into int128s */
40
41static inline void zero128(int128 a)
42{
43 a[LSW128] = a[NLSW128] = a[NMSW128] = a[MSW128] = 0;
44}
45
46/* Human-readable word order in the arguments */
47static inline void set128(unsigned int i3, unsigned int i2, unsigned int i1,
48 unsigned int i0, int128 a)
49{
50 a[LSW128] = i0;
51 a[NLSW128] = i1;
52 a[NMSW128] = i2;
53 a[MSW128] = i3;
54}
55
56/* Convenience functions (for testing as well) */
57static inline void int64_to_128(unsigned long long src, int128 dest)
58{
59 dest[LSW128] = (unsigned int) src;
60 dest[NLSW128] = src >> 32;
61 dest[NMSW128] = dest[MSW128] = 0;
62}
63
64static inline void int128_to_64(const int128 src, unsigned long long *dest)
65{
66 *dest = src[LSW128] | (long long) src[NLSW128] << 32;
67}
68
69static inline void put_i128(const int128 a)
70{
71 printk("%08x %08x %08x %08x\n", a[MSW128], a[NMSW128],
72 a[NLSW128], a[LSW128]);
73}
74
75/* Internal shifters:
76
77 Note that these are only good for 0 < count < 32.
78 */
79
80static inline void _lsl128(unsigned int count, int128 a)
81{
82 a[MSW128] = (a[MSW128] << count) | (a[NMSW128] >> (32 - count));
83 a[NMSW128] = (a[NMSW128] << count) | (a[NLSW128] >> (32 - count));
84 a[NLSW128] = (a[NLSW128] << count) | (a[LSW128] >> (32 - count));
85 a[LSW128] <<= count;
86}
87
88static inline void _lsr128(unsigned int count, int128 a)
89{
90 a[LSW128] = (a[LSW128] >> count) | (a[NLSW128] << (32 - count));
91 a[NLSW128] = (a[NLSW128] >> count) | (a[NMSW128] << (32 - count));
92 a[NMSW128] = (a[NMSW128] >> count) | (a[MSW128] << (32 - count));
93 a[MSW128] >>= count;
94}
95
96/* Should be faster, one would hope */
97
98static inline void lslone128(int128 a)
99{
100 asm volatile ("lsl.l #1,%0\n"
101 "roxl.l #1,%1\n"
102 "roxl.l #1,%2\n"
103 "roxl.l #1,%3\n"
104 :
105 "=d" (a[LSW128]),
106 "=d"(a[NLSW128]),
107 "=d"(a[NMSW128]),
108 "=d"(a[MSW128])
109 :
110 "0"(a[LSW128]),
111 "1"(a[NLSW128]),
112 "2"(a[NMSW128]),
113 "3"(a[MSW128]));
114}
115
116static inline void lsrone128(int128 a)
117{
118 asm volatile ("lsr.l #1,%0\n"
119 "roxr.l #1,%1\n"
120 "roxr.l #1,%2\n"
121 "roxr.l #1,%3\n"
122 :
123 "=d" (a[MSW128]),
124 "=d"(a[NMSW128]),
125 "=d"(a[NLSW128]),
126 "=d"(a[LSW128])
127 :
128 "0"(a[MSW128]),
129 "1"(a[NMSW128]),
130 "2"(a[NLSW128]),
131 "3"(a[LSW128]));
132}
133
134/* Generalized 128-bit shifters:
135
136 These bit-shift to a multiple of 32, then move whole longwords. */
137
138static inline void lsl128(unsigned int count, int128 a)
139{
140 int wordcount, i;
141
142 if (count % 32)
143 _lsl128(count % 32, a);
144
145 if (0 == (wordcount = count / 32))
146 return;
147
148 /* argh, gak, endian-sensitive */
149 for (i = 0; i < 4 - wordcount; i++) {
150 a[i] = a[i + wordcount];
151 }
152 for (i = 3; i >= 4 - wordcount; --i) {
153 a[i] = 0;
154 }
155}
156
157static inline void lsr128(unsigned int count, int128 a)
158{
159 int wordcount, i;
160
161 if (count % 32)
162 _lsr128(count % 32, a);
163
164 if (0 == (wordcount = count / 32))
165 return;
166
167 for (i = 3; i >= wordcount; --i) {
168 a[i] = a[i - wordcount];
169 }
170 for (i = 0; i < wordcount; i++) {
171 a[i] = 0;
172 }
173}
174
175static inline int orl128(int a, int128 b)
176{
177 b[LSW128] |= a;
178}
179
180static inline int btsthi128(const int128 a)
181{
182 return a[MSW128] & 0x80000000;
183}
184
185/* test bits (numbered from 0 = LSB) up to and including "top" */
186static inline int bftestlo128(int top, const int128 a)
187{
188 int r = 0;
189
190 if (top > 31)
191 r |= a[LSW128];
192 if (top > 63)
193 r |= a[NLSW128];
194 if (top > 95)
195 r |= a[NMSW128];
196
197 r |= a[3 - (top / 32)] & ((1 << (top % 32 + 1)) - 1);
198
199 return (r != 0);
200}
201
202/* Aargh. We need these because GCC is broken */
203/* FIXME: do them in assembly, for goodness' sake! */
/*
 * Set *mask to a value with the low pos bits set (bits 0..pos-1).
 *
 * pos <= 0 gives 0 and pos >= 64 gives all ones.  The value is built
 * with unsigned 64-bit arithmetic: the previous word-wise version
 * overflowed a signed int for pos == 31 and pos == 63, and depended
 * on big-endian type punning.
 */
static inline void mask64(int pos, unsigned long long *mask)
{
	if (pos <= 0)
		*mask = 0;
	else if (pos >= 64)
		*mask = ~0ULL;
	else
		*mask = (1ULL << pos) - 1;
}
215
/*
 * Set bit pos (0 = LSB) in *dest.  Positions outside 0..63 are ignored.
 *
 * Written in plain C: the earlier inline asm applied "bset" to a
 * memory operand, which on m68k is a byte operation that takes the bit
 * number modulo 8, so the wrong bit was set.  It also declared the
 * read-modify-write operand as write-only ("=m").
 */
static inline void bset64(int pos, unsigned long long *dest)
{
	if (pos < 0 || pos > 63)
		return;

	*dest |= 1ULL << pos;
}
226
/*
 * Return 1 if bit pos (0 = LSB) of dest is set, 0 otherwise.
 * Positions outside 0..63 report 0.
 *
 * Uses an unsigned 64-bit shift; the earlier version shifted a signed
 * 1 by 31 and depended on big-endian type punning of the parameter.
 */
static inline int btst64(int pos, unsigned long long dest)
{
	if (pos < 0 || pos > 63)
		return 0;

	return (int) ((dest >> pos) & 1);
}
234
/*
 * Shift *dest left by count bits.
 *
 * count <= 0 leaves the value unchanged; count >= 64 clears it.
 * The earlier word-wise version carried bits into the high word with
 * ">> count" instead of ">> (32 - count)", corrupting every shift of
 * 1..31 bits.
 */
static inline void lsl64(int count, unsigned long long *dest)
{
	if (count <= 0)
		return;

	if (count >= 64) {
		*dest = 0;
		return;
	}

	*dest <<= count;
}
247
/*
 * Shift *dest right by count bits (logical shift, zero fill).
 *
 * count <= 0 leaves the value unchanged; count >= 64 clears it.
 * The earlier word-wise version evaluated "<< 32" for count == 0 and
 * shifted by 32 or more for count >= 64, both of which are undefined.
 */
static inline void lsr64(int count, unsigned long long *dest)
{
	if (count <= 0)
		return;

	if (count >= 64) {
		*dest = 0;
		return;
	}

	*dest >>= count;
}
260#endif
261
262static inline void fp_denormalize(struct fp_ext *reg, unsigned int cnt) 22static inline void fp_denormalize(struct fp_ext *reg, unsigned int cnt)
263{ 23{
264 reg->exp += cnt; 24 reg->exp += cnt;
@@ -481,117 +241,6 @@ static inline void fp_dividemant(union fp_mant128 *dest, struct fp_ext *src,
481 } 241 }
482} 242}
483 243
484#if 0
485static inline unsigned int fp_fls128(union fp_mant128 *src)
486{
487 unsigned long data;
488 unsigned int res, off;
489
490 if ((data = src->m32[0]))
491 off = 0;
492 else if ((data = src->m32[1]))
493 off = 32;
494 else if ((data = src->m32[2]))
495 off = 64;
496 else if ((data = src->m32[3]))
497 off = 96;
498 else
499 return 128;
500
501 asm ("bfffo %1{#0,#32},%0" : "=d" (res) : "dm" (data));
502 return res + off;
503}
504
505static inline void fp_shiftmant128(union fp_mant128 *src, int shift)
506{
507 unsigned long sticky;
508
509 switch (shift) {
510 case 0:
511 return;
512 case 1:
513 asm volatile ("lsl.l #1,%0"
514 : "=d" (src->m32[3]) : "0" (src->m32[3]));
515 asm volatile ("roxl.l #1,%0"
516 : "=d" (src->m32[2]) : "0" (src->m32[2]));
517 asm volatile ("roxl.l #1,%0"
518 : "=d" (src->m32[1]) : "0" (src->m32[1]));
519 asm volatile ("roxl.l #1,%0"
520 : "=d" (src->m32[0]) : "0" (src->m32[0]));
521 return;
522 case 2 ... 31:
523 src->m32[0] = (src->m32[0] << shift) | (src->m32[1] >> (32 - shift));
524 src->m32[1] = (src->m32[1] << shift) | (src->m32[2] >> (32 - shift));
525 src->m32[2] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
526 src->m32[3] = (src->m32[3] << shift);
527 return;
528 case 32 ... 63:
529 shift -= 32;
530 src->m32[0] = (src->m32[1] << shift) | (src->m32[2] >> (32 - shift));
531 src->m32[1] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
532 src->m32[2] = (src->m32[3] << shift);
533 src->m32[3] = 0;
534 return;
535 case 64 ... 95:
536 shift -= 64;
537 src->m32[0] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
538 src->m32[1] = (src->m32[3] << shift);
539 src->m32[2] = src->m32[3] = 0;
540 return;
541 case 96 ... 127:
542 shift -= 96;
543 src->m32[0] = (src->m32[3] << shift);
544 src->m32[1] = src->m32[2] = src->m32[3] = 0;
545 return;
546 case -31 ... -1:
547 shift = -shift;
548 sticky = 0;
549 if (src->m32[3] << (32 - shift))
550 sticky = 1;
551 src->m32[3] = (src->m32[3] >> shift) | (src->m32[2] << (32 - shift)) | sticky;
552 src->m32[2] = (src->m32[2] >> shift) | (src->m32[1] << (32 - shift));
553 src->m32[1] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift));
554 src->m32[0] = (src->m32[0] >> shift);
555 return;
556 case -63 ... -32:
557 shift = -shift - 32;
558 sticky = 0;
559 if ((src->m32[2] << (32 - shift)) || src->m32[3])
560 sticky = 1;
561 src->m32[3] = (src->m32[2] >> shift) | (src->m32[1] << (32 - shift)) | sticky;
562 src->m32[2] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift));
563 src->m32[1] = (src->m32[0] >> shift);
564 src->m32[0] = 0;
565 return;
566 case -95 ... -64:
567 shift = -shift - 64;
568 sticky = 0;
569 if ((src->m32[1] << (32 - shift)) || src->m32[2] || src->m32[3])
570 sticky = 1;
571 src->m32[3] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift)) | sticky;
572 src->m32[2] = (src->m32[0] >> shift);
573 src->m32[1] = src->m32[0] = 0;
574 return;
575 case -127 ... -96:
576 shift = -shift - 96;
577 sticky = 0;
578 if ((src->m32[0] << (32 - shift)) || src->m32[1] || src->m32[2] || src->m32[3])
579 sticky = 1;
580 src->m32[3] = (src->m32[0] >> shift) | sticky;
581 src->m32[2] = src->m32[1] = src->m32[0] = 0;
582 return;
583 }
584
585 if (shift < 0 && (src->m32[0] || src->m32[1] || src->m32[2] || src->m32[3]))
586 src->m32[3] = 1;
587 else
588 src->m32[3] = 0;
589 src->m32[2] = 0;
590 src->m32[1] = 0;
591 src->m32[0] = 0;
592}
593#endif
594
595static inline void fp_putmant128(struct fp_ext *dest, union fp_mant128 *src, 244static inline void fp_putmant128(struct fp_ext *dest, union fp_mant128 *src,
596 int shift) 245 int shift)
597{ 246{
@@ -637,183 +286,4 @@ static inline void fp_putmant128(struct fp_ext *dest, union fp_mant128 *src,
637 } 286 }
638} 287}
639 288
640#if 0 /* old code... */
641static inline int fls(unsigned int a)
642{
643 int r;
644
645 asm volatile ("bfffo %1{#0,#32},%0"
646 : "=d" (r) : "md" (a));
647 return r;
648}
649
650/* fls = "find last set" (cf. ffs(3)) */
651static inline int fls128(const int128 a)
652{
653 if (a[MSW128])
654 return fls(a[MSW128]);
655 if (a[NMSW128])
656 return fls(a[NMSW128]) + 32;
657 /* XXX: it probably never gets beyond this point in actual
658 use, but that's indicative of a more general problem in the
659 algorithm (i.e. as per the actual 68881 implementation, we
660 really only need at most 67 bits of precision [plus
661 overflow]) so I'm not going to fix it. */
662 if (a[NLSW128])
663 return fls(a[NLSW128]) + 64;
664 if (a[LSW128])
665 return fls(a[LSW128]) + 96;
666 else
667 return -1;
668}
669
670static inline int zerop128(const int128 a)
671{
672 return !(a[LSW128] | a[NLSW128] | a[NMSW128] | a[MSW128]);
673}
674
675static inline int nonzerop128(const int128 a)
676{
677 return (a[LSW128] | a[NLSW128] | a[NMSW128] | a[MSW128]);
678}
679
680/* Addition and subtraction */
681/* Do these in "pure" assembly, because "extended" asm is unmanageable
682 here */
683static inline void add128(const int128 a, int128 b)
684{
685 /* rotating carry flags */
686 unsigned int carry[2];
687
688 carry[0] = a[LSW128] > (0xffffffff - b[LSW128]);
689 b[LSW128] += a[LSW128];
690
691 carry[1] = a[NLSW128] > (0xffffffff - b[NLSW128] - carry[0]);
692 b[NLSW128] = a[NLSW128] + b[NLSW128] + carry[0];
693
694 carry[0] = a[NMSW128] > (0xffffffff - b[NMSW128] - carry[1]);
695 b[NMSW128] = a[NMSW128] + b[NMSW128] + carry[1];
696
697 b[MSW128] = a[MSW128] + b[MSW128] + carry[0];
698}
699
700/* Note: assembler semantics: "b -= a" */
701static inline void sub128(const int128 a, int128 b)
702{
703 /* rotating borrow flags */
704 unsigned int borrow[2];
705
706 borrow[0] = b[LSW128] < a[LSW128];
707 b[LSW128] -= a[LSW128];
708
709 borrow[1] = b[NLSW128] < a[NLSW128] + borrow[0];
710 b[NLSW128] = b[NLSW128] - a[NLSW128] - borrow[0];
711
712 borrow[0] = b[NMSW128] < a[NMSW128] + borrow[1];
713 b[NMSW128] = b[NMSW128] - a[NMSW128] - borrow[1];
714
715 b[MSW128] = b[MSW128] - a[MSW128] - borrow[0];
716}
717
718/* Poor man's 64-bit expanding multiply */
719static inline void mul64(unsigned long long a, unsigned long long b, int128 c)
720{
721 unsigned long long acc;
722 int128 acc128;
723
724 zero128(acc128);
725 zero128(c);
726
727 /* first the low words */
728 if (LO_WORD(a) && LO_WORD(b)) {
729 acc = (long long) LO_WORD(a) * LO_WORD(b);
730 c[NLSW128] = HI_WORD(acc);
731 c[LSW128] = LO_WORD(acc);
732 }
733 /* Next the high words */
734 if (HI_WORD(a) && HI_WORD(b)) {
735 acc = (long long) HI_WORD(a) * HI_WORD(b);
736 c[MSW128] = HI_WORD(acc);
737 c[NMSW128] = LO_WORD(acc);
738 }
739 /* The middle words */
740 if (LO_WORD(a) && HI_WORD(b)) {
741 acc = (long long) LO_WORD(a) * HI_WORD(b);
742 acc128[NMSW128] = HI_WORD(acc);
743 acc128[NLSW128] = LO_WORD(acc);
744 add128(acc128, c);
745 }
746 /* The first and last words */
747 if (HI_WORD(a) && LO_WORD(b)) {
748 acc = (long long) HI_WORD(a) * LO_WORD(b);
749 acc128[NMSW128] = HI_WORD(acc);
750 acc128[NLSW128] = LO_WORD(acc);
751 add128(acc128, c);
752 }
753}
754
755/* Note: unsigned */
756static inline int cmp128(int128 a, int128 b)
757{
758 if (a[MSW128] < b[MSW128])
759 return -1;
760 if (a[MSW128] > b[MSW128])
761 return 1;
762 if (a[NMSW128] < b[NMSW128])
763 return -1;
764 if (a[NMSW128] > b[NMSW128])
765 return 1;
766 if (a[NLSW128] < b[NLSW128])
767 return -1;
768 if (a[NLSW128] > b[NLSW128])
769 return 1;
770
771 return (signed) a[LSW128] - b[LSW128];
772}
773
774inline void div128(int128 a, int128 b, int128 c)
775{
776 int128 mask;
777
778 /* Algorithm:
779
780 Shift the divisor until it's at least as big as the
781 dividend, keeping track of the position to which we've
782 shifted it, i.e. the power of 2 which we've multiplied it
783 by.
784
785 Then, for this power of 2 (the mask), and every one smaller
786 than it, subtract the mask from the dividend and add it to
787 the quotient until the dividend is smaller than the raised
788 divisor. At this point, divide the dividend and the mask
789 by 2 (i.e. shift one place to the right). Lather, rinse,
790 and repeat, until there are no more powers of 2 left. */
791
792 /* FIXME: needless to say, there's room for improvement here too. */
793
794 /* Shift up */
795 /* XXX: since it just has to be "at least as big", we can
796 probably eliminate this horribly wasteful loop. I will
797 have to prove this first, though */
798 set128(0, 0, 0, 1, mask);
799 while (cmp128(b, a) < 0 && !btsthi128(b)) {
800 lslone128(b);
801 lslone128(mask);
802 }
803
804 /* Shift down */
805 zero128(c);
806 do {
807 if (cmp128(a, b) >= 0) {
808 sub128(b, a);
809 add128(mask, c);
810 }
811 lsrone128(mask);
812 lsrone128(b);
813 } while (nonzerop128(mask));
814
815 /* The remainder is in a... */
816}
817#endif
818
819#endif /* MULTI_ARITH_H */ 289#endif /* MULTI_ARITH_H */