m68k/math-emu: Remove commented out old code

It's been unused for ages, and contains bugs (e.g. incorrect shifts in lsl64()).

Reported-by: Jonathan Elchison <jelchison@gmail.com>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
author: Geert Uytterhoeven <geert@linux-m68k.org> 2011-06-12 05:01:21 -0400
committer: Geert Uytterhoeven <geert@linux-m68k.org> 2011-07-30 15:21:40 -0400
commit: ffe6c42aa3f731f8707e49c39b4b37310ed363e9 (patch)
tree: 5feac6358f9bddf949122074f939f3106de24b5f /arch/m68k/math-emu
parent: b2cb92417d301f46801695243df5061a9bd31dd5 (diff)
1 files changed, 0 insertions, 530 deletions
diff --git a/arch/m68k/math-emu/multi_arith.h b/arch/m68k/math-emu/multi_arith.h
index 4ad0ca918e2e..4b5eb3d85638 100644
--- a/arch/m68k/math-emu/multi_arith.h
+++ b/arch/m68k/math-emu/multi_arith.h
@@ -19,246 +19,6 @@
 #ifndef MULTI_ARITH_H
 #define MULTI_ARITH_H
-#if 0   /* old code... */
-/* Unsigned only, because we don't need signs to multiply and divide. */
-typedef unsigned int int128[4];
-/* Word order */
-enum {
-        MSW128,
-        NMSW128,
-        NLSW128,
-        LSW128
-};
-/* big-endian */
-#define LO_WORD(ll) (((unsigned int *) &ll)[1])
-#define HI_WORD(ll) (((unsigned int *) &ll)[0])
-/* Convenience functions to stuff various integer values into int128s */
-static inline void zero128(int128 a)
-{
-        a[LSW128] = a[NLSW128] = a[NMSW128] = a[MSW128] = 0;
-}
-/* Human-readable word order in the arguments */
-static inline void set128(unsigned int i3, unsigned int i2, unsigned int i1,
-                          unsigned int i0, int128 a)
-{
-        a[LSW128] = i0;
-        a[NLSW128] = i1;
-        a[NMSW128] = i2;
-        a[MSW128] = i3;
-}
-/* Convenience functions (for testing as well) */
-static inline void int64_to_128(unsigned long long src, int128 dest)
-{
-        dest[LSW128] = (unsigned int) src;
-        dest[NLSW128] = src >> 32;
-        dest[NMSW128] = dest[MSW128] = 0;
-}
-static inline void int128_to_64(const int128 src, unsigned long long *dest)
-{
-        *dest = src[LSW128] | (long long) src[NLSW128] << 32;
-}
-static inline void put_i128(const int128 a)
-{
-        printk("%08x %08x %08x %08x\n", a[MSW128], a[NMSW128],
-               a[NLSW128], a[LSW128]);
-}
-/* Internal shifters:
-   Note that these are only good for 0 < count < 32.
- */
-static inline void _lsl128(unsigned int count, int128 a)
-{
-        a[MSW128] = (a[MSW128] << count) | (a[NMSW128] >> (32 - count));
-        a[NMSW128] = (a[NMSW128] << count) | (a[NLSW128] >> (32 - count));
-        a[NLSW128] = (a[NLSW128] << count) | (a[LSW128] >> (32 - count));
-        a[LSW128] <<= count;
-}
-static inline void _lsr128(unsigned int count, int128 a)
-{
-        a[LSW128] = (a[LSW128] >> count) | (a[NLSW128] << (32 - count));
-        a[NLSW128] = (a[NLSW128] >> count) | (a[NMSW128] << (32 - count));
-        a[NMSW128] = (a[NMSW128] >> count) | (a[MSW128] << (32 - count));
-        a[MSW128] >>= count;
-}
-/* Should be faster, one would hope */
-static inline void lslone128(int128 a)
-{
-        asm volatile ("lsl.l #1,%0\n"
-                      "roxl.l #1,%1\n"
-                      "roxl.l #1,%2\n"
-                      "roxl.l #1,%3\n"
-                      :
-                      "=d" (a[LSW128]),
-                      "=d"(a[NLSW128]),
-                      "=d"(a[NMSW128]),
-                      "=d"(a[MSW128])
-                      :
-                      "0"(a[LSW128]),
-                      "1"(a[NLSW128]),
-                      "2"(a[NMSW128]),
-                      "3"(a[MSW128]));
-}
-static inline void lsrone128(int128 a)
-{
-        asm volatile ("lsr.l #1,%0\n"
-                      "roxr.l #1,%1\n"
-                      "roxr.l #1,%2\n"
-                      "roxr.l #1,%3\n"
-                      :
-                      "=d" (a[MSW128]),
-                      "=d"(a[NMSW128]),
-                      "=d"(a[NLSW128]),
-                      "=d"(a[LSW128])
-                      :
-                      "0"(a[MSW128]),
-                      "1"(a[NMSW128]),
-                      "2"(a[NLSW128]),
-                      "3"(a[LSW128]));
-}
-/* Generalized 128-bit shifters:
-   These bit-shift to a multiple of 32, then move whole longwords.  */
-static inline void lsl128(unsigned int count, int128 a)
-{
-        int wordcount, i;
-        if (count % 32)
-                _lsl128(count % 32, a);
-        if (0 == (wordcount = count / 32))
-                return;
-        /* argh, gak, endian-sensitive */
-        for (i = 0; i < 4 - wordcount; i++) {
-                a[i] = a[i + wordcount];
-        }
-        for (i = 3; i >= 4 - wordcount; --i) {
-                a[i] = 0;
-        }
-}
-static inline void lsr128(unsigned int count, int128 a)
-{
-        int wordcount, i;
-        if (count % 32)
-                _lsr128(count % 32, a);
-        if (0 == (wordcount = count / 32))
-                return;
-        for (i = 3; i >= wordcount; --i) {
-                a[i] = a[i - wordcount];
-        }
-        for (i = 0; i < wordcount; i++) {
-                a[i] = 0;
-        }
-}
-static inline int orl128(int a, int128 b)
-{
-        b[LSW128] |= a;
-}
-static inline int btsthi128(const int128 a)
-{
-        return a[MSW128] & 0x80000000;
-}
-/* test bits (numbered from 0 = LSB) up to and including "top" */
-static inline int bftestlo128(int top, const int128 a)
-{
-        int r = 0;
-        if (top > 31)
-                r |= a[LSW128];
-        if (top > 63)
-                r |= a[NLSW128];
-        if (top > 95)
-                r |= a[NMSW128];
-        r |= a[3 - (top / 32)] & ((1 << (top % 32 + 1)) - 1);
-        return (r != 0);
-}
-/* Aargh.  We need these because GCC is broken */
-/* FIXME: do them in assembly, for goodness' sake! */
-static inline void mask64(int pos, unsigned long long *mask)
-{
-        *mask = 0;
-        if (pos < 32) {
-                LO_WORD(*mask) = (1 << pos) - 1;
-                return;
-        }
-        LO_WORD(*mask) = -1;
-        HI_WORD(*mask) = (1 << (pos - 32)) - 1;
-}
-static inline void bset64(int pos, unsigned long long *dest)
-{
-        /* This conditional will be optimized away.  Thanks, GCC! */
-        if (pos < 32)
-                asm volatile ("bset %1,%0":"=m"
-                              (LO_WORD(*dest)):"id"(pos));
-        else
-                asm volatile ("bset %1,%0":"=m"
-                              (HI_WORD(*dest)):"id"(pos - 32));
-}
-static inline int btst64(int pos, unsigned long long dest)
-{
-        if (pos < 32)
-                return (0 != (LO_WORD(dest) & (1 << pos)));
-        else
-                return (0 != (HI_WORD(dest) & (1 << (pos - 32))));
-}
-static inline void lsl64(int count, unsigned long long *dest)
-{
-        if (count < 32) {
-                HI_WORD(*dest) = (HI_WORD(*dest) << count)
-                    | (LO_WORD(*dest) >> count);
-                LO_WORD(*dest) <<= count;
-                return;
-        }
-        count -= 32;
-        HI_WORD(*dest) = LO_WORD(*dest) << count;
-        LO_WORD(*dest) = 0;
-}
-static inline void lsr64(int count, unsigned long long *dest)
-{
-        if (count < 32) {
-                LO_WORD(*dest) = (LO_WORD(*dest) >> count)
-                    | (HI_WORD(*dest) << (32 - count));
-                HI_WORD(*dest) >>= count;
-                return;
-        }
-        count -= 32;
-        LO_WORD(*dest) = HI_WORD(*dest) >> count;
-        HI_WORD(*dest) = 0;
-}
-#endif
 static inline void fp_denormalize(struct fp_ext *reg, unsigned int cnt)
 {
        reg->exp += cnt;
@@ -481,117 +241,6 @@ static inline void fp_dividemant(union fp_mant128 *dest, struct fp_ext *src,
        }
 }
-#if 0
-static inline unsigned int fp_fls128(union fp_mant128 *src)
-{
-        unsigned long data;
-        unsigned int res, off;
-        if ((data = src->m32[0]))
-                off = 0;
-        else if ((data = src->m32[1]))
-                off = 32;
-        else if ((data = src->m32[2]))
-                off = 64;
-        else if ((data = src->m32[3]))
-                off = 96;
-        else
-                return 128;
-        asm ("bfffo %1{#0,#32},%0" : "=d" (res) : "dm" (data));
-        return res + off;
-}
-static inline void fp_shiftmant128(union fp_mant128 *src, int shift)
-{
-        unsigned long sticky;
-        switch (shift) {
-        case 0:
-                return;
-        case 1:
-                asm volatile ("lsl.l #1,%0"
-                        : "=d" (src->m32[3]) : "0" (src->m32[3]));
-                asm volatile ("roxl.l #1,%0"
-                        : "=d" (src->m32[2]) : "0" (src->m32[2]));
-                asm volatile ("roxl.l #1,%0"
-                        : "=d" (src->m32[1]) : "0" (src->m32[1]));
-                asm volatile ("roxl.l #1,%0"
-                        : "=d" (src->m32[0]) : "0" (src->m32[0]));
-                return;
-        case 2 ... 31:
-                src->m32[0] = (src->m32[0] << shift) | (src->m32[1] >> (32 - shift));
-                src->m32[1] = (src->m32[1] << shift) | (src->m32[2] >> (32 - shift));
-                src->m32[2] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
-                src->m32[3] = (src->m32[3] << shift);
-                return;
-        case 32 ... 63:
-                shift -= 32;
-                src->m32[0] = (src->m32[1] << shift) | (src->m32[2] >> (32 - shift));
-                src->m32[1] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
-                src->m32[2] = (src->m32[3] << shift);
-                src->m32[3] = 0;
-                return;
-        case 64 ... 95:
-                shift -= 64;
-                src->m32[0] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
-                src->m32[1] = (src->m32[3] << shift);
-                src->m32[2] = src->m32[3] = 0;
-                return;
-        case 96 ... 127:
-                shift -= 96;
-                src->m32[0] = (src->m32[3] << shift);
-                src->m32[1] = src->m32[2] = src->m32[3] = 0;
-                return;
-        case -31 ... -1:
-                shift = -shift;
-                sticky = 0;
-                if (src->m32[3] << (32 - shift))
-                        sticky = 1;
-                src->m32[3] = (src->m32[3] >> shift) | (src->m32[2] << (32 - shift)) | sticky;
-                src->m32[2] = (src->m32[2] >> shift) | (src->m32[1] << (32 - shift));
-                src->m32[1] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift));
-                src->m32[0] = (src->m32[0] >> shift);
-                return;
-        case -63 ... -32:
-                shift = -shift - 32;
-                sticky = 0;
-                if ((src->m32[2] << (32 - shift)) || src->m32[3])
-                        sticky = 1;
-                src->m32[3] = (src->m32[2] >> shift) | (src->m32[1] << (32 - shift)) | sticky;
-                src->m32[2] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift));
-                src->m32[1] = (src->m32[0] >> shift);
-                src->m32[0] = 0;
-                return;
-        case -95 ... -64:
-                shift = -shift - 64;
-                sticky = 0;
-                if ((src->m32[1] << (32 - shift)) || src->m32[2] || src->m32[3])
-                        sticky = 1;
-                src->m32[3] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift)) | sticky;
-                src->m32[2] = (src->m32[0] >> shift);
-                src->m32[1] = src->m32[0] = 0;
-                return;
-        case -127 ... -96:
-                shift = -shift - 96;
-                sticky = 0;
-                if ((src->m32[0] << (32 - shift)) || src->m32[1] || src->m32[2] || src->m32[3])
-                        sticky = 1;
-                src->m32[3] = (src->m32[0] >> shift) | sticky;
-                src->m32[2] = src->m32[1] = src->m32[0] = 0;
-                return;
-        }
-        if (shift < 0 && (src->m32[0] || src->m32[1] || src->m32[2] || src->m32[3]))
-                src->m32[3] = 1;
-        else
-                src->m32[3] = 0;
-        src->m32[2] = 0;
-        src->m32[1] = 0;
-        src->m32[0] = 0;
-}
-#endif
 static inline void fp_putmant128(struct fp_ext *dest, union fp_mant128 *src,
                                 int shift)
 {
@@ -637,183 +286,4 @@ static inline void fp_putmant128(struct fp_ext *dest, union fp_mant128 *src,
        }
 }
-#if 0 /* old code... */
-static inline int fls(unsigned int a)
-{
-        int r;
-        asm volatile ("bfffo %1{#0,#32},%0"
-                      : "=d" (r) : "md" (a));
-        return r;
-}
-/* fls = "find last set" (cf. ffs(3)) */
-static inline int fls128(const int128 a)
-{
-        if (a[MSW128])
-                return fls(a[MSW128]);
-        if (a[NMSW128])
-                return fls(a[NMSW128]) + 32;
-        /* XXX: it probably never gets beyond this point in actual
-           use, but that's indicative of a more general problem in the
-           algorithm (i.e. as per the actual 68881 implementation, we
-           really only need at most 67 bits of precision [plus
-           overflow]) so I'm not going to fix it. */
-        if (a[NLSW128])
-                return fls(a[NLSW128]) + 64;
-        if (a[LSW128])
-                return fls(a[LSW128]) + 96;
-        else
-                return -1;
-}
-static inline int zerop128(const int128 a)
-{
-        return !(a[LSW128] | a[NLSW128] | a[NMSW128] | a[MSW128]);
-}
-static inline int nonzerop128(const int128 a)
-{
-        return (a[LSW128] | a[NLSW128] | a[NMSW128] | a[MSW128]);
-}
-/* Addition and subtraction */
-/* Do these in "pure" assembly, because "extended" asm is unmanageable
-   here */
-static inline void add128(const int128 a, int128 b)
-{
-        /* rotating carry flags */
-        unsigned int carry[2];
-        carry[0] = a[LSW128] > (0xffffffff - b[LSW128]);
-        b[LSW128] += a[LSW128];
-        carry[1] = a[NLSW128] > (0xffffffff - b[NLSW128] - carry[0]);
-        b[NLSW128] = a[NLSW128] + b[NLSW128] + carry[0];
-        carry[0] = a[NMSW128] > (0xffffffff - b[NMSW128] - carry[1]);
-        b[NMSW128] = a[NMSW128] + b[NMSW128] + carry[1];
-        b[MSW128] = a[MSW128] + b[MSW128] + carry[0];
-}
-/* Note: assembler semantics: "b -= a" */
-static inline void sub128(const int128 a, int128 b)
-{
-        /* rotating borrow flags */
-        unsigned int borrow[2];
-        borrow[0] = b[LSW128] < a[LSW128];
-        b[LSW128] -= a[LSW128];
-        borrow[1] = b[NLSW128] < a[NLSW128] + borrow[0];
-        b[NLSW128] = b[NLSW128] - a[NLSW128] - borrow[0];
-        borrow[0] = b[NMSW128] < a[NMSW128] + borrow[1];
-        b[NMSW128] = b[NMSW128] - a[NMSW128] - borrow[1];
-        b[MSW128] = b[MSW128] - a[MSW128] - borrow[0];
-}
-/* Poor man's 64-bit expanding multiply */
-static inline void mul64(unsigned long long a, unsigned long long b, int128 c)
-{
-        unsigned long long acc;
-        int128 acc128;
-        zero128(acc128);
-        zero128(c);
-        /* first the low words */
-        if (LO_WORD(a) && LO_WORD(b)) {
-                acc = (long long) LO_WORD(a) * LO_WORD(b);
-                c[NLSW128] = HI_WORD(acc);
-                c[LSW128] = LO_WORD(acc);
-        }
-        /* Next the high words */
-        if (HI_WORD(a) && HI_WORD(b)) {
-                acc = (long long) HI_WORD(a) * HI_WORD(b);
-                c[MSW128] = HI_WORD(acc);
-                c[NMSW128] = LO_WORD(acc);
-        }
-        /* The middle words */
-        if (LO_WORD(a) && HI_WORD(b)) {
-                acc = (long long) LO_WORD(a) * HI_WORD(b);
-                acc128[NMSW128] = HI_WORD(acc);
-                acc128[NLSW128] = LO_WORD(acc);
-                add128(acc128, c);
-        }
-        /* The first and last words */
-        if (HI_WORD(a) && LO_WORD(b)) {
-                acc = (long long) HI_WORD(a) * LO_WORD(b);
-                acc128[NMSW128] = HI_WORD(acc);
-                acc128[NLSW128] = LO_WORD(acc);
-                add128(acc128, c);
-        }
-}
-/* Note: unsigned */
-static inline int cmp128(int128 a, int128 b)
-{
-        if (a[MSW128] < b[MSW128])
-                return -1;
-        if (a[MSW128] > b[MSW128])
-                return 1;
-        if (a[NMSW128] < b[NMSW128])
-                return -1;
-        if (a[NMSW128] > b[NMSW128])
-                return 1;
-        if (a[NLSW128] < b[NLSW128])
-                return -1;
-        if (a[NLSW128] > b[NLSW128])
-                return 1;
-        return (signed) a[LSW128] - b[LSW128];
-}
-inline void div128(int128 a, int128 b, int128 c)
-{
-        int128 mask;
-        /* Algorithm:
-           Shift the divisor until it's at least as big as the
-           dividend, keeping track of the position to which we've
-           shifted it, i.e. the power of 2 which we've multiplied it
-           by.
-           Then, for this power of 2 (the mask), and every one smaller
-           than it, subtract the mask from the dividend and add it to
-           the quotient until the dividend is smaller than the raised
-           divisor.  At this point, divide the dividend and the mask
-           by 2 (i.e. shift one place to the right).  Lather, rinse,
-           and repeat, until there are no more powers of 2 left. */
-        /* FIXME: needless to say, there's room for improvement here too. */
-        /* Shift up */
-        /* XXX: since it just has to be "at least as big", we can
-           probably eliminate this horribly wasteful loop.  I will
-           have to prove this first, though */
-        set128(0, 0, 0, 1, mask);
-        while (cmp128(b, a) < 0 && !btsthi128(b)) {
-                lslone128(b);
-                lslone128(mask);
-        }
-        /* Shift down */
-        zero128(c);
-        do {
-                if (cmp128(a, b) >= 0) {
-                        sub128(b, a);
-                        add128(mask, c);
-                }
-                lsrone128(mask);
-                lsrone128(b);
-        } while (nonzerop128(mask));
-        /* The remainder is in a... */
-}
-#endif
 #endif  /* MULTI_ARITH_H */
author	Geert Uytterhoeven <geert@linux-m68k.org>	2011-06-12 05:01:21 -0400
committer	Geert Uytterhoeven <geert@linux-m68k.org>	2011-07-30 15:21:40 -0400
commit	ffe6c42aa3f731f8707e49c39b4b37310ed363e9 (patch)
tree	5feac6358f9bddf949122074f939f3106de24b5f /arch/m68k/math-emu
parent	b2cb92417d301f46801695243df5061a9bd31dd5 (diff)

diff --git a/arch/m68k/math-emu/multi_arith.h b/arch/m68k/math-emu/multi_arith.h
index 4ad0ca918e2e..4b5eb3d85638 100644
--- a/arch/m68k/math-emu/multi_arith.h
+++ b/arch/m68k/math-emu/multi_arith.h
@@ -19,246 +19,6 @@
19	#ifndef MULTI_ARITH_H	19	#ifndef MULTI_ARITH_H
20	#define MULTI_ARITH_H	20	#define MULTI_ARITH_H
21		21
22	#if 0 /* old code... */
23
24	/* Unsigned only, because we don't need signs to multiply and divide. */
25	typedef unsigned int int128[4];
26
27	/* Word order */
28	enum {
29	MSW128,
30	NMSW128,
31	NLSW128,
32	LSW128
33	};
34
35	/* big-endian */
36	#define LO_WORD(ll) (((unsigned int *) &ll)[1])
37	#define HI_WORD(ll) (((unsigned int *) &ll)[0])
38
39	/* Convenience functions to stuff various integer values into int128s */
40
41	static inline void zero128(int128 a)
42	{
43	a[LSW128] = a[NLSW128] = a[NMSW128] = a[MSW128] = 0;
44	}
45
46	/* Human-readable word order in the arguments */
47	static inline void set128(unsigned int i3, unsigned int i2, unsigned int i1,
48	unsigned int i0, int128 a)
49	{
50	a[LSW128] = i0;
51	a[NLSW128] = i1;
52	a[NMSW128] = i2;
53	a[MSW128] = i3;
54	}
55
56	/* Convenience functions (for testing as well) */
57	static inline void int64_to_128(unsigned long long src, int128 dest)
58	{
59	dest[LSW128] = (unsigned int) src;
60	dest[NLSW128] = src >> 32;
61	dest[NMSW128] = dest[MSW128] = 0;
62	}
63
64	static inline void int128_to_64(const int128 src, unsigned long long *dest)
65	{
66	*dest = src[LSW128] | (long long) src[NLSW128] << 32;
67	}
68
69	static inline void put_i128(const int128 a)
70	{
71	printk("%08x %08x %08x %08x\n", a[MSW128], a[NMSW128],
72	a[NLSW128], a[LSW128]);
73	}
74
75	/* Internal shifters:
76
77	Note that these are only good for 0 < count < 32.
78	*/
79
80	static inline void _lsl128(unsigned int count, int128 a)
81	{
82	a[MSW128] = (a[MSW128] << count) | (a[NMSW128] >> (32 - count));
83	a[NMSW128] = (a[NMSW128] << count) | (a[NLSW128] >> (32 - count));
84	a[NLSW128] = (a[NLSW128] << count) | (a[LSW128] >> (32 - count));
85	a[LSW128] <<= count;
86	}
87
88	static inline void _lsr128(unsigned int count, int128 a)
89	{
90	a[LSW128] = (a[LSW128] >> count) | (a[NLSW128] << (32 - count));
91	a[NLSW128] = (a[NLSW128] >> count) | (a[NMSW128] << (32 - count));
92	a[NMSW128] = (a[NMSW128] >> count) | (a[MSW128] << (32 - count));
93	a[MSW128] >>= count;
94	}
95
96	/* Should be faster, one would hope */
97
98	static inline void lslone128(int128 a)
99	{
100	asm volatile ("lsl.l #1,%0\n"
101	"roxl.l #1,%1\n"
102	"roxl.l #1,%2\n"
103	"roxl.l #1,%3\n"
104	:
105	"=d" (a[LSW128]),
106	"=d"(a[NLSW128]),
107	"=d"(a[NMSW128]),
108	"=d"(a[MSW128])
109	:
110	"0"(a[LSW128]),
111	"1"(a[NLSW128]),
112	"2"(a[NMSW128]),
113	"3"(a[MSW128]));
114	}
115
116	static inline void lsrone128(int128 a)
117	{
118	asm volatile ("lsr.l #1,%0\n"
119	"roxr.l #1,%1\n"
120	"roxr.l #1,%2\n"
121	"roxr.l #1,%3\n"
122	:
123	"=d" (a[MSW128]),
124	"=d"(a[NMSW128]),
125	"=d"(a[NLSW128]),
126	"=d"(a[LSW128])
127	:
128	"0"(a[MSW128]),
129	"1"(a[NMSW128]),
130	"2"(a[NLSW128]),
131	"3"(a[LSW128]));
132	}
133
134	/* Generalized 128-bit shifters:
135
136	These bit-shift to a multiple of 32, then move whole longwords. */
137
138	static inline void lsl128(unsigned int count, int128 a)
139	{
140	int wordcount, i;
141
142	if (count % 32)
143	_lsl128(count % 32, a);
144
145	if (0 == (wordcount = count / 32))
146	return;
147
148	/* argh, gak, endian-sensitive */
149	for (i = 0; i < 4 - wordcount; i++) {
150	a[i] = a[i + wordcount];
151	}
152	for (i = 3; i >= 4 - wordcount; --i) {
153	a[i] = 0;
154	}
155	}
156
157	static inline void lsr128(unsigned int count, int128 a)
158	{
159	int wordcount, i;
160
161	if (count % 32)
162	_lsr128(count % 32, a);
163
164	if (0 == (wordcount = count / 32))
165	return;
166
167	for (i = 3; i >= wordcount; --i) {
168	a[i] = a[i - wordcount];
169	}
170	for (i = 0; i < wordcount; i++) {
171	a[i] = 0;
172	}
173	}
174
175	static inline int orl128(int a, int128 b)
176	{
177	b[LSW128] |= a;
178	}
179
180	static inline int btsthi128(const int128 a)
181	{
182	return a[MSW128] & 0x80000000;
183	}
184
185	/* test bits (numbered from 0 = LSB) up to and including "top" */
186	static inline int bftestlo128(int top, const int128 a)
187	{
188	int r = 0;
189
190	if (top > 31)
191	r |= a[LSW128];
192	if (top > 63)
193	r |= a[NLSW128];
194	if (top > 95)
195	r |= a[NMSW128];
196
197	r |= a[3 - (top / 32)] & ((1 << (top % 32 + 1)) - 1);
198
199	return (r != 0);
200	}
201
202	/* Aargh. We need these because GCC is broken */
203	/* FIXME: do them in assembly, for goodness' sake! */
204	static inline void mask64(int pos, unsigned long long *mask)
205	{
206	*mask = 0;
207
208	if (pos < 32) {
209	LO_WORD(*mask) = (1 << pos) - 1;
210	return;
211	}
212	LO_WORD(*mask) = -1;
213	HI_WORD(*mask) = (1 << (pos - 32)) - 1;
214	}
215
216	static inline void bset64(int pos, unsigned long long *dest)
217	{
218	/* This conditional will be optimized away. Thanks, GCC! */
219	if (pos < 32)
220	asm volatile ("bset %1,%0":"=m"
221	(LO_WORD(*dest)):"id"(pos));
222	else
223	asm volatile ("bset %1,%0":"=m"
224	(HI_WORD(*dest)):"id"(pos - 32));
225	}
226
227	static inline int btst64(int pos, unsigned long long dest)
228	{
229	if (pos < 32)
230	return (0 != (LO_WORD(dest) & (1 << pos)));
231	else
232	return (0 != (HI_WORD(dest) & (1 << (pos - 32))));
233	}
234
235	static inline void lsl64(int count, unsigned long long *dest)
236	{
237	if (count < 32) {
238	HI_WORD(*dest) = (HI_WORD(*dest) << count)
239	| (LO_WORD(*dest) >> count);
240	LO_WORD(*dest) <<= count;
241	return;
242	}
243	count -= 32;
244	HI_WORD(*dest) = LO_WORD(*dest) << count;
245	LO_WORD(*dest) = 0;
246	}
247
248	static inline void lsr64(int count, unsigned long long *dest)
249	{
250	if (count < 32) {
251	LO_WORD(*dest) = (LO_WORD(*dest) >> count)
252	| (HI_WORD(*dest) << (32 - count));
253	HI_WORD(*dest) >>= count;
254	return;
255	}
256	count -= 32;
257	LO_WORD(*dest) = HI_WORD(*dest) >> count;
258	HI_WORD(*dest) = 0;
259	}
260	#endif
261
262	static inline void fp_denormalize(struct fp_ext *reg, unsigned int cnt)	22	static inline void fp_denormalize(struct fp_ext *reg, unsigned int cnt)
263	{	23	{
264	reg->exp += cnt;	24	reg->exp += cnt;
@@ -481,117 +241,6 @@ static inline void fp_dividemant(union fp_mant128 *dest, struct fp_ext *src,
481	}	241	}
482	}	242	}
483		243
484	#if 0
485	static inline unsigned int fp_fls128(union fp_mant128 *src)
486	{
487	unsigned long data;
488	unsigned int res, off;
489
490	if ((data = src->m32[0]))
491	off = 0;
492	else if ((data = src->m32[1]))
493	off = 32;
494	else if ((data = src->m32[2]))
495	off = 64;
496	else if ((data = src->m32[3]))
497	off = 96;
498	else
499	return 128;
500
501	asm ("bfffo %1{#0,#32},%0" : "=d" (res) : "dm" (data));
502	return res + off;
503	}
504
505	static inline void fp_shiftmant128(union fp_mant128 *src, int shift)
506	{
507	unsigned long sticky;
508
509	switch (shift) {
510	case 0:
511	return;
512	case 1:
513	asm volatile ("lsl.l #1,%0"
514	: "=d" (src->m32[3]) : "0" (src->m32[3]));
515	asm volatile ("roxl.l #1,%0"
516	: "=d" (src->m32[2]) : "0" (src->m32[2]));
517	asm volatile ("roxl.l #1,%0"
518	: "=d" (src->m32[1]) : "0" (src->m32[1]));
519	asm volatile ("roxl.l #1,%0"
520	: "=d" (src->m32[0]) : "0" (src->m32[0]));
521	return;
522	case 2 ... 31:
523	src->m32[0] = (src->m32[0] << shift) | (src->m32[1] >> (32 - shift));
524	src->m32[1] = (src->m32[1] << shift) | (src->m32[2] >> (32 - shift));
525	src->m32[2] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
526	src->m32[3] = (src->m32[3] << shift);
527	return;
528	case 32 ... 63:
529	shift -= 32;
530	src->m32[0] = (src->m32[1] << shift) | (src->m32[2] >> (32 - shift));
531	src->m32[1] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
532	src->m32[2] = (src->m32[3] << shift);
533	src->m32[3] = 0;
534	return;
535	case 64 ... 95:
536	shift -= 64;
537	src->m32[0] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
538	src->m32[1] = (src->m32[3] << shift);
539	src->m32[2] = src->m32[3] = 0;
540	return;
541	case 96 ... 127:
542	shift -= 96;
543	src->m32[0] = (src->m32[3] << shift);
544	src->m32[1] = src->m32[2] = src->m32[3] = 0;
545	return;
546	case -31 ... -1:
547	shift = -shift;
548	sticky = 0;
549	if (src->m32[3] << (32 - shift))
550	sticky = 1;
551	src->m32[3] = (src->m32[3] >> shift) | (src->m32[2] << (32 - shift)) | sticky;
552	src->m32[2] = (src->m32[2] >> shift) | (src->m32[1] << (32 - shift));
553	src->m32[1] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift));
554	src->m32[0] = (src->m32[0] >> shift);
555	return;
556	case -63 ... -32:
557	shift = -shift - 32;
558	sticky = 0;
559	if ((src->m32[2] << (32 - shift)) || src->m32[3])
560	sticky = 1;
561	src->m32[3] = (src->m32[2] >> shift) | (src->m32[1] << (32 - shift)) | sticky;
562	src->m32[2] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift));
563	src->m32[1] = (src->m32[0] >> shift);
564	src->m32[0] = 0;
565	return;
566	case -95 ... -64:
567	shift = -shift - 64;
568	sticky = 0;
569	if ((src->m32[1] << (32 - shift)) || src->m32[2] || src->m32[3])
570	sticky = 1;
571	src->m32[3] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift)) | sticky;
572	src->m32[2] = (src->m32[0] >> shift);
573	src->m32[1] = src->m32[0] = 0;
574	return;
575	case -127 ... -96:
576	shift = -shift - 96;
577	sticky = 0;
578	if ((src->m32[0] << (32 - shift)) || src->m32[1] || src->m32[2] || src->m32[3])
579	sticky = 1;
580	src->m32[3] = (src->m32[0] >> shift) | sticky;
581	src->m32[2] = src->m32[1] = src->m32[0] = 0;
582	return;
583	}
584
585	if (shift < 0 && (src->m32[0] || src->m32[1] || src->m32[2] || src->m32[3]))
586	src->m32[3] = 1;
587	else
588	src->m32[3] = 0;
589	src->m32[2] = 0;
590	src->m32[1] = 0;
591	src->m32[0] = 0;
592	}
593	#endif
594
595	static inline void fp_putmant128(struct fp_ext *dest, union fp_mant128 *src,	244	static inline void fp_putmant128(struct fp_ext *dest, union fp_mant128 *src,
596	int shift)	245	int shift)
597	{	246	{
@@ -637,183 +286,4 @@ static inline void fp_putmant128(struct fp_ext *dest, union fp_mant128 *src,
637	}	286	}
638	}	287	}
639		288
640	#if 0 /* old code... */
641	static inline int fls(unsigned int a)
642	{
643	int r;
644
645	asm volatile ("bfffo %1{#0,#32},%0"
646	: "=d" (r) : "md" (a));
647	return r;
648	}
649
650	/* fls = "find last set" (cf. ffs(3)) */
651	static inline int fls128(const int128 a)
652	{
653	if (a[MSW128])
654	return fls(a[MSW128]);
655	if (a[NMSW128])
656	return fls(a[NMSW128]) + 32;
657	/* XXX: it probably never gets beyond this point in actual
658	use, but that's indicative of a more general problem in the
659	algorithm (i.e. as per the actual 68881 implementation, we
660	really only need at most 67 bits of precision [plus
661	overflow]) so I'm not going to fix it. */
662	if (a[NLSW128])
663	return fls(a[NLSW128]) + 64;
664	if (a[LSW128])
665	return fls(a[LSW128]) + 96;
666	else
667	return -1;
668	}
669
670	static inline int zerop128(const int128 a)
671	{
672	return !(a[LSW128] | a[NLSW128] | a[NMSW128] | a[MSW128]);
673	}
674
675	static inline int nonzerop128(const int128 a)
676	{
677	return (a[LSW128] | a[NLSW128] | a[NMSW128] | a[MSW128]);
678	}
679
680	/* Addition and subtraction */
681	/* Do these in "pure" assembly, because "extended" asm is unmanageable
682	here */
683	static inline void add128(const int128 a, int128 b)
684	{
685	/* rotating carry flags */
686	unsigned int carry[2];
687
688	carry[0] = a[LSW128] > (0xffffffff - b[LSW128]);
689	b[LSW128] += a[LSW128];
690
691	carry[1] = a[NLSW128] > (0xffffffff - b[NLSW128] - carry[0]);
692	b[NLSW128] = a[NLSW128] + b[NLSW128] + carry[0];
693
694	carry[0] = a[NMSW128] > (0xffffffff - b[NMSW128] - carry[1]);
695	b[NMSW128] = a[NMSW128] + b[NMSW128] + carry[1];
696
697	b[MSW128] = a[MSW128] + b[MSW128] + carry[0];
698	}
699
700	/* Note: assembler semantics: "b -= a" */
701	static inline void sub128(const int128 a, int128 b)
702	{
703	/* rotating borrow flags */
704	unsigned int borrow[2];
705
706	borrow[0] = b[LSW128] < a[LSW128];
707	b[LSW128] -= a[LSW128];
708
709	borrow[1] = b[NLSW128] < a[NLSW128] + borrow[0];
710	b[NLSW128] = b[NLSW128] - a[NLSW128] - borrow[0];
711
712	borrow[0] = b[NMSW128] < a[NMSW128] + borrow[1];
713	b[NMSW128] = b[NMSW128] - a[NMSW128] - borrow[1];
714
715	b[MSW128] = b[MSW128] - a[MSW128] - borrow[0];
716	}
717
718	/* Poor man's 64-bit expanding multiply */
719	static inline void mul64(unsigned long long a, unsigned long long b, int128 c)
720	{
721	unsigned long long acc;
722	int128 acc128;
723
724	zero128(acc128);
725	zero128(c);
726
727	/* first the low words */
728	if (LO_WORD(a) && LO_WORD(b)) {
729	acc = (long long) LO_WORD(a) * LO_WORD(b);
730	c[NLSW128] = HI_WORD(acc);
731	c[LSW128] = LO_WORD(acc);
732	}
733	/* Next the high words */
734	if (HI_WORD(a) && HI_WORD(b)) {
735	acc = (long long) HI_WORD(a) * HI_WORD(b);
736	c[MSW128] = HI_WORD(acc);
737	c[NMSW128] = LO_WORD(acc);
738	}
739	/* The middle words */
740	if (LO_WORD(a) && HI_WORD(b)) {
741	acc = (long long) LO_WORD(a) * HI_WORD(b);
742	acc128[NMSW128] = HI_WORD(acc);
743	acc128[NLSW128] = LO_WORD(acc);
744	add128(acc128, c);
745	}
746	/* The first and last words */
747	if (HI_WORD(a) && LO_WORD(b)) {
748	acc = (long long) HI_WORD(a) * LO_WORD(b);
749	acc128[NMSW128] = HI_WORD(acc);
750	acc128[NLSW128] = LO_WORD(acc);
751	add128(acc128, c);
752	}
753	}
754
755	/* Note: unsigned */
756	static inline int cmp128(int128 a, int128 b)
757	{
758	if (a[MSW128] < b[MSW128])
759	return -1;
760	if (a[MSW128] > b[MSW128])
761	return 1;
762	if (a[NMSW128] < b[NMSW128])
763	return -1;
764	if (a[NMSW128] > b[NMSW128])
765	return 1;
766	if (a[NLSW128] < b[NLSW128])
767	return -1;
768	if (a[NLSW128] > b[NLSW128])
769	return 1;
770
771	return (signed) a[LSW128] - b[LSW128];
772	}
773
774	inline void div128(int128 a, int128 b, int128 c)
775	{
776	int128 mask;
777
778	/* Algorithm:
779
780	Shift the divisor until it's at least as big as the
781	dividend, keeping track of the position to which we've
782	shifted it, i.e. the power of 2 which we've multiplied it
783	by.
784
785	Then, for this power of 2 (the mask), and every one smaller
786	than it, subtract the mask from the dividend and add it to
787	the quotient until the dividend is smaller than the raised
788	divisor. At this point, divide the dividend and the mask
789	by 2 (i.e. shift one place to the right). Lather, rinse,
790	and repeat, until there are no more powers of 2 left. */
791
792	/* FIXME: needless to say, there's room for improvement here too. */
793
794	/* Shift up */
795	/* XXX: since it just has to be "at least as big", we can
796	probably eliminate this horribly wasteful loop. I will
797	have to prove this first, though */
798	set128(0, 0, 0, 1, mask);
799	while (cmp128(b, a) < 0 && !btsthi128(b)) {
800	lslone128(b);
801	lslone128(mask);
802	}
803
804	/* Shift down */
805	zero128(c);
806	do {
807	if (cmp128(a, b) >= 0) {
808	sub128(b, a);
809	add128(mask, c);
810	}
811	lsrone128(mask);
812	lsrone128(b);
813	} while (nonzerop128(mask));
814
815	/* The remainder is in a... */
816	}
817	#endif
818
819	#endif /* MULTI_ARITH_H */	289	#endif /* MULTI_ARITH_H */