aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorKyle McMartin <kyle@mcmartin.ca>2007-10-18 03:06:26 -0400
committerKyle McMartin <kyle@shortfin.cabal.ca>2007-10-18 03:58:49 -0400
commitefb80e7e097d0888e59fbbe4ded2ac5a256f556d (patch)
tree98a0f2f1514501aeebb1877bfcb5b528491e5ad5 /arch
parent6f7d998e94ec7b7f08bd0c72fc05343435d7fa93 (diff)
[PARISC] import necessary bits of libgcc.a
Currently we're hacking libs-y to include libgcc.a, but this has unforeseen consequences since the userspace libgcc is linked with fpregs enabled. We need the kernel to stop using fpregs in an uncontrolled manner to implement lazy fpu state saves. Signed-off-by: Kyle McMartin <kyle@mcmartin.ca>
Diffstat (limited to 'arch')
-rw-r--r--arch/parisc/Makefile2
-rw-r--r--arch/parisc/kernel/parisc_ksyms.c22
-rw-r--r--arch/parisc/lib/Makefile2
-rw-r--r--arch/parisc/lib/libgcc/Makefile4
-rw-r--r--arch/parisc/lib/libgcc/__ashldi3.c19
-rw-r--r--arch/parisc/lib/libgcc/__ashrdi3.c19
-rw-r--r--arch/parisc/lib/libgcc/__clzsi2.c30
-rw-r--r--arch/parisc/lib/libgcc/__divdi3.c23
-rw-r--r--arch/parisc/lib/libgcc/__divsi3.c23
-rw-r--r--arch/parisc/lib/libgcc/__lshrdi3.c19
-rw-r--r--arch/parisc/lib/libgcc/__moddi3.c23
-rw-r--r--arch/parisc/lib/libgcc/__modsi3.c23
-rw-r--r--arch/parisc/lib/libgcc/__muldi3.c22
-rw-r--r--arch/parisc/lib/libgcc/__udivdi3.c7
-rw-r--r--arch/parisc/lib/libgcc/__udivmoddi4.c31
-rw-r--r--arch/parisc/lib/libgcc/__udivmodsi4.c31
-rw-r--r--arch/parisc/lib/libgcc/__udivsi3.c7
-rw-r--r--arch/parisc/lib/libgcc/__umoddi3.c10
-rw-r--r--arch/parisc/lib/libgcc/__umodsi3.c10
-rw-r--r--arch/parisc/lib/libgcc/__umulsidi3.c46
-rw-r--r--arch/parisc/lib/libgcc/libgcc.h32
-rw-r--r--arch/parisc/lib/milli/Makefile1
-rw-r--r--arch/parisc/lib/milli/divI.S254
-rw-r--r--arch/parisc/lib/milli/divU.S235
-rw-r--r--arch/parisc/lib/milli/div_const.S682
-rw-r--r--arch/parisc/lib/milli/dyncall.S32
-rw-r--r--arch/parisc/lib/milli/milli.S2071
-rw-r--r--arch/parisc/lib/milli/milli.h165
-rw-r--r--arch/parisc/lib/milli/mulI.S474
-rw-r--r--arch/parisc/lib/milli/remI.S185
-rw-r--r--arch/parisc/lib/milli/remU.S148
31 files changed, 4628 insertions, 24 deletions
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index f3d0d7c70977..20d327f63d57 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -69,7 +69,7 @@ kernel-y := mm/ kernel/ math-emu/ kernel/init_task.o
69kernel-$(CONFIG_HPUX) += hpux/ 69kernel-$(CONFIG_HPUX) += hpux/
70 70
71core-y += $(addprefix arch/parisc/, $(kernel-y)) 71core-y += $(addprefix arch/parisc/, $(kernel-y))
72libs-y += arch/parisc/lib/ `$(CC) -print-libgcc-file-name` 72libs-y += arch/parisc/lib/
73 73
74drivers-$(CONFIG_OPROFILE) += arch/parisc/oprofile/ 74drivers-$(CONFIG_OPROFILE) += arch/parisc/oprofile/
75 75
diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c
index 7aca704e96f0..671ee5b9950c 100644
--- a/arch/parisc/kernel/parisc_ksyms.c
+++ b/arch/parisc/kernel/parisc_ksyms.c
@@ -122,31 +122,9 @@ EXPORT_SYMBOL($$divI_12);
122EXPORT_SYMBOL($$divI_14); 122EXPORT_SYMBOL($$divI_14);
123EXPORT_SYMBOL($$divI_15); 123EXPORT_SYMBOL($$divI_15);
124 124
125extern void __ashrdi3(void);
126extern void __ashldi3(void);
127extern void __lshrdi3(void);
128extern void __muldi3(void);
129
130EXPORT_SYMBOL(__ashrdi3);
131EXPORT_SYMBOL(__ashldi3);
132EXPORT_SYMBOL(__lshrdi3);
133EXPORT_SYMBOL(__muldi3);
134
135asmlinkage void * __canonicalize_funcptr_for_compare(void *); 125asmlinkage void * __canonicalize_funcptr_for_compare(void *);
136EXPORT_SYMBOL(__canonicalize_funcptr_for_compare); 126EXPORT_SYMBOL(__canonicalize_funcptr_for_compare);
137 127
138#ifdef CONFIG_64BIT
139extern void __divdi3(void);
140extern void __udivdi3(void);
141extern void __umoddi3(void);
142extern void __moddi3(void);
143
144EXPORT_SYMBOL(__divdi3);
145EXPORT_SYMBOL(__udivdi3);
146EXPORT_SYMBOL(__umoddi3);
147EXPORT_SYMBOL(__moddi3);
148#endif
149
150#ifndef CONFIG_64BIT 128#ifndef CONFIG_64BIT
151extern void $$dyncall(void); 129extern void $$dyncall(void);
152EXPORT_SYMBOL($$dyncall); 130EXPORT_SYMBOL($$dyncall);
diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile
index 5f2e6904d14a..7ce406c7daf5 100644
--- a/arch/parisc/lib/Makefile
+++ b/arch/parisc/lib/Makefile
@@ -4,4 +4,4 @@
4 4
5lib-y := lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o 5lib-y := lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o
6 6
7obj-y := iomap.o 7obj-y := libgcc/ milli/ iomap.o
diff --git a/arch/parisc/lib/libgcc/Makefile b/arch/parisc/lib/libgcc/Makefile
new file mode 100644
index 000000000000..b67a85ad9c87
--- /dev/null
+++ b/arch/parisc/lib/libgcc/Makefile
@@ -0,0 +1,4 @@
1obj-y := __ashldi3.o __ashrdi3.o __clzsi2.o __divdi3.o __divsi3.o \
2 __lshrdi3.o __moddi3.o __modsi3.o __udivdi3.o \
3 __udivmoddi4.o __udivmodsi4.o __udivsi3.o \
4 __umoddi3.o __umodsi3.o __muldi3.o __umulsidi3.o
diff --git a/arch/parisc/lib/libgcc/__ashldi3.c b/arch/parisc/lib/libgcc/__ashldi3.c
new file mode 100644
index 000000000000..a14a257abb2b
--- /dev/null
+++ b/arch/parisc/lib/libgcc/__ashldi3.c
@@ -0,0 +1,19 @@
1#include "libgcc.h"
2
/*
 * __ashldi3 - shift a 64-bit value left, working on its two 32-bit halves.
 * @v:   value to shift
 * @cnt: shift count; only bits 0-5 are examined (cnt & 31 and cnt & 32)
 *
 * Returns v << cnt for counts 0..63.
 */
u64 __ashldi3(u64 v, int cnt)
{
	int c = cnt & 31;
	u32 vl = (u32) v;
	u32 vh = (u32) (v >> 32);

	if (cnt & 32) {
		/* Shift of 32..63: the low word moves entirely into the high word. */
		vh = vl << c;
		vl = 0;
	} else if (c) {
		/*
		 * Shift of 1..31: bits leaving the low word enter the high word.
		 * c == 0 must not reach this branch, since vl >> (32 - 0) would
		 * shift a 32-bit value by its full width, which is undefined.
		 */
		vh = (vh << c) | (vl >> (32 - c));
		vl <<= c;
	}
	/* c == 0 without bit 5 set: the value is returned unchanged. */

	return ((u64) vh << 32) | vl;
}
19EXPORT_SYMBOL(__ashldi3);
diff --git a/arch/parisc/lib/libgcc/__ashrdi3.c b/arch/parisc/lib/libgcc/__ashrdi3.c
new file mode 100644
index 000000000000..8636a5aa4f77
--- /dev/null
+++ b/arch/parisc/lib/libgcc/__ashrdi3.c
@@ -0,0 +1,19 @@
1#include "libgcc.h"
2
/*
 * __ashrdi3 - arithmetic (sign-propagating) right shift of a 64-bit value,
 * working on its two 32-bit halves.
 * @v:   value to shift, passed as its raw 64-bit pattern
 * @cnt: shift count; only bits 0-5 are examined (cnt & 31 and cnt & 32)
 *
 * Returns the bit pattern of the signed value shifted right by cnt. This
 * relies on ">>" of a negative s32 being an arithmetic shift, as the
 * original code did.
 */
u64 __ashrdi3(u64 v, int cnt)
{
	int c = cnt & 31;
	u32 vl = (u32) v;
	u32 vh = (u32) (v >> 32);

	if (cnt & 32) {
		/* Shift of 32..63: high word moves down, sign fills the top. */
		vl = ((s32) vh >> c);
		vh = (s32) vh >> 31;
	} else if (c) {
		/*
		 * Shift of 1..31. c == 0 must not reach this branch, since
		 * vh << (32 - 0) would shift a 32-bit value by its full width,
		 * which is undefined.
		 */
		vl = (vl >> c) | (vh << (32 - c));
		vh = ((s32) vh >> c);
	}
	/* c == 0 without bit 5 set: the value is returned unchanged. */

	return ((u64) vh << 32) | vl;
}
19EXPORT_SYMBOL(__ashrdi3);
diff --git a/arch/parisc/lib/libgcc/__clzsi2.c b/arch/parisc/lib/libgcc/__clzsi2.c
new file mode 100644
index 000000000000..a7aa2f55a9c6
--- /dev/null
+++ b/arch/parisc/lib/libgcc/__clzsi2.c
@@ -0,0 +1,30 @@
1#include "libgcc.h"
2
/*
 * __clzsi2 - count leading zero bits of a 32-bit value by binary search.
 * @v: value to examine
 *
 * Starts from 31 and, for each window width 16, 8, 4, 2, 1, subtracts the
 * width whenever any bit at or above that position is set, then narrows
 * the search to those upper bits. Returns 31 for v == 0 as well as v == 1.
 */
u32 __clzsi2(u32 v)
{
	u32 zeros = 31;
	int width;

	for (width = 16; width; width >>= 1) {
		if (v >> width) {
			zeros -= width;
			v >>= width;
		}
	}

	return zeros;
}
30EXPORT_SYMBOL(__clzsi2);
diff --git a/arch/parisc/lib/libgcc/__divdi3.c b/arch/parisc/lib/libgcc/__divdi3.c
new file mode 100644
index 000000000000..f23c6fe2838b
--- /dev/null
+++ b/arch/parisc/lib/libgcc/__divdi3.c
@@ -0,0 +1,23 @@
1#include "libgcc.h"
2
/*
 * __divdi3 - signed 64-bit division, quotient truncated towards zero.
 * @num: dividend
 * @den: divisor
 *
 * The division itself is done on magnitudes by __udivmoddi4(); the
 * quotient is negated when exactly one operand is negative.
 */
s64 __divdi3(s64 num, s64 den)
{
	/*
	 * Take the magnitudes in unsigned arithmetic: negating the most
	 * negative s64 as a signed value overflows, while -(u64)x is well
	 * defined and yields the correct magnitude.
	 */
	u64 unum = num < 0 ? -(u64) num : (u64) num;
	u64 uden = den < 0 ? -(u64) den : (u64) den;
	u64 quot = __udivmoddi4(unum, uden, NULL);

	if ((num < 0) != (den < 0))
		quot = -quot;

	return (s64) quot;
}
23EXPORT_SYMBOL(__divdi3);
diff --git a/arch/parisc/lib/libgcc/__divsi3.c b/arch/parisc/lib/libgcc/__divsi3.c
new file mode 100644
index 000000000000..730fb530680d
--- /dev/null
+++ b/arch/parisc/lib/libgcc/__divsi3.c
@@ -0,0 +1,23 @@
1#include "libgcc.h"
2
/*
 * __divsi3 - signed 32-bit division, quotient truncated towards zero.
 * @num: dividend
 * @den: divisor
 *
 * The division itself is done on magnitudes by __udivmodsi4(); the
 * quotient is negated when exactly one operand is negative.
 */
s32 __divsi3(s32 num, s32 den)
{
	/*
	 * Take the magnitudes in unsigned arithmetic: negating the most
	 * negative s32 as a signed value overflows, while -(u32)x is well
	 * defined and yields the correct magnitude.
	 */
	u32 unum = num < 0 ? -(u32) num : (u32) num;
	u32 uden = den < 0 ? -(u32) den : (u32) den;
	u32 quot = __udivmodsi4(unum, uden, NULL);

	if ((num < 0) != (den < 0))
		quot = -quot;

	return (s32) quot;
}
23EXPORT_SYMBOL(__divsi3);
diff --git a/arch/parisc/lib/libgcc/__lshrdi3.c b/arch/parisc/lib/libgcc/__lshrdi3.c
new file mode 100644
index 000000000000..4a820708ec57
--- /dev/null
+++ b/arch/parisc/lib/libgcc/__lshrdi3.c
@@ -0,0 +1,19 @@
1#include "libgcc.h"
2
/*
 * __lshrdi3 - logical (zero-filling) right shift of a 64-bit value,
 * working on its two 32-bit halves.
 * @v:   value to shift
 * @cnt: shift count; only bits 0-5 are examined (cnt & 31 and cnt & 32)
 *
 * Returns v >> cnt for counts 0..63.
 */
u64 __lshrdi3(u64 v, int cnt)
{
	int c = cnt & 31;
	u32 vl = (u32) v;
	u32 vh = (u32) (v >> 32);

	if (cnt & 32) {
		/* Shift of 32..63: the high word moves entirely into the low word. */
		vl = vh >> c;
		vh = 0;
	} else if (c) {
		/*
		 * Shift of 1..31: bits leaving the high word enter the low word.
		 * c == 0 must not reach this branch, since vh << (32 - 0) would
		 * shift a 32-bit value by its full width, which is undefined.
		 */
		vl = (vl >> c) | (vh << (32 - c));
		vh >>= c;
	}
	/* c == 0 without bit 5 set: the value is returned unchanged. */

	return ((u64) vh << 32) | vl;
}
19EXPORT_SYMBOL(__lshrdi3);
diff --git a/arch/parisc/lib/libgcc/__moddi3.c b/arch/parisc/lib/libgcc/__moddi3.c
new file mode 100644
index 000000000000..ed64bbafc989
--- /dev/null
+++ b/arch/parisc/lib/libgcc/__moddi3.c
@@ -0,0 +1,23 @@
1#include "libgcc.h"
2
/*
 * __moddi3 - signed 64-bit remainder matching C's "%" operator.
 * @num: dividend
 * @den: divisor
 *
 * The remainder is computed on magnitudes by __udivmoddi4(). With
 * truncating division the remainder takes the sign of the dividend only,
 * so the divisor's sign must not influence the result:
 * 7 % -2 == 1 and -7 % -2 == -1.
 */
s64 __moddi3(s64 num, s64 den)
{
	/*
	 * Magnitudes are taken in unsigned arithmetic so that the most
	 * negative s64 does not overflow on negation.
	 */
	u64 unum = num < 0 ? -(u64) num : (u64) num;
	u64 uden = den < 0 ? -(u64) den : (u64) den;
	u64 rem;

	(void)__udivmoddi4(unum, uden, &rem);

	/* Only a negative dividend makes the remainder negative. */
	if (num < 0)
		rem = -rem;

	return (s64) rem;
}
23EXPORT_SYMBOL(__moddi3);
diff --git a/arch/parisc/lib/libgcc/__modsi3.c b/arch/parisc/lib/libgcc/__modsi3.c
new file mode 100644
index 000000000000..62f773efaeea
--- /dev/null
+++ b/arch/parisc/lib/libgcc/__modsi3.c
@@ -0,0 +1,23 @@
1#include "libgcc.h"
2
/*
 * __modsi3 - signed 32-bit remainder matching C's "%" operator.
 * @num: dividend
 * @den: divisor
 *
 * The remainder is computed on magnitudes by __udivmodsi4(). With
 * truncating division the remainder takes the sign of the dividend only,
 * so the divisor's sign must not influence the result:
 * 7 % -2 == 1 and -7 % -2 == -1.
 */
s32 __modsi3(s32 num, s32 den)
{
	/*
	 * Magnitudes are taken in unsigned arithmetic so that the most
	 * negative s32 does not overflow on negation.
	 */
	u32 unum = num < 0 ? -(u32) num : (u32) num;
	u32 uden = den < 0 ? -(u32) den : (u32) den;
	u32 rem;

	(void)__udivmodsi4(unum, uden, &rem);

	/* Only a negative dividend makes the remainder negative. */
	if (num < 0)
		rem = -rem;

	return (s32) rem;
}
23EXPORT_SYMBOL(__modsi3);
diff --git a/arch/parisc/lib/libgcc/__muldi3.c b/arch/parisc/lib/libgcc/__muldi3.c
new file mode 100644
index 000000000000..3308abdd5580
--- /dev/null
+++ b/arch/parisc/lib/libgcc/__muldi3.c
@@ -0,0 +1,22 @@
1#include "libgcc.h"
2
/*
 * View of a 64-bit value as two 32-bit words. The high word is declared
 * first, so s.high/s.low line up with ll only on a big-endian target.
 */
union DWunion {
	struct {
		s32 high;
		s32 low;
	} s;
	s64 ll;
};

/*
 * __muldi3 - 64-bit multiply built from 32-bit pieces.
 * @u: first factor
 * @v: second factor
 *
 * The full 64-bit product of the two low words comes from __umulsidi3();
 * the two cross products (low * high) only affect the upper 32 bits of
 * the result, so they are summed modulo 2^32 and added to the high word.
 * The high * high term lies entirely above bit 63 and is dropped.
 */
s64 __muldi3(s64 u, s64 v)
{
	union DWunion a, b, prod;
	u32 cross;

	a.ll = u;
	b.ll = v;

	prod.ll = __umulsidi3(a.s.low, b.s.low);

	cross = (u32) a.s.low * (u32) b.s.high;
	cross += (u32) a.s.high * (u32) b.s.low;
	prod.s.high += cross;

	return prod.ll;
}
22EXPORT_SYMBOL(__muldi3);
diff --git a/arch/parisc/lib/libgcc/__udivdi3.c b/arch/parisc/lib/libgcc/__udivdi3.c
new file mode 100644
index 000000000000..740023d690f5
--- /dev/null
+++ b/arch/parisc/lib/libgcc/__udivdi3.c
@@ -0,0 +1,7 @@
1#include "libgcc.h"
2
/*
 * __udivdi3 - unsigned 64-bit division.
 * Delegates to __udivmoddi4() and discards the remainder.
 */
u64 __udivdi3(u64 num, u64 den)
{
	u64 quot = __udivmoddi4(num, den, NULL);

	return quot;
}
7EXPORT_SYMBOL(__udivdi3);
diff --git a/arch/parisc/lib/libgcc/__udivmoddi4.c b/arch/parisc/lib/libgcc/__udivmoddi4.c
new file mode 100644
index 000000000000..2df0caa5a7d8
--- /dev/null
+++ b/arch/parisc/lib/libgcc/__udivmoddi4.c
@@ -0,0 +1,31 @@
1#include "libgcc.h"
2
/*
 * __udivmoddi4 - unsigned 64-bit shift-and-subtract division.
 * @num:   dividend
 * @den:   divisor; BUG() is invoked when it is zero
 * @rem_p: if non-NULL, receives the remainder
 *
 * Returns the quotient num / den.
 */
u64 __udivmoddi4(u64 num, u64 den, u64 * rem_p)
{
	u64 result = 0;
	u64 bit;

	if (!den)
		BUG();

	/*
	 * Shift the divisor up until its top bit is set; "bit" tracks the
	 * quotient bit that corresponds to the current divisor position.
	 */
	for (bit = 1; !(den >> 63); bit <<= 1)
		den <<= 1;

	/* Walk back down, subtracting wherever the shifted divisor fits. */
	for (; bit; bit >>= 1, den >>= 1) {
		if (num >= den) {
			num -= den;
			result |= bit;
		}
	}

	/* Whatever is left of the dividend is the remainder. */
	if (rem_p)
		*rem_p = num;

	return result;
}
31EXPORT_SYMBOL(__udivmoddi4);
diff --git a/arch/parisc/lib/libgcc/__udivmodsi4.c b/arch/parisc/lib/libgcc/__udivmodsi4.c
new file mode 100644
index 000000000000..2a2fc28b2026
--- /dev/null
+++ b/arch/parisc/lib/libgcc/__udivmodsi4.c
@@ -0,0 +1,31 @@
1#include "libgcc.h"
2
/*
 * __udivmodsi4 - unsigned 32-bit shift-and-subtract division.
 * @num:   dividend
 * @den:   divisor; BUG() is invoked when it is zero
 * @rem_p: if non-NULL, receives the remainder
 *
 * Returns the quotient num / den.
 */
u32 __udivmodsi4(u32 num, u32 den, u32 * rem_p)
{
	u32 result = 0;
	u32 bit;

	if (!den)
		BUG();

	/*
	 * Shift the divisor up until its top bit is set; "bit" tracks the
	 * quotient bit that corresponds to the current divisor position.
	 */
	for (bit = 1; !(den >> 31); bit <<= 1)
		den <<= 1;

	/* Walk back down, subtracting wherever the shifted divisor fits. */
	for (; bit; bit >>= 1, den >>= 1) {
		if (num >= den) {
			num -= den;
			result |= bit;
		}
	}

	/* Whatever is left of the dividend is the remainder. */
	if (rem_p)
		*rem_p = num;

	return result;
}
31EXPORT_SYMBOL(__udivmodsi4);
diff --git a/arch/parisc/lib/libgcc/__udivsi3.c b/arch/parisc/lib/libgcc/__udivsi3.c
new file mode 100644
index 000000000000..756a44164e90
--- /dev/null
+++ b/arch/parisc/lib/libgcc/__udivsi3.c
@@ -0,0 +1,7 @@
1#include "libgcc.h"
2
/*
 * __udivsi3 - unsigned 32-bit division.
 * Delegates to __udivmodsi4() and discards the remainder.
 */
u32 __udivsi3(u32 num, u32 den)
{
	u32 quot = __udivmodsi4(num, den, NULL);

	return quot;
}
7EXPORT_SYMBOL(__udivsi3);
diff --git a/arch/parisc/lib/libgcc/__umoddi3.c b/arch/parisc/lib/libgcc/__umoddi3.c
new file mode 100644
index 000000000000..ac744e948bc1
--- /dev/null
+++ b/arch/parisc/lib/libgcc/__umoddi3.c
@@ -0,0 +1,10 @@
1#include "libgcc.h"
2
/*
 * __umoddi3 - unsigned 64-bit remainder.
 * Delegates to __udivmoddi4() and returns only the remainder it stores.
 */
u64 __umoddi3(u64 num, u64 den)
{
	u64 rem;

	/* The quotient is not needed here. */
	(void)__udivmoddi4(num, den, &rem);

	return rem;
}
10EXPORT_SYMBOL(__umoddi3);
diff --git a/arch/parisc/lib/libgcc/__umodsi3.c b/arch/parisc/lib/libgcc/__umodsi3.c
new file mode 100644
index 000000000000..51f55aa89f9a
--- /dev/null
+++ b/arch/parisc/lib/libgcc/__umodsi3.c
@@ -0,0 +1,10 @@
1#include "libgcc.h"
2
/*
 * __umodsi3 - unsigned 32-bit remainder.
 * Delegates to __udivmodsi4() and returns only the remainder it stores.
 */
u32 __umodsi3(u32 num, u32 den)
{
	u32 rem;

	/* The quotient is not needed here. */
	(void)__udivmodsi4(num, den, &rem);

	return rem;
}
10EXPORT_SYMBOL(__umodsi3);
diff --git a/arch/parisc/lib/libgcc/__umulsidi3.c b/arch/parisc/lib/libgcc/__umulsidi3.c
new file mode 100644
index 000000000000..396f669164d4
--- /dev/null
+++ b/arch/parisc/lib/libgcc/__umulsidi3.c
@@ -0,0 +1,46 @@
1#include "libgcc.h"
2
/*
 * View of a 64-bit value as two 32-bit words. The high word is declared
 * first, so s.high/s.low line up with ll only on a big-endian target.
 */
union DWunion {
	struct {
		s32 high;
		s32 low;
	} s;
	s64 ll;
};

/*
 * __umulsidi3 - widening 32 x 32 -> 64 bit unsigned multiply.
 * @u: first factor
 * @v: second factor
 *
 * Schoolbook multiplication on 16-bit halves: four 16 x 16 partial
 * products are formed in 32-bit arithmetic and combined into a high and
 * a low 32-bit word, which are then returned through union DWunion.
 */
u64 __umulsidi3(u32 u, u32 v)
{
	const u32 u_lo = u & 0xffff;
	const u32 u_hi = u >> 16;
	const u32 v_lo = v & 0xffff;
	const u32 v_hi = v >> 16;

	u32 lo_lo = u_lo * v_lo;
	u32 mid = u_lo * v_hi;
	u32 hi_lo = u_hi * v_lo;
	u32 hi_hi = u_hi * v_hi;
	union DWunion prod;

	mid += lo_lo >> 16;	/* this cannot carry */
	mid += hi_lo;		/* but this can */
	if (mid < hi_lo)	/* wrapped: carry into bit 16 of the top product */
		hi_hi += 0x10000;

	prod.s.high = hi_hi + (mid >> 16);
	prod.s.low = (mid << 16) + (lo_lo & 0xffff);

	return prod.ll;
}
diff --git a/arch/parisc/lib/libgcc/libgcc.h b/arch/parisc/lib/libgcc/libgcc.h
new file mode 100644
index 000000000000..5a6f7a510fbd
--- /dev/null
+++ b/arch/parisc/lib/libgcc/libgcc.h
@@ -0,0 +1,32 @@
1#ifndef _PA_LIBGCC_H_
2#define _PA_LIBGCC_H_
3
4#include <linux/types.h>
5#include <linux/module.h>
6
7/* Cribbed from klibc/libgcc/ */
8u64 __ashldi3(u64 v, int cnt);
9u64 __ashrdi3(u64 v, int cnt);
10
11u32 __clzsi2(u32 v);
12
13s64 __divdi3(s64 num, s64 den);
14s32 __divsi3(s32 num, s32 den);
15
16u64 __lshrdi3(u64 v, int cnt);
17
18s64 __moddi3(s64 num, s64 den);
19s32 __modsi3(s32 num, s32 den);
20
21u64 __udivdi3(u64 num, u64 den);
22u32 __udivsi3(u32 num, u32 den);
23
24u64 __udivmoddi4(u64 num, u64 den, u64 * rem_p);
25u32 __udivmodsi4(u32 num, u32 den, u32 * rem_p);
26
27u64 __umulsidi3(u32 u, u32 v);
28
29u64 __umoddi3(u64 num, u64 den);
30u32 __umodsi3(u32 num, u32 den);
31
32#endif /*_PA_LIBGCC_H_*/
diff --git a/arch/parisc/lib/milli/Makefile b/arch/parisc/lib/milli/Makefile
new file mode 100644
index 000000000000..9b24e9b1f3cb
--- /dev/null
+++ b/arch/parisc/lib/milli/Makefile
@@ -0,0 +1 @@
obj-y := dyncall.o divI.o divU.o remI.o remU.o div_const.o mulI.o
diff --git a/arch/parisc/lib/milli/divI.S b/arch/parisc/lib/milli/divI.S
new file mode 100644
index 000000000000..ac106b7b6f24
--- /dev/null
+++ b/arch/parisc/lib/milli/divI.S
@@ -0,0 +1,254 @@
1/* 32 and 64-bit millicode, original author Hewlett-Packard
2 adapted for gcc by Paul Bame <bame@debian.org>
3 and Alan Modra <alan@linuxcare.com.au>.
4
5 Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
6
7 This file is part of GCC and is released under the terms of
8 of the GNU General Public License as published by the Free Software
9 Foundation; either version 2, or (at your option) any later version.
10 See the file COPYING in the top-level GCC source directory for a copy
11 of the license. */
12
13#include "milli.h"
14
15#ifdef L_divI
16/* ROUTINES: $$divI, $$divoI
17
18 Single precision divide for signed binary integers.
19
20 The quotient is truncated towards zero.
21 The sign of the quotient is the XOR of the signs of the dividend and
22 divisor.
23 Divide by zero is trapped.
24 Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI.
25
26 INPUT REGISTERS:
27 . arg0 == dividend
28 . arg1 == divisor
29 . mrp == return pc
30 . sr0 == return space when called externally
31
32 OUTPUT REGISTERS:
33 . arg0 = undefined
34 . arg1 = undefined
35 . ret1 = quotient
36
37 OTHER REGISTERS AFFECTED:
38 . r1 = undefined
39
40 SIDE EFFECTS:
41 . Causes a trap under the following conditions:
42 . divisor is zero (traps with ADDIT,= 0,25,0)
43 . dividend==-2**31 and divisor==-1 and routine is $$divoI
44 . (traps with ADDO 26,25,0)
45 . Changes memory at the following places:
46 . NONE
47
48 PERMISSIBLE CONTEXT:
49 . Unwindable.
50 . Suitable for internal or external millicode.
51 . Assumes the special millicode register conventions.
52
53 DISCUSSION:
54 . Branches to other millicode routines using BE
55 . $$div_# for # being 2,3,4,5,6,7,8,9,10,12,14,15
56 .
57 . For selected divisors, calls a divide by constant routine written by
58 . Karl Pettis. Eligible divisors are 1..15 excluding 11 and 13.
59 .
60 . The only overflow case is -2**31 divided by -1.
61 . Both routines return -2**31 but only $$divoI traps. */
62
63RDEFINE(temp,r1)
64RDEFINE(retreg,ret1) /* r29 */
65RDEFINE(temp1,arg0)
66 SUBSPA_MILLI_DIV
67 ATTR_MILLI
68 .import $$divI_2,millicode
69 .import $$divI_3,millicode
70 .import $$divI_4,millicode
71 .import $$divI_5,millicode
72 .import $$divI_6,millicode
73 .import $$divI_7,millicode
74 .import $$divI_8,millicode
75 .import $$divI_9,millicode
76 .import $$divI_10,millicode
77 .import $$divI_12,millicode
78 .import $$divI_14,millicode
79 .import $$divI_15,millicode
80 .export $$divI,millicode
81 .export $$divoI,millicode
82 .proc
83 .callinfo millicode
84 .entry
85GSYM($$divoI)
86 comib,=,n -1,arg1,LREF(negative1) /* when divisor == -1 */
87GSYM($$divI)
88 ldo -1(arg1),temp /* is there at most one bit set ? */
89 and,<> arg1,temp,r0 /* if not, don't use power of 2 divide */
90 addi,> 0,arg1,r0 /* if divisor > 0, use power of 2 divide */
91 b,n LREF(neg_denom)
92LSYM(pow2)
93 addi,>= 0,arg0,retreg /* if numerator is negative, add the */
94 add arg0,temp,retreg /* (denominator - 1) to correct for shifts */
95 extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */
96 extrs retreg,15,16,retreg /* retreg = retreg >> 16 */
97 or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */
98 ldi 0xcc,temp1 /* setup 0xcc in temp1 */
99 extru,= arg1,23,8,temp /* test denominator with 0xff00 */
100 extrs retreg,23,24,retreg /* retreg = retreg >> 8 */
101 or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */
102 ldi 0xaa,temp /* setup 0xaa in temp */
103 extru,= arg1,27,4,r0 /* test denominator with 0xf0 */
104 extrs retreg,27,28,retreg /* retreg = retreg >> 4 */
105 and,= arg1,temp1,r0 /* test denominator with 0xcc */
106 extrs retreg,29,30,retreg /* retreg = retreg >> 2 */
107 and,= arg1,temp,r0 /* test denominator with 0xaa */
108 extrs retreg,30,31,retreg /* retreg = retreg >> 1 */
109 MILLIRETN
110LSYM(neg_denom)
111 addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power of 2 */
112 b,n LREF(regular_seq)
113 sub r0,arg1,temp /* make denominator positive */
114 comb,=,n arg1,temp,LREF(regular_seq) /* test against 0x80000000 and 0 */
115 ldo -1(temp),retreg /* is there at most one bit set ? */
116 and,= temp,retreg,r0 /* if so, the denominator is power of 2 */
117 b,n LREF(regular_seq)
118 sub r0,arg0,retreg /* negate numerator */
119 comb,=,n arg0,retreg,LREF(regular_seq) /* test against 0x80000000 */
120 copy retreg,arg0 /* set up arg0, arg1 and temp */
121 copy temp,arg1 /* before branching to pow2 */
122 b LREF(pow2)
123 ldo -1(arg1),temp
124LSYM(regular_seq)
125 comib,>>=,n 15,arg1,LREF(small_divisor)
126 add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
127LSYM(normal)
128 subi 0,retreg,retreg /* make it positive */
129 sub 0,arg1,temp /* clear carry, */
130 /* negate the divisor */
131 ds 0,temp,0 /* set V-bit to the comple- */
132 /* ment of the divisor sign */
133 add retreg,retreg,retreg /* shift msb bit into carry */
134 ds r0,arg1,temp /* 1st divide step, if no carry */
135 addc retreg,retreg,retreg /* shift retreg with/into carry */
136 ds temp,arg1,temp /* 2nd divide step */
137 addc retreg,retreg,retreg /* shift retreg with/into carry */
138 ds temp,arg1,temp /* 3rd divide step */
139 addc retreg,retreg,retreg /* shift retreg with/into carry */
140 ds temp,arg1,temp /* 4th divide step */
141 addc retreg,retreg,retreg /* shift retreg with/into carry */
142 ds temp,arg1,temp /* 5th divide step */
143 addc retreg,retreg,retreg /* shift retreg with/into carry */
144 ds temp,arg1,temp /* 6th divide step */
145 addc retreg,retreg,retreg /* shift retreg with/into carry */
146 ds temp,arg1,temp /* 7th divide step */
147 addc retreg,retreg,retreg /* shift retreg with/into carry */
148 ds temp,arg1,temp /* 8th divide step */
149 addc retreg,retreg,retreg /* shift retreg with/into carry */
150 ds temp,arg1,temp /* 9th divide step */
151 addc retreg,retreg,retreg /* shift retreg with/into carry */
152 ds temp,arg1,temp /* 10th divide step */
153 addc retreg,retreg,retreg /* shift retreg with/into carry */
154 ds temp,arg1,temp /* 11th divide step */
155 addc retreg,retreg,retreg /* shift retreg with/into carry */
156 ds temp,arg1,temp /* 12th divide step */
157 addc retreg,retreg,retreg /* shift retreg with/into carry */
158 ds temp,arg1,temp /* 13th divide step */
159 addc retreg,retreg,retreg /* shift retreg with/into carry */
160 ds temp,arg1,temp /* 14th divide step */
161 addc retreg,retreg,retreg /* shift retreg with/into carry */
162 ds temp,arg1,temp /* 15th divide step */
163 addc retreg,retreg,retreg /* shift retreg with/into carry */
164 ds temp,arg1,temp /* 16th divide step */
165 addc retreg,retreg,retreg /* shift retreg with/into carry */
166 ds temp,arg1,temp /* 17th divide step */
167 addc retreg,retreg,retreg /* shift retreg with/into carry */
168 ds temp,arg1,temp /* 18th divide step */
169 addc retreg,retreg,retreg /* shift retreg with/into carry */
170 ds temp,arg1,temp /* 19th divide step */
171 addc retreg,retreg,retreg /* shift retreg with/into carry */
172 ds temp,arg1,temp /* 20th divide step */
173 addc retreg,retreg,retreg /* shift retreg with/into carry */
174 ds temp,arg1,temp /* 21st divide step */
175 addc retreg,retreg,retreg /* shift retreg with/into carry */
176 ds temp,arg1,temp /* 22nd divide step */
177 addc retreg,retreg,retreg /* shift retreg with/into carry */
178 ds temp,arg1,temp /* 23rd divide step */
179 addc retreg,retreg,retreg /* shift retreg with/into carry */
180 ds temp,arg1,temp /* 24th divide step */
181 addc retreg,retreg,retreg /* shift retreg with/into carry */
182 ds temp,arg1,temp /* 25th divide step */
183 addc retreg,retreg,retreg /* shift retreg with/into carry */
184 ds temp,arg1,temp /* 26th divide step */
185 addc retreg,retreg,retreg /* shift retreg with/into carry */
186 ds temp,arg1,temp /* 27th divide step */
187 addc retreg,retreg,retreg /* shift retreg with/into carry */
188 ds temp,arg1,temp /* 28th divide step */
189 addc retreg,retreg,retreg /* shift retreg with/into carry */
190 ds temp,arg1,temp /* 29th divide step */
191 addc retreg,retreg,retreg /* shift retreg with/into carry */
192 ds temp,arg1,temp /* 30th divide step */
193 addc retreg,retreg,retreg /* shift retreg with/into carry */
194 ds temp,arg1,temp /* 31st divide step */
195 addc retreg,retreg,retreg /* shift retreg with/into carry */
196 ds temp,arg1,temp /* 32nd divide step, */
197 addc retreg,retreg,retreg /* shift last retreg bit into retreg */
198 xor,>= arg0,arg1,0 /* get correct sign of quotient */
199 sub 0,retreg,retreg /* based on operand signs */
200 MILLIRETN
201 nop
202
203LSYM(small_divisor)
204
205#if defined(CONFIG_64BIT)
206/* Clear the upper 32 bits of the arg1 register. We are working with */
207/* small divisors (and 32-bit integers) We must not be misled */
208/* by "1" bits left in the upper 32 bits. */
209 depd %r0,31,32,%r25
210#endif
211 blr,n arg1,r0
212 nop
213/* table for divisor == 0,1, ... ,15 */
214 addit,= 0,arg1,r0 /* trap if divisor == 0 */
215 nop
216 MILLIRET /* divisor == 1 */
217 copy arg0,retreg
218 MILLI_BEN($$divI_2) /* divisor == 2 */
219 nop
220 MILLI_BEN($$divI_3) /* divisor == 3 */
221 nop
222 MILLI_BEN($$divI_4) /* divisor == 4 */
223 nop
224 MILLI_BEN($$divI_5) /* divisor == 5 */
225 nop
226 MILLI_BEN($$divI_6) /* divisor == 6 */
227 nop
228 MILLI_BEN($$divI_7) /* divisor == 7 */
229 nop
230 MILLI_BEN($$divI_8) /* divisor == 8 */
231 nop
232 MILLI_BEN($$divI_9) /* divisor == 9 */
233 nop
234 MILLI_BEN($$divI_10) /* divisor == 10 */
235 nop
236 b LREF(normal) /* divisor == 11 */
237 add,>= 0,arg0,retreg
238 MILLI_BEN($$divI_12) /* divisor == 12 */
239 nop
240 b LREF(normal) /* divisor == 13 */
241 add,>= 0,arg0,retreg
242 MILLI_BEN($$divI_14) /* divisor == 14 */
243 nop
244 MILLI_BEN($$divI_15) /* divisor == 15 */
245 nop
246
247LSYM(negative1)
248 sub 0,arg0,retreg /* result is negation of dividend */
249 MILLIRET
250 addo arg0,arg1,r0 /* trap iff dividend==0x80000000 && divisor==-1 */
251 .exit
252 .procend
253 .end
254#endif
diff --git a/arch/parisc/lib/milli/divU.S b/arch/parisc/lib/milli/divU.S
new file mode 100644
index 000000000000..9287fe2546fa
--- /dev/null
+++ b/arch/parisc/lib/milli/divU.S
@@ -0,0 +1,235 @@
1/* 32 and 64-bit millicode, original author Hewlett-Packard
2 adapted for gcc by Paul Bame <bame@debian.org>
3 and Alan Modra <alan@linuxcare.com.au>.
4
5 Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
6
7 This file is part of GCC and is released under the terms of
8 of the GNU General Public License as published by the Free Software
9 Foundation; either version 2, or (at your option) any later version.
10 See the file COPYING in the top-level GCC source directory for a copy
11 of the license. */
12
13#include "milli.h"
14
15#ifdef L_divU
16/* ROUTINE: $$divU
17 .
18 . Single precision divide for unsigned integers.
19 .
20 . Quotient is truncated towards zero.
21 . Traps on divide by zero.
22
23 INPUT REGISTERS:
24 . arg0 == dividend
25 . arg1 == divisor
26 . mrp == return pc
27 . sr0 == return space when called externally
28
29 OUTPUT REGISTERS:
30 . arg0 = undefined
31 . arg1 = undefined
32 . ret1 = quotient
33
34 OTHER REGISTERS AFFECTED:
35 . r1 = undefined
36
37 SIDE EFFECTS:
38 . Causes a trap under the following conditions:
39 . divisor is zero
40 . Changes memory at the following places:
41 . NONE
42
43 PERMISSIBLE CONTEXT:
44 . Unwindable.
45 . Does not create a stack frame.
46 . Suitable for internal or external millicode.
47 . Assumes the special millicode register conventions.
48
49 DISCUSSION:
50 . Branches to other millicode routines using BE:
51 . $$divU_# for 3,5,6,7,9,10,12,14,15
52 .
53 . For selected small divisors calls the special divide by constant
54 . routines written by Karl Pettis. These are: 3,5,6,7,9,10,12,14,15. */
55
56RDEFINE(temp,r1)
57RDEFINE(retreg,ret1) /* r29 */
58RDEFINE(temp1,arg0)
59 SUBSPA_MILLI_DIV
60 ATTR_MILLI
61 .export $$divU,millicode
62 .import $$divU_3,millicode
63 .import $$divU_5,millicode
64 .import $$divU_6,millicode
65 .import $$divU_7,millicode
66 .import $$divU_9,millicode
67 .import $$divU_10,millicode
68 .import $$divU_12,millicode
69 .import $$divU_14,millicode
70 .import $$divU_15,millicode
71 .proc
72 .callinfo millicode
73 .entry
74GSYM($$divU)
75/* The subtract is not nullified since it does no harm and can be used
76 by the two cases that branch back to "normal". */
77 ldo -1(arg1),temp /* is there at most one bit set ? */
78 and,= arg1,temp,r0 /* if so, denominator is power of 2 */
79 b LREF(regular_seq)
80 addit,= 0,arg1,0 /* trap for zero dvr */
81 copy arg0,retreg
82 extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */
83 extru retreg,15,16,retreg /* retreg = retreg >> 16 */
84 or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */
85 ldi 0xcc,temp1 /* setup 0xcc in temp1 */
86 extru,= arg1,23,8,temp /* test denominator with 0xff00 */
87 extru retreg,23,24,retreg /* retreg = retreg >> 8 */
88 or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */
89 ldi 0xaa,temp /* setup 0xaa in temp */
90 extru,= arg1,27,4,r0 /* test denominator with 0xf0 */
91 extru retreg,27,28,retreg /* retreg = retreg >> 4 */
92 and,= arg1,temp1,r0 /* test denominator with 0xcc */
93 extru retreg,29,30,retreg /* retreg = retreg >> 2 */
94 and,= arg1,temp,r0 /* test denominator with 0xaa */
95 extru retreg,30,31,retreg /* retreg = retreg >> 1 */
96 MILLIRETN
97 nop
98LSYM(regular_seq)
99 comib,>= 15,arg1,LREF(special_divisor)
100 subi 0,arg1,temp /* clear carry, negate the divisor */
101 ds r0,temp,r0 /* set V-bit to 1 */
102LSYM(normal)
103 add arg0,arg0,retreg /* shift msb bit into carry */
104 ds r0,arg1,temp /* 1st divide step, if no carry */
105 addc retreg,retreg,retreg /* shift retreg with/into carry */
106 ds temp,arg1,temp /* 2nd divide step */
107 addc retreg,retreg,retreg /* shift retreg with/into carry */
108 ds temp,arg1,temp /* 3rd divide step */
109 addc retreg,retreg,retreg /* shift retreg with/into carry */
110 ds temp,arg1,temp /* 4th divide step */
111 addc retreg,retreg,retreg /* shift retreg with/into carry */
112 ds temp,arg1,temp /* 5th divide step */
113 addc retreg,retreg,retreg /* shift retreg with/into carry */
114 ds temp,arg1,temp /* 6th divide step */
115 addc retreg,retreg,retreg /* shift retreg with/into carry */
116 ds temp,arg1,temp /* 7th divide step */
117 addc retreg,retreg,retreg /* shift retreg with/into carry */
118 ds temp,arg1,temp /* 8th divide step */
119 addc retreg,retreg,retreg /* shift retreg with/into carry */
120 ds temp,arg1,temp /* 9th divide step */
121 addc retreg,retreg,retreg /* shift retreg with/into carry */
122 ds temp,arg1,temp /* 10th divide step */
123 addc retreg,retreg,retreg /* shift retreg with/into carry */
124 ds temp,arg1,temp /* 11th divide step */
125 addc retreg,retreg,retreg /* shift retreg with/into carry */
126 ds temp,arg1,temp /* 12th divide step */
127 addc retreg,retreg,retreg /* shift retreg with/into carry */
128 ds temp,arg1,temp /* 13th divide step */
129 addc retreg,retreg,retreg /* shift retreg with/into carry */
130 ds temp,arg1,temp /* 14th divide step */
131 addc retreg,retreg,retreg /* shift retreg with/into carry */
132 ds temp,arg1,temp /* 15th divide step */
133 addc retreg,retreg,retreg /* shift retreg with/into carry */
134 ds temp,arg1,temp /* 16th divide step */
135 addc retreg,retreg,retreg /* shift retreg with/into carry */
136 ds temp,arg1,temp /* 17th divide step */
137 addc retreg,retreg,retreg /* shift retreg with/into carry */
138 ds temp,arg1,temp /* 18th divide step */
139 addc retreg,retreg,retreg /* shift retreg with/into carry */
140 ds temp,arg1,temp /* 19th divide step */
141 addc retreg,retreg,retreg /* shift retreg with/into carry */
142 ds temp,arg1,temp /* 20th divide step */
143 addc retreg,retreg,retreg /* shift retreg with/into carry */
144 ds temp,arg1,temp /* 21st divide step */
145 addc retreg,retreg,retreg /* shift retreg with/into carry */
146 ds temp,arg1,temp /* 22nd divide step */
147 addc retreg,retreg,retreg /* shift retreg with/into carry */
148 ds temp,arg1,temp /* 23rd divide step */
149 addc retreg,retreg,retreg /* shift retreg with/into carry */
150 ds temp,arg1,temp /* 24th divide step */
151 addc retreg,retreg,retreg /* shift retreg with/into carry */
152 ds temp,arg1,temp /* 25th divide step */
153 addc retreg,retreg,retreg /* shift retreg with/into carry */
154 ds temp,arg1,temp /* 26th divide step */
155 addc retreg,retreg,retreg /* shift retreg with/into carry */
156 ds temp,arg1,temp /* 27th divide step */
157 addc retreg,retreg,retreg /* shift retreg with/into carry */
158 ds temp,arg1,temp /* 28th divide step */
159 addc retreg,retreg,retreg /* shift retreg with/into carry */
160 ds temp,arg1,temp /* 29th divide step */
161 addc retreg,retreg,retreg /* shift retreg with/into carry */
162 ds temp,arg1,temp /* 30th divide step */
163 addc retreg,retreg,retreg /* shift retreg with/into carry */
164 ds temp,arg1,temp /* 31st divide step */
165 addc retreg,retreg,retreg /* shift retreg with/into carry */
166 ds temp,arg1,temp /* 32nd divide step, */
167 MILLIRET
168 addc retreg,retreg,retreg /* shift last retreg bit into retreg */
169
170/* Handle the cases where divisor is a small constant or has high bit on. */
171LSYM(special_divisor)
172/* blr arg1,r0 */
173/* comib,>,n 0,arg1,LREF(big_divisor) ; nullify previous instruction */
174
175/* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from
176 generating such a blr, comib sequence. A problem in nullification. So I
177 rewrote this code. */
178
179#if defined(CONFIG_64BIT)
180/* Clear the upper 32 bits of the arg1 register. We are working with
181 small divisors (and 32-bit unsigned integers) We must not be mislead
182 by "1" bits left in the upper 32 bits. */
183 depd %r0,31,32,%r25
184#endif
185 comib,> 0,arg1,LREF(big_divisor)
186 nop
187 blr arg1,r0
188 nop
189
190LSYM(zero_divisor) /* this label is here to provide external visibility */
191 addit,= 0,arg1,0 /* trap for zero dvr */
192 nop
193 MILLIRET /* divisor == 1 */
194 copy arg0,retreg
195 MILLIRET /* divisor == 2 */
196 extru arg0,30,31,retreg
197 MILLI_BEN($$divU_3) /* divisor == 3 */
198 nop
199 MILLIRET /* divisor == 4 */
200 extru arg0,29,30,retreg
201 MILLI_BEN($$divU_5) /* divisor == 5 */
202 nop
203 MILLI_BEN($$divU_6) /* divisor == 6 */
204 nop
205 MILLI_BEN($$divU_7) /* divisor == 7 */
206 nop
207 MILLIRET /* divisor == 8 */
208 extru arg0,28,29,retreg
209 MILLI_BEN($$divU_9) /* divisor == 9 */
210 nop
211 MILLI_BEN($$divU_10) /* divisor == 10 */
212 nop
213 b LREF(normal) /* divisor == 11 */
214 ds r0,temp,r0 /* set V-bit to 1 */
215 MILLI_BEN($$divU_12) /* divisor == 12 */
216 nop
217 b LREF(normal) /* divisor == 13 */
218 ds r0,temp,r0 /* set V-bit to 1 */
219 MILLI_BEN($$divU_14) /* divisor == 14 */
220 nop
221 MILLI_BEN($$divU_15) /* divisor == 15 */
222 nop
223
224/* Handle the case where the high bit is on in the divisor.
225 Compute: if( dividend>=divisor) quotient=1; else quotient=0;
226 Note: dividend>==divisor iff dividend-divisor does not borrow
227 and not borrow iff carry. */
228LSYM(big_divisor)
229 sub arg0,arg1,r0
230 MILLIRET
231 addc r0,r0,retreg
232 .exit
233 .procend
234 .end
235#endif
diff --git a/arch/parisc/lib/milli/div_const.S b/arch/parisc/lib/milli/div_const.S
new file mode 100644
index 000000000000..dd660076e944
--- /dev/null
+++ b/arch/parisc/lib/milli/div_const.S
@@ -0,0 +1,682 @@
1/* 32 and 64-bit millicode, original author Hewlett-Packard
2 adapted for gcc by Paul Bame <bame@debian.org>
3 and Alan Modra <alan@linuxcare.com.au>.
4
5 Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
6
7 This file is part of GCC and is released under the terms of
8 of the GNU General Public License as published by the Free Software
9 Foundation; either version 2, or (at your option) any later version.
10 See the file COPYING in the top-level GCC source directory for a copy
11 of the license. */
12
13#include "milli.h"
14
15#ifdef L_div_const
16/* ROUTINE: $$divI_2
17 . $$divI_3 $$divU_3
18 . $$divI_4
19 . $$divI_5 $$divU_5
20 . $$divI_6 $$divU_6
21 . $$divI_7 $$divU_7
22 . $$divI_8
23 . $$divI_9 $$divU_9
24 . $$divI_10 $$divU_10
25 .
26 . $$divI_12 $$divU_12
27 .
28 . $$divI_14 $$divU_14
29 . $$divI_15 $$divU_15
30 . $$divI_16
31 . $$divI_17 $$divU_17
32 .
33 . Divide by selected constants for single precision binary integers.
34
35 INPUT REGISTERS:
36 . arg0 == dividend
37 . mrp == return pc
38 . sr0 == return space when called externally
39
40 OUTPUT REGISTERS:
41 . arg0 = undefined
42 . arg1 = undefined
43 . ret1 = quotient
44
45 OTHER REGISTERS AFFECTED:
46 . r1 = undefined
47
48 SIDE EFFECTS:
49 . Causes a trap under the following conditions: NONE
50 . Changes memory at the following places: NONE
51
52 PERMISSIBLE CONTEXT:
53 . Unwindable.
54 . Does not create a stack frame.
55 . Suitable for internal or external millicode.
56 . Assumes the special millicode register conventions.
57
58 DISCUSSION:
59 . Calls other millicode routines using mrp: NONE
60 . Calls other millicode routines: NONE */
61
62
63/* TRUNCATED DIVISION BY SMALL INTEGERS
64
65 We are interested in q(x) = floor(x/y), where x >= 0 and y > 0
66 (with y fixed).
67
68 Let a = floor(z/y), for some choice of z. Note that z will be
69 chosen so that division by z is cheap.
70
71 Let r be the remainder(z/y). In other words, r = z - ay.
72
73 Now, our method is to choose a value for b such that
74
75 q'(x) = floor((ax+b)/z)
76
77 is equal to q(x) over as large a range of x as possible. If the
78 two are equal over a sufficiently large range, and if it is easy to
79 form the product (ax), and it is easy to divide by z, then we can
80 perform the division much faster than the general division algorithm.
81
82 So, we want the following to be true:
83
84 . For x in the following range:
85 .
86 . ky <= x < (k+1)y
87 .
88 . implies that
89 .
90 . k <= (ax+b)/z < (k+1)
91
92 We want to determine b such that this is true for all k in the
93 range {0..K} for some maximum K.
94
95 Since (ax+b) is an increasing function of x, we can take each
96 bound separately to determine the "best" value for b.
97
98 (ax+b)/z < (k+1) implies
99
100 a((k+1)y-1)+b < (k+1)z implies
101
102 b < a + (k+1)(z-ay) implies
103
104 b < a + (k+1)r
105
106 This needs to be true for all k in the range {0..K}. In
107 particular, it is true for k = 0 and this leads to a maximum
108 acceptable value for b.
109
110 b < a+r or b <= a+r-1
111
112 Taking the other bound, we have
113
114 k <= (ax+b)/z implies
115
116 k <= (aky+b)/z implies
117
118 k(z-ay) <= b implies
119
120 kr <= b
121
122 Clearly, the largest range for k will be achieved by maximizing b,
123 when r is not zero. When r is zero, then the simplest choice for b
124 is 0. When r is not 0, set
125
126 . b = a+r-1
127
128 Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y)
129 for all x in the range:
130
131 . 0 <= x < (K+1)y
132
133 We need to determine what K is. Of our two bounds,
134
135 . b < a+(k+1)r is satisfied for all k >= 0, by construction.
136
137 The other bound is
138
139 . kr <= b
140
141 This is always true if r = 0. If r is not 0 (the usual case), then
142 K = floor((a+r-1)/r), is the maximum value for k.
143
144 Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct
145 answer for q(x) = floor(x/y) when x is in the range
146
147 (0,(K+1)y-1) K = floor((a+r-1)/r)
148
149 To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that
150 the formula for q'(x) yields the correct value of q(x) for all x
151 representable by a single word in HPPA.
152
153 We are also constrained in that computing the product (ax), adding
154 b, and dividing by z must all be done quickly, otherwise we will be
155 better off going through the general algorithm using the DS
156 instruction, which uses approximately 70 cycles.
157
158 For each y, there is a choice of z which satisfies the constraints
159 for (K+1)y >= 2**32. We may not, however, be able to satisfy the
160 timing constraints for arbitrary y. It seems that z being equal to
161 a power of 2 or a power of 2 minus 1 is as good as we can do, since
162 it minimizes the time to do division by z. We want the choice of z
163 to also result in a value for (a) that minimizes the computation of
164 the product (ax). This is best achieved if (a) has a regular bit
165 pattern (so the multiplication can be done with shifts and adds).
166 The value of (a) also needs to be less than 2**32 so the product is
167 always guaranteed to fit in 2 words.
168
169 In actual practice, the following should be done:
170
171 1) For negative x, you should take the absolute value and remember
172 . the fact so that the result can be negated. This obviously does
173 . not apply in the unsigned case.
174 2) For even y, you should factor out the power of 2 that divides y
175 . and divide x by it. You can then proceed by dividing by the
176 . odd factor of y.
177
178 Here is a table of some odd values of y, and corresponding choices
179 for z which are "good".
180
181 y z r a (hex) max x (hex)
182
183 3 2**32 1 55555555 100000001
184 5 2**32 1 33333333 100000003
185 7 2**24-1 0 249249 (infinite)
186 9 2**24-1 0 1c71c7 (infinite)
187 11 2**20-1 0 1745d (infinite)
188 13 2**24-1 0 13b13b (infinite)
189 15 2**32 1 11111111 10000000d
190 17 2**32 1 f0f0f0f 10000000f
191
192 If r is 1, then b = a+r-1 = a. This simplifies the computation
193 of (ax+b), since you can compute (x+1)(a) instead. If r is 0,
194 then b = 0 is ok to use which simplifies (ax+b).
195
196 The bit patterns for 55555555, 33333333, and 11111111 are obviously
197 very regular. The bit patterns for the other values of a above are:
198
199 y (hex) (binary)
200
201 7 249249 001001001001001001001001 << regular >>
202 9 1c71c7 000111000111000111000111 << regular >>
203 11 1745d 000000010111010001011101 << irregular >>
204 13 13b13b 000100111011000100111011 << irregular >>
205
206 The bit patterns for (a) corresponding to (y) of 11 and 13 may be
207 too irregular to warrant using this method.
208
209 When z is a power of 2 minus 1, then the division by z is slightly
210 more complicated, involving an iterative solution.
211
212 The code presented here solves division by 1 through 17, except for
213 11 and 13. There are algorithms for both signed and unsigned
214 quantities given.
215
216 TIMINGS (cycles)
217
218 divisor positive negative unsigned
219
220 . 1 2 2 2
221 . 2 4 4 2
222 . 3 19 21 19
223 . 4 4 4 2
224 . 5 18 22 19
225 . 6 19 22 19
226 . 8 4 4 2
227 . 10 18 19 17
228 . 12 18 20 18
229 . 15 16 18 16
230 . 16 4 4 2
231 . 17 16 18 16
232
233 Now, the algorithm for 7, 9, and 14 is an iterative one. That is,
234 a loop body is executed until the tentative quotient is 0. The
235 number of times the loop body is executed varies depending on the
236 dividend, but is never more than two times. If the dividend is
237 less than the divisor, then the loop body is not executed at all.
238 Each iteration adds 4 cycles to the timings.
239
240 divisor positive negative unsigned
241
242 . 7 19+4n 20+4n 20+4n n = number of iterations
243 . 9 21+4n 22+4n 21+4n
244 . 14 21+4n 22+4n 20+4n
245
246 To give an idea of how the number of iterations varies, here is a
247 table of dividend versus number of iterations when dividing by 7.
248
249 smallest largest required
250 dividend dividend iterations
251
252 . 0 6 0
253 . 7 0x6ffffff 1
254 0x1000006 0xffffffff 2
255
256 There is some overlap in the range of numbers requiring 1 and 2
257 iterations. */
258
/* Register aliases shared by every divide-by-constant routine below.
   The running 64-bit product is kept as the pair <x1,x2>: x2 carries the
   dividend on entry and x1 is what the routines return as the quotient. */
259RDEFINE(t2,r1) /* scratch: low word of a shifted copy of the product */
260RDEFINE(x2,arg0) /* r26: dividend in, then low word of the product */
261RDEFINE(t1,arg1) /* r25: scratch (usually the high word of a shifted copy of the product) */
262RDEFINE(x1,ret1) /* r29: high word of the product / quotient out */
263
264 SUBSPA_MILLI_DIV
265 ATTR_MILLI
266
267 .proc
268 .callinfo millicode
269 .entry
270/* NONE of these routines require a stack frame
271 ALL of these routines are unwindable from millicode */
272
273GSYM($$divide_by_constant)
274 .export $$divide_by_constant,millicode
275/* Provides a "nice" label for the code covered by the unwind descriptor
276 for things like gprof.  It marks the start of the single .proc/.procend
   region that contains all of the $$divI_n / $$divU_n entry points. */
277
278/* DIVISION BY 2 (shift by 1)
   Signed: arg0 / 2 -> ret1.  A negative dividend is first biased by
   (divisor - 1) so that the arithmetic shift truncates toward zero. */
279GSYM($$divI_2)
280 .export $$divI_2,millicode
281 comclr,>= arg0,0,0 /* dividend >= 0: skip the bias add below */
282 addi 1,arg0,arg0 /* dividend < 0: add (2 - 1) */
283 MILLIRET
284 extrs arg0,30,31,ret1 /* delay slot: ret1 = arg0 >> 1, sign-extended */
285
286
287/* DIVISION BY 4 (shift by 2)
   Signed: arg0 / 4 -> ret1.  A negative dividend is first biased by
   (divisor - 1) so that the arithmetic shift truncates toward zero. */
288GSYM($$divI_4)
289 .export $$divI_4,millicode
290 comclr,>= arg0,0,0 /* dividend >= 0: skip the bias add below */
291 addi 3,arg0,arg0 /* dividend < 0: add (4 - 1) */
292 MILLIRET
293 extrs arg0,29,30,ret1 /* delay slot: ret1 = arg0 >> 2, sign-extended */
294
295
296/* DIVISION BY 8 (shift by 3)
   Signed: arg0 / 8 -> ret1.  A negative dividend is first biased by
   (divisor - 1) so that the arithmetic shift truncates toward zero. */
297GSYM($$divI_8)
298 .export $$divI_8,millicode
299 comclr,>= arg0,0,0 /* dividend >= 0: skip the bias add below */
300 addi 7,arg0,arg0 /* dividend < 0: add (8 - 1) */
301 MILLIRET
302 extrs arg0,28,29,ret1 /* delay slot: ret1 = arg0 >> 3, sign-extended */
303
304/* DIVISION BY 16 (shift by 4)
   Signed: arg0 / 16 -> ret1.  A negative dividend is first biased by
   (divisor - 1) so that the arithmetic shift truncates toward zero. */
305GSYM($$divI_16)
306 .export $$divI_16,millicode
307 comclr,>= arg0,0,0 /* dividend >= 0: skip the bias add below */
308 addi 15,arg0,arg0 /* dividend < 0: add (16 - 1) */
309 MILLIRET
310 extrs arg0,27,28,ret1 /* delay slot: ret1 = arg0 >> 4, sign-extended */
311
312/****************************************************************************
313*
314* DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these
315*
316* includes 3,5,15,17 and also 6,10,12
317*
318****************************************************************************/
319
320/* DIVISION BY 3 (use z = 2**32; a = 55555555)
   Signed.  Forms <x1,x2> = 5 * (|x| + 1) here; the shared pos/neg tail
   then multiplies by 0x11, 0x101 and 0x10001 (5 * 0x11111111 = 0x55555555)
   and returns the high word, negated on the neg path. */
321
322GSYM($$divI_3)
323 .export $$divI_3,millicode
324 comb,<,N x2,0,LREF(neg3) /* negative dividend takes the neg3 path */
325
326 addi 1,x2,x2 /* this cannot overflow */
327 extru x2,1,2,x1 /* multiply by 5 to get started: x1 = top 2 bits of x2 = high word of 4*x2 */
328 sh2add x2,x2,x2 /* low word: x2 = 4*x2 + x2 */
329 b LREF(pos)
330 addc x1,0,x1 /* delay slot: fold the carry of the low-word add into x1 */
331
332LSYM(neg3)
333 subi 1,x2,x2 /* this cannot overflow; x2 = 1 - x = |x| + 1 */
334 extru x2,1,2,x1 /* multiply by 5 to get started: x1 = high word of 4*x2 */
335 sh2add x2,x2,x2 /* low word: x2 = 4*x2 + x2 */
336 b LREF(neg)
337 addc x1,0,x1 /* delay slot: fold the carry of the low-word add into x1 */
338
/* Unsigned divide by 3.  Forms <x1,x2> = 5 * (x + 1) as a two-word value
   (x + 1 may carry out of 32 bits) and joins the shared pos tail. */
339GSYM($$divU_3)
340 .export $$divU_3,millicode
341 addi 1,x2,x2 /* this CAN overflow */
342 addc 0,0,x1 /* x1 = carry out of x + 1 */
343 shd x1,x2,30,t1 /* multiply by 5 to get started: t1 = high word of <x1,x2> << 2 */
344 sh2add x2,x2,x2 /* low word: x2 = 4*x2 + x2 */
345 b LREF(pos)
346 addc x1,t1,x1 /* delay slot: high word = x1 + t1 + carry */
347
348/* DIVISION BY 5 (use z = 2**32; a = 33333333)
   Signed.  Forms <x1,x2> = 3 * (|x| + 1) here; the shared pos/neg tail
   supplies the remaining factor 0x11111111 (3 * 0x11111111 = 0x33333333). */
349
350GSYM($$divI_5)
351 .export $$divI_5,millicode
352 comb,<,N x2,0,LREF(neg5) /* negative dividend takes the neg5 path */
353
354 addi 3,x2,t1 /* this cannot overflow; t1 = x + 3 */
355 sh1add x2,t1,x2 /* multiply by 3 to get started: x2 = 2*x + (x + 3) = 3*(x + 1) */
356 b LREF(pos)
357 addc 0,0,x1 /* delay slot: x1 = carry out of the low word */
358
359LSYM(neg5)
360 sub 0,x2,x2 /* negate x2 */
361 addi 1,x2,x2 /* this cannot overflow */
362 shd 0,x2,31,x1 /* get top bit (can be 1); this is the high word of 2*x2 */
363 sh1add x2,x2,x2 /* multiply by 3 to get started */
364 b LREF(neg)
365 addc x1,0,x1 /* delay slot: fold the carry of the low-word add into x1 */
366
/* Unsigned divide by 5.  Forms <x1,x2> = 3 * (x + 1) as a two-word value
   (x + 1 may carry out of 32 bits) and joins the shared pos tail. */
367GSYM($$divU_5)
368 .export $$divU_5,millicode
369 addi 1,x2,x2 /* this CAN overflow */
370 addc 0,0,x1 /* x1 = carry out of x + 1 */
371 shd x1,x2,31,t1 /* multiply by 3 to get started: t1 = high word of <x1,x2> << 1 */
372 sh1add x2,x2,x2 /* low word: x2 = 2*x2 + x2 */
373 b LREF(pos)
374 addc t1,x1,x1 /* delay slot: high word = t1 + x1 + carry */
375
376/* DIVISION BY 6 (shift to divide by 2 then divide by 3)
   Signed.  Halves the magnitude, forms 5 * (half + 1) and joins the same
   pos/neg tail that the divide-by-3 entry points use. */
377GSYM($$divI_6)
378 .export $$divI_6,millicode
379 comb,<,N x2,0,LREF(neg6) /* negative dividend takes the neg6 path */
380 extru x2,30,31,x2 /* divide by 2 */
381 addi 5,x2,t1 /* compute 5*(x2+1) = 5*x2+5 */
382 sh2add x2,t1,x2 /* multiply by 5 to get started */
383 b LREF(pos)
384 addc 0,0,x1 /* delay slot: x1 = carry out of the low word */
385
386LSYM(neg6)
387 subi 2,x2,x2 /* negate, divide by 2, and add 1 */
388 /* negation and adding 1 are done */
389 /* at the same time by the SUBI */
390 extru x2,30,31,x2 /* (|x| + 2) >> 1 = |x|/2 + 1 */
391 shd 0,x2,30,x1 /* x1 = high word of 4*x2 */
392 sh2add x2,x2,x2 /* multiply by 5 to get started */
393 b LREF(neg)
394 addc x1,0,x1 /* delay slot: fold the carry of the low-word add into x1 */
395
/* Unsigned divide by 6: halve, then form 5 * (half + 1) and join the
   shared pos tail (the divide-by-3 multiplier). */
396GSYM($$divU_6)
397 .export $$divU_6,millicode
398 extru x2,30,31,x2 /* divide by 2 */
399 addi 1,x2,x2 /* cannot carry */
400 shd 0,x2,30,x1 /* multiply by 5 to get started: x1 = high word of 4*x2 */
401 sh2add x2,x2,x2 /* low word: x2 = 4*x2 + x2 */
402 b LREF(pos)
403 addc x1,0,x1 /* delay slot: fold the carry of the low-word add into x1 */
404
405/* DIVISION BY 10 (shift to divide by 2 then divide by 5)
   Unsigned entry point.  It falls straight through into the shared "pos"
   tail, which the other non-negative paths reach by branching. */
406GSYM($$divU_10)
407 .export $$divU_10,millicode
408 extru x2,30,31,x2 /* divide by 2 */
409 addi 3,x2,t1 /* compute 3*(x2+1) = (3*x2)+3 */
410 sh1add x2,t1,x2 /* multiply by 3 to get started */
411 addc 0,0,x1 /* x1 = carry out of the low word */
/* Shared tail for non-negative results: multiplies the two-word value
   <x1,x2> by 0x11 * 0x101 * 0x10001 = 0x11111111 and returns the high
   word x1.  Each step adds a left-shifted copy of <x1,x2> to itself. */
412LSYM(pos)
413 shd x1,x2,28,t1 /* multiply by 0x11: t1 = high word of <x1,x2> << 4 */
414 shd x2,0,28,t2 /* t2 = low word of <x1,x2> << 4 */
415 add x2,t2,x2
416 addc x1,t1,x1
417LSYM(pos_for_17) /* entry that skips the 0x11 factor (divide by 17) */
418 shd x1,x2,24,t1 /* multiply by 0x101 */
419 shd x2,0,24,t2
420 add x2,t2,x2
421 addc x1,t1,x1
422
423 shd x1,x2,16,t1 /* multiply by 0x10001 */
424 shd x2,0,16,t2
425 add x2,t2,x2
426 MILLIRET
427 addc x1,t1,x1 /* delay slot: final high word = quotient */
428
/* Signed divide by 10: halve the magnitude, form 3 * (half + 1) and run
   the 0x11111111 multiply.  The negative path falls through into the
   shared "neg" tail, which negates the quotient before returning. */
429GSYM($$divI_10)
430 .export $$divI_10,millicode
431 comb,< x2,0,LREF(neg10)
432 copy 0,x1 /* delay slot, runs on both paths: high word starts at 0 */
433 extru x2,30,31,x2 /* divide by 2 */
434 addib,TR 1,x2,LREF(pos) /* add 1 (cannot overflow) */
435 sh1add x2,x2,x2 /* multiply by 3 to get started */
436
437LSYM(neg10)
438 subi 2,x2,x2 /* negate, divide by 2, and add 1 */
439 /* negation and adding 1 are done */
440 /* at the same time by the SUBI */
441 extru x2,30,31,x2 /* (|x| + 2) >> 1 = |x|/2 + 1 */
442 sh1add x2,x2,x2 /* multiply by 3 to get started */
/* Shared tail for negative results: same 0x11 * 0x101 * 0x10001 multiply
   as "pos", followed by negation of the high word. */
443LSYM(neg)
444 shd x1,x2,28,t1 /* multiply by 0x11 */
445 shd x2,0,28,t2
446 add x2,t2,x2
447 addc x1,t1,x1
448LSYM(neg_for_17) /* entry that skips the 0x11 factor (divide by 17) */
449 shd x1,x2,24,t1 /* multiply by 0x101 */
450 shd x2,0,24,t2
451 add x2,t2,x2
452 addc x1,t1,x1
453
454 shd x1,x2,16,t1 /* multiply by 0x10001 */
455 shd x2,0,16,t2
456 add x2,t2,x2
457 addc x1,t1,x1
458 MILLIRET
459 sub 0,x1,x1 /* delay slot: negate the quotient */
460
461/* DIVISION BY 12 (shift to divide by 4 then divide by 3)
   Signed.  Quarters the magnitude, forms 5 * (quarter + 1) and joins the
   shared pos/neg tail. */
462GSYM($$divI_12)
463 .export $$divI_12,millicode
464 comb,< x2,0,LREF(neg12)
465 copy 0,x1 /* delay slot, runs on both paths: high word starts at 0 */
466 extru x2,29,30,x2 /* divide by 4 */
467 addib,tr 1,x2,LREF(pos) /* compute 5*(x2+1) = 5*x2+5 */
468 sh2add x2,x2,x2 /* multiply by 5 to get started */
469
470LSYM(neg12)
471 subi 4,x2,x2 /* negate, divide by 4, and add 1 */
472 /* negation and adding 1 are done */
473 /* at the same time by the SUBI */
474 extru x2,29,30,x2 /* (|x| + 4) >> 2 = |x|/4 + 1 */
475 b LREF(neg)
476 sh2add x2,x2,x2 /* multiply by 5 to get started */
477
/* Unsigned divide by 12: quarter, then form 5 * (quarter + 1) and join
   the shared pos tail (the divide-by-3 multiplier). */
478GSYM($$divU_12)
479 .export $$divU_12,millicode
480 extru x2,29,30,x2 /* divide by 4 */
481 addi 5,x2,t1 /* cannot carry; t1 = x2 + 5 */
482 sh2add x2,t1,x2 /* multiply by 5 to get started: x2 = 4*x2 + (x2 + 5) */
483 b LREF(pos)
484 addc 0,0,x1 /* delay slot: x1 = carry out of the low word */
485
486/* DIVISION BY 15 (use z = 2**32; a = 11111111)
   Signed.  The multiplier is exactly what the shared pos/neg tail applies,
   so only the +1 (and the negation on the negative path) is done here. */
487GSYM($$divI_15)
488 .export $$divI_15,millicode
489 comb,< x2,0,LREF(neg15)
490 copy 0,x1 /* delay slot, runs on both paths: high word starts at 0 */
491 addib,tr 1,x2,LREF(pos)+4 /* x2 = x + 1; enter pos at its second instruction */
492 shd x1,x2,28,t1 /* delay slot: copy of the first instruction of pos */
493
494LSYM(neg15)
495 b LREF(neg)
496 subi 1,x2,x2 /* delay slot: x2 = 1 - x = |x| + 1 */
497
/* Unsigned divide by 15: form the two-word value x + 1 and hand it to the
   shared pos tail, which multiplies by 0x11111111. */
498GSYM($$divU_15)
499 .export $$divU_15,millicode
500 addi 1,x2,x2 /* this CAN overflow */
501 b LREF(pos)
502 addc 0,0,x1 /* delay slot: x1 = carry out of x + 1 */
503
504/* DIVISION BY 17 (use z = 2**32; a = f0f0f0f)
   Signed.  Forms <x1,x2> = 15 * (|x| + 1) as 16*v - v, then enters the
   shared tail after its 0x11 step (0xf * 0x101 * 0x10001 = 0x0f0f0f0f). */
505GSYM($$divI_17)
506 .export $$divI_17,millicode
507 comb,<,n x2,0,LREF(neg17) /* negative dividend takes the neg17 path */
508 addi 1,x2,x2 /* this cannot overflow */
509 shd 0,x2,28,t1 /* multiply by 0xf to get started: t1 = high word of 16*x2 */
510 shd x2,0,28,t2 /* t2 = low word of 16*x2 */
511 sub t2,x2,x2 /* low word of 16*x2 - x2 */
512 b LREF(pos_for_17)
513 subb t1,0,x1 /* delay slot: high word, less the borrow */
514
515LSYM(neg17)
516 subi 1,x2,x2 /* this cannot overflow; x2 = 1 - x = |x| + 1 */
517 shd 0,x2,28,t1 /* multiply by 0xf to get started: t1 = high word of 16*x2 */
518 shd x2,0,28,t2 /* t2 = low word of 16*x2 */
519 sub t2,x2,x2 /* low word of 16*x2 - x2 */
520 b LREF(neg_for_17)
521 subb t1,0,x1 /* delay slot: high word, less the borrow */
522
/* Unsigned divide by 17: form the two-word value 15 * (x + 1) as
   16*v - v and enter the shared tail after its 0x11 step. */
523GSYM($$divU_17)
524 .export $$divU_17,millicode
525 addi 1,x2,x2 /* this CAN overflow */
526 addc 0,0,x1 /* x1 = carry out of x + 1 */
527 shd x1,x2,28,t1 /* multiply by 0xf to get started: t1 = high word of <x1,x2> << 4 */
528LSYM(u17) /* label is not referenced anywhere in this file's visible code */
529 shd x2,0,28,t2 /* t2 = low word of <x1,x2> << 4 */
530 sub t2,x2,x2 /* low word of 16*v - v */
531 b LREF(pos_for_17)
532 subb t1,x1,x1 /* delay slot: high word, less the borrow */
533
534
535/* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these
536 includes 7,9 and also 14
537
538
539 z = 2**24-1
540 r = z mod y = 0
541
542 so choose b = 0
543
544 Also, in order to divide by z = 2**24-1, we approximate by dividing
545 by (z+1) = 2**24 (which is easy), and then correcting.
546
547 (ax) = (z+1)q' + r
548 . = zq' + (q'+r)
549
550 So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)
551 Then the true remainder of (ax)/z is (q'+r). Repeat the process
552 with this new remainder, adding the tentative quotients together,
553 until a tentative quotient is 0 (and then we are done). There is
554 one last correction to be done. It is possible that (q'+r) = z.
555 If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But,
556 in fact, we need to add 1 more to the quotient. Now, it turns
557 out that this happens if and only if the original value x is
558 an exact multiple of y. So, to avoid a three instruction test at
559 the end, instead use 1 instruction to add 1 to x at the beginning. */
560
561/* DIVISION BY 7 (use z = 2**24-1; a = 249249)
   Signed.  Builds the two-word product a * (|x| + 1) by multiplying by
   9, 0x41 and 0x1001 (9 * 0x41 * 0x1001 = 0x249249), then divides it by
   2**24 - 1 with the iterative quotient/remainder folding described in
   the comment above.  The positive and negative paths are separate
   copies; the negative one negates the result.  Labels 7, pos7, 8 and
   neg7_shift are also entered from the divide-by-9 and divide-by-14
   routines. */
562GSYM($$divI_7)
563 .export $$divI_7,millicode
564 comb,<,n x2,0,LREF(neg7) /* negative dividend takes the neg7 path */
565LSYM(7)
566 addi 1,x2,x2 /* cannot overflow */
567 shd 0,x2,29,x1 /* x1 = high word of 8*x2 */
568 sh3add x2,x2,x2 /* low word: x2 = 8*x2 + x2 (multiply by 9) */
569 addc x1,0,x1 /* fold the carry of the low-word add into x1 */
570LSYM(pos7)
571 shd x1,x2,26,t1 /* multiply by 0x41: add <x1,x2> << 6 to itself */
572 shd x2,0,26,t2
573 add x2,t2,x2
574 addc x1,t1,x1
575
576 shd x1,x2,20,t1 /* multiply by 0x1001: add <x1,x2> << 12 to itself */
577 shd x2,0,20,t2
578 add x2,t2,x2
579 addc x1,t1,t1 /* high word goes to t1; x1 is reused as the quotient */
580
581 /* computed <t1,x2>. Now divide it by (2**24 - 1) */
582
583 copy 0,x1 /* quotient accumulator */
584 shd,= t1,x2,24,t1 /* tentative quotient; if zero, skip the add-and-branch below */
585LSYM(1)
586 addb,tr t1,x1,LREF(2) /* add to previous quotient */
587 extru x2,31,24,x2 /* new remainder (unadjusted) */
588
589 MILLIRETN /* reached once a tentative quotient is zero; result in x1 */
590
591LSYM(2)
592 addb,tr t1,x2,LREF(1) /* adjust remainder */
593 extru,= x2,7,8,t1 /* new quotient; if zero, the add at label 1 is skipped and the routine returns */
594
595LSYM(neg7)
596 subi 1,x2,x2 /* negate x2 and add 1 */
597LSYM(8)
598 shd 0,x2,29,x1 /* x1 = high word of 8*x2 */
599 sh3add x2,x2,x2 /* low word: x2 = 8*x2 + x2 (multiply by 9) */
600 addc x1,0,x1 /* fold the carry of the low-word add into x1 */
601
602LSYM(neg7_shift)
603 shd x1,x2,26,t1 /* multiply by 0x41 */
604 shd x2,0,26,t2
605 add x2,t2,x2
606 addc x1,t1,x1
607
608 shd x1,x2,20,t1 /* multiply by 0x1001 */
609 shd x2,0,20,t2
610 add x2,t2,x2
611 addc x1,t1,t1 /* high word goes to t1; x1 is reused as the quotient */
612
613 /* computed <t1,x2>. Now divide it by (2**24 - 1) */
614
615 copy 0,x1 /* quotient accumulator */
616 shd,= t1,x2,24,t1 /* tentative quotient; if zero, skip the add-and-branch below */
617LSYM(3)
618 addb,tr t1,x1,LREF(4) /* add to previous quotient */
619 extru x2,31,24,x2 /* new remainder (unadjusted) */
620
621 MILLIRET
622 sub 0,x1,x1 /* negate result */
623
624LSYM(4)
625 addb,tr t1,x2,LREF(3) /* adjust remainder */
626 extru,= x2,7,8,t1 /* new quotient; if zero, the add at label 3 is skipped and the routine returns */
627
/* Unsigned divide by 7: form the two-word value 9 * (x + 1) and join the
   signed routine's positive path at pos7. */
628GSYM($$divU_7)
629 .export $$divU_7,millicode
630 addi 1,x2,x2 /* can carry */
631 addc 0,0,x1 /* x1 = carry out of x + 1 */
632 shd x1,x2,29,t1 /* t1 = high word of <x1,x2> << 3 */
633 sh3add x2,x2,x2 /* low word: x2 = 8*x2 + x2 */
634 b LREF(pos7)
635 addc t1,x1,x1 /* delay slot: high word = t1 + x1 + carry */
636
637/* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7)
   Signed.  Forms <x1,x2> = 7 * (|x| + 1) as 8*v - v, then reuses the
   divide-by-7 code from pos7 / neg7_shift onward, which multiplies by
   0x41 and 0x1001 (7 * 0x41 * 0x1001 = 0x1c71c7) and divides by 2**24-1. */
638GSYM($$divI_9)
639 .export $$divI_9,millicode
640 comb,<,n x2,0,LREF(neg9) /* negative dividend takes the neg9 path */
641 addi 1,x2,x2 /* cannot overflow */
642 shd 0,x2,29,t1 /* t1 = high word of 8*x2 */
643 shd x2,0,29,t2 /* t2 = low word of 8*x2 */
644 sub t2,x2,x2 /* low word of 8*x2 - x2 */
645 b LREF(pos7)
646 subb t1,0,x1 /* delay slot: high word, less the borrow */
647
648LSYM(neg9)
649 subi 1,x2,x2 /* negate and add 1 */
650 shd 0,x2,29,t1 /* t1 = high word of 8*x2 */
651 shd x2,0,29,t2 /* t2 = low word of 8*x2 */
652 sub t2,x2,x2 /* low word of 8*x2 - x2 */
653 b LREF(neg7_shift)
654 subb t1,0,x1 /* delay slot: high word, less the borrow */
655
/* Unsigned divide by 9: form the two-word value 7 * (x + 1) as 8*v - v
   and join the divide-by-7 positive path at pos7. */
656GSYM($$divU_9)
657 .export $$divU_9,millicode
658 addi 1,x2,x2 /* can carry */
659 addc 0,0,x1 /* x1 = carry out of x + 1 */
660 shd x1,x2,29,t1 /* t1 = high word of <x1,x2> << 3 */
661 shd x2,0,29,t2 /* t2 = low word of <x1,x2> << 3 */
662 sub t2,x2,x2 /* low word of 8*v - v */
663 b LREF(pos7)
664 subb t1,x1,x1 /* delay slot: high word, less the borrow */
665
666/* DIVISION BY 14 (shift to divide by 2 then divide by 7)
   $$divI_14 tests the sign and, for a non-negative dividend, falls into
   $$divU_14, so the two entry points share the halve-then-divide-by-7
   path.  The .exit/.procend/.end below close the procedure opened before
   $$divide_by_constant. */
667GSYM($$divI_14)
668 .export $$divI_14,millicode
669 comb,<,n x2,0,LREF(neg14) /* negative dividend takes the neg14 path */
670GSYM($$divU_14)
671 .export $$divU_14,millicode
672 b LREF(7) /* go to 7 case */
673 extru x2,30,31,x2 /* divide by 2 */
674
675LSYM(neg14)
676 subi 2,x2,x2 /* negate (and add 2) */
677 b LREF(8) /* label 8 sits after the "+1" of neg7: halving |x| + 2 already added it */
678 extru x2,30,31,x2 /* divide by 2 */
679 .exit
680 .procend
681 .end
682#endif
diff --git a/arch/parisc/lib/milli/dyncall.S b/arch/parisc/lib/milli/dyncall.S
new file mode 100644
index 000000000000..27f9ca558d0a
--- /dev/null
+++ b/arch/parisc/lib/milli/dyncall.S
@@ -0,0 +1,32 @@
1/* 32 and 64-bit millicode, original author Hewlett-Packard
2 adapted for gcc by Paul Bame <bame@debian.org>
3 and Alan Modra <alan@linuxcare.com.au>.
4
5 Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
6
7 This file is part of GCC and is released under the terms of
8 of the GNU General Public License as published by the Free Software
9 Foundation; either version 2, or (at your option) any later version.
10 See the file COPYING in the top-level GCC source directory for a copy
11 of the license. */
12
13#include "milli.h"
14
15#ifdef L_dyncall
/* $$dyncall: indirect-call helper.  %r22 holds either the target address
   itself or a plabel pointer, distinguished by testing bit 30.  For a
   plabel the two low bits are cleared and the two words it points at
   supply the new %r19 (LTP) and the real target address.  In both cases
   %r2 is stored at -24(%r30) in the delay slot of the final branch. */
16 SUBSPA_MILLI
17 ATTR_DATA
18GSYM($$dyncall)
19 .export $$dyncall,millicode
20 .proc
21 .callinfo millicode
22 .entry
23 bb,>=,n %r22,30,LREF(1) ; branch if not plabel address
24 depi 0,31,2,%r22 ; clear the two least significant bits
25 ldw 4(%r22),%r19 ; load new LTP value
26 ldw 0(%r22),%r22 ; load address of target
27LSYM(1)
28 bv %r0(%r22) ; branch to the real target
29 stw %r2,-24(%r30) ; save return address into frame marker
30 .exit
31 .procend
32#endif
diff --git a/arch/parisc/lib/milli/milli.S b/arch/parisc/lib/milli/milli.S
new file mode 100644
index 000000000000..47c6cde712e3
--- /dev/null
+++ b/arch/parisc/lib/milli/milli.S
@@ -0,0 +1,2071 @@
1/* 32 and 64-bit millicode, original author Hewlett-Packard
2 adapted for gcc by Paul Bame <bame@debian.org>
3 and Alan Modra <alan@linuxcare.com.au>.
4
5 Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
6
7 This file is part of GCC and is released under the terms of
8 of the GNU General Public License as published by the Free Software
9 Foundation; either version 2, or (at your option) any later version.
10 See the file COPYING in the top-level GCC source directory for a copy
11 of the license. */
12
/* 64-bit builds only: switch the assembler to level 2.0w.  The depd
   instructions later in this file sit under the same CONFIG_64BIT guard;
   presumably they need this level to assemble -- TODO confirm. */
13#ifdef CONFIG_64BIT
14 .level 2.0w
15#endif
16
17/* Hardware General Registers.
   Each line makes the bare name rN an alias for hardware register %rN,
   so the routines in this file can write r0..r31 without the % prefix. */
18r0: .reg %r0
19r1: .reg %r1
20r2: .reg %r2
21r3: .reg %r3
22r4: .reg %r4
23r5: .reg %r5
24r6: .reg %r6
25r7: .reg %r7
26r8: .reg %r8
27r9: .reg %r9
28r10: .reg %r10
29r11: .reg %r11
30r12: .reg %r12
31r13: .reg %r13
32r14: .reg %r14
33r15: .reg %r15
34r16: .reg %r16
35r17: .reg %r17
36r18: .reg %r18
37r19: .reg %r19
38r20: .reg %r20
39r21: .reg %r21
40r22: .reg %r22
41r23: .reg %r23
42r24: .reg %r24
43r25: .reg %r25
44r26: .reg %r26
45r27: .reg %r27
46r28: .reg %r28
47r29: .reg %r29
48r30: .reg %r30
49r31: .reg %r31
50
51/* Hardware Space Registers.
   Bare-name aliases srN for %srN; the MILLI_* branch macros in this file
   name sr0 and sr7 this way. */
52sr0: .reg %sr0
53sr1: .reg %sr1
54sr2: .reg %sr2
55sr3: .reg %sr3
56sr4: .reg %sr4
57sr5: .reg %sr5
58sr6: .reg %sr6
59sr7: .reg %sr7
60
61/* Hardware Floating Point Registers.
   Bare-name aliases frN for %frN (fr0..fr15 only). */
62fr0: .reg %fr0
63fr1: .reg %fr1
64fr2: .reg %fr2
65fr3: .reg %fr3
66fr4: .reg %fr4
67fr5: .reg %fr5
68fr6: .reg %fr6
69fr7: .reg %fr7
70fr8: .reg %fr8
71fr9: .reg %fr9
72fr10: .reg %fr10
73fr11: .reg %fr11
74fr12: .reg %fr12
75fr13: .reg %fr13
76fr14: .reg %fr14
77fr15: .reg %fr15
78
79/* Hardware Control Registers.
   cr11 and sar are two names for the same register, %cr11. */
80cr11: .reg %cr11
81sar: .reg %cr11 /* Shift Amount Register */
82
83/* Software Architecture General Registers.
   Role-based names layered on the rN aliases.  Note that mrp, the
   millicode return pointer, is r2 (the same register as rp) when
   CONFIG_64BIT is defined and r31 otherwise. */
84rp: .reg r2 /* return pointer */
85#ifdef CONFIG_64BIT
86mrp: .reg r2 /* millicode return pointer */
87#else
88mrp: .reg r31 /* millicode return pointer */
89#endif
90ret0: .reg r28 /* return value */
91ret1: .reg r29 /* return value (high part of double); the divide millicode returns its quotient here */
92sp: .reg r30 /* stack pointer */
93dp: .reg r27 /* data pointer */
94arg0: .reg r26 /* argument */
95arg1: .reg r25 /* argument or high part of double argument */
96arg2: .reg r24 /* argument */
97arg3: .reg r23 /* argument or high part of double argument */
98
99/* Software Architecture Space Registers.
   sret and sarg both alias sr1; the commented-out entries only record
   the conventional roles of sr0, sr4 and sr5 and define nothing. */
100/* sr0 ; return link from BLE */
101sret: .reg sr1 /* return value */
102sarg: .reg sr1 /* argument */
103/* sr4 ; PC SPACE tracker */
104/* sr5 ; process private data */
105
106/* Frame Offsets (millicode convention!) Used when calling other
107 millicode routines. Stack unwinding is dependent upon these
108 definitions.
   The values are negative byte offsets, presumably relative to the stack
   pointer -- TODO confirm against the routines that use them.  mrp_slot
   is -16 when CONFIG_64BIT is defined (the same value as sr0_slot) and
   -20 otherwise (the same value as r31_slot). */
109r31_slot: .equ -20 /* "current RP" slot */
110sr0_slot: .equ -16 /* "static link" slot */
111#if defined(CONFIG_64BIT)
112mrp_slot: .equ -16 /* "current RP" slot */
113psp_slot: .equ -8 /* "previous SP" slot */
114#else
115mrp_slot: .equ -20 /* "current RP" slot (replacing "r31_slot") */
116#endif
117
118
/* DEFINE / RDEFINE expand to "name: .EQU value" / "name: .REG value",
   i.e. a named constant or a named register alias. */
119#define DEFINE(name,value)name: .EQU value
120#define RDEFINE(name,value)name: .REG value
/* Branch and return helpers for millicode.  With milliext defined they
   use BE/BLE through a space register (sr7 for calls, sr0 with mrp for
   returns); otherwise they use plain B/BL/BV.  The *N variants are the
   ",n" forms of the same branches.  MILLIRET/MILLI_RET and
   MILLIRETN/MILLI_RETN are identical pairs under two spellings. */
121#ifdef milliext
122#define MILLI_BE(lbl) BE lbl(sr7,r0)
123#define MILLI_BEN(lbl) BE,n lbl(sr7,r0)
124#define MILLI_BLE(lbl) BLE lbl(sr7,r0)
125#define MILLI_BLEN(lbl) BLE,n lbl(sr7,r0)
126#define MILLIRETN BE,n 0(sr0,mrp)
127#define MILLIRET BE 0(sr0,mrp)
128#define MILLI_RETN BE,n 0(sr0,mrp)
129#define MILLI_RET BE 0(sr0,mrp)
130#else
131#define MILLI_BE(lbl) B lbl
132#define MILLI_BEN(lbl) B,n lbl
133#define MILLI_BLE(lbl) BL lbl,mrp
134#define MILLI_BLEN(lbl) BL,n lbl,mrp
135#define MILLIRETN BV,n 0(mrp)
136#define MILLIRET BV 0(mrp)
137#define MILLI_RETN BV,n 0(mrp)
138#define MILLI_RET BV 0(mrp)
139#endif
140
/* Token pasting helper used by LSYM/LREF below. */
141#define CAT(a,b) a##b
142
/* Section selection.  The divide and multiply routines get their own
   16-byte-aligned sections; ATTR_MILLI and ATTR_DATA expand to nothing.
   NOTE(review): the '!' in these expansions presumably acts as the
   assembler's statement separator -- confirm against the gas HPPA docs. */
143#define SUBSPA_MILLI .section .text
144#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16
145#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16
146#define ATTR_MILLI
147#define SUBSPA_DATA .section .data
148#define ATTR_DATA
149#define GLOBAL $global$
/* Label helpers: GSYM(sym) defines "sym:", LSYM(sym) defines ".Lsym:",
   and LREF(sym) names ".Lsym" for use as a branch target. */
150#define GSYM(sym) !sym:
151#define LSYM(sym) !CAT(.L,sym:)
152#define LREF(sym) CAT(.L,sym)
153
154#ifdef L_dyncall
/* $$dyncall: indirect-call helper.  %r22 holds either the target address
   itself or a plabel pointer, distinguished by testing bit 30.  For a
   plabel the two low bits are cleared and the two words it points at
   supply the new %r19 (LTP) and the real target address.  In both cases
   %r2 is stored at -24(%r30) in the delay slot of the final branch. */
155 SUBSPA_MILLI
156 ATTR_DATA
157GSYM($$dyncall)
158 .export $$dyncall,millicode
159 .proc
160 .callinfo millicode
161 .entry
162 bb,>=,n %r22,30,LREF(1) ; branch if not plabel address
163 depi 0,31,2,%r22 ; clear the two least significant bits
164 ldw 4(%r22),%r19 ; load new LTP value
165 ldw 0(%r22),%r22 ; load address of target
166LSYM(1)
167 bv %r0(%r22) ; branch to the real target
168 stw %r2,-24(%r30) ; save return address into frame marker
169 .exit
170 .procend
171#endif
172
173#ifdef L_divI
174/* ROUTINES: $$divI, $$divoI
175
176 Single precision divide for signed binary integers.
177
178 The quotient is truncated towards zero.
179 The sign of the quotient is the XOR of the signs of the dividend and
180 divisor.
181 Divide by zero is trapped.
182 Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI.
183
184 INPUT REGISTERS:
185 . arg0 == dividend
186 . arg1 == divisor
187 . mrp == return pc
188 . sr0 == return space when called externally
189
190 OUTPUT REGISTERS:
191 . arg0 = undefined
192 . arg1 = undefined
193 . ret1 = quotient
194
195 OTHER REGISTERS AFFECTED:
196 . r1 = undefined
197
198 SIDE EFFECTS:
199 . Causes a trap under the following conditions:
200 . divisor is zero (traps with ADDIT,= 0,25,0)
201 . dividend==-2**31 and divisor==-1 and routine is $$divoI
202 . (traps with ADDO 26,25,0)
203 . Changes memory at the following places:
204 . NONE
205
206 PERMISSIBLE CONTEXT:
207 . Unwindable.
208 . Suitable for internal or external millicode.
209 . Assumes the special millicode register conventions.
210
211 DISCUSSION:
212 . Branches to other millicode routines using BE
213 . $$divI_# for # being 2,3,4,5,6,7,8,9,10,12,14,15
214 .
215 . For selected divisors, calls a divide by constant routine written by
216 . Karl Pettis. Eligible divisors are 1..15 excluding 11 and 13.
217 .
218 . The only overflow case is -2**31 divided by -1.
219 . Both routines return -2**31 but only $$divoI traps. */
220
221RDEFINE(temp,r1)
222RDEFINE(retreg,ret1) /* r29 */
223RDEFINE(temp1,arg0)
224 SUBSPA_MILLI_DIV
225 ATTR_MILLI
226 .import $$divI_2,millicode
227 .import $$divI_3,millicode
228 .import $$divI_4,millicode
229 .import $$divI_5,millicode
230 .import $$divI_6,millicode
231 .import $$divI_7,millicode
232 .import $$divI_8,millicode
233 .import $$divI_9,millicode
234 .import $$divI_10,millicode
235 .import $$divI_12,millicode
236 .import $$divI_14,millicode
237 .import $$divI_15,millicode
238 .export $$divI,millicode
239 .export $$divoI,millicode
240 .proc
241 .callinfo millicode
242 .entry
243GSYM($$divoI)
244 comib,=,n -1,arg1,LREF(negative1) /* when divisor == -1 */
245GSYM($$divI)
246 ldo -1(arg1),temp /* is there at most one bit set ? */
247 and,<> arg1,temp,r0 /* if not, don't use power of 2 divide */
248 addi,> 0,arg1,r0 /* if divisor > 0, use power of 2 divide */
249 b,n LREF(neg_denom)
250LSYM(pow2)
251 addi,>= 0,arg0,retreg /* if numerator is negative, add the */
252 add arg0,temp,retreg /* (denominator -1) to correct for shifts */
253 extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */
254 extrs retreg,15,16,retreg /* retreg = retreg >> 16 */
255 or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */
256 ldi 0xcc,temp1 /* setup 0xcc in temp1 */
257 extru,= arg1,23,8,temp /* test denominator with 0xff00 */
258 extrs retreg,23,24,retreg /* retreg = retreg >> 8 */
259 or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */
260 ldi 0xaa,temp /* setup 0xaa in temp */
261 extru,= arg1,27,4,r0 /* test denominator with 0xf0 */
262 extrs retreg,27,28,retreg /* retreg = retreg >> 4 */
263 and,= arg1,temp1,r0 /* test denominator with 0xcc */
264 extrs retreg,29,30,retreg /* retreg = retreg >> 2 */
265 and,= arg1,temp,r0 /* test denominator with 0xaa */
266 extrs retreg,30,31,retreg /* retreg = retreg >> 1 */
267 MILLIRETN
268LSYM(neg_denom)
269 addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power of 2 */
270 b,n LREF(regular_seq)
271 sub r0,arg1,temp /* make denominator positive */
272 comb,=,n arg1,temp,LREF(regular_seq) /* test against 0x80000000 and 0 */
273 ldo -1(temp),retreg /* is there at most one bit set ? */
274 and,= temp,retreg,r0 /* if so, the denominator is power of 2 */
275 b,n LREF(regular_seq)
276 sub r0,arg0,retreg /* negate numerator */
277 comb,=,n arg0,retreg,LREF(regular_seq) /* test against 0x80000000 */
278 copy retreg,arg0 /* set up arg0, arg1 and temp */
279 copy temp,arg1 /* before branching to pow2 */
280 b LREF(pow2)
281 ldo -1(arg1),temp
282LSYM(regular_seq)
283 comib,>>=,n 15,arg1,LREF(small_divisor)
284 add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
285LSYM(normal)
286 subi 0,retreg,retreg /* make it positive */
287 sub 0,arg1,temp /* clear carry, */
288 /* negate the divisor */
289 ds 0,temp,0 /* set V-bit to the comple- */
290 /* ment of the divisor sign */
291 add retreg,retreg,retreg /* shift msb bit into carry */
292 ds r0,arg1,temp /* 1st divide step, if no carry */
293 addc retreg,retreg,retreg /* shift retreg with/into carry */
294 ds temp,arg1,temp /* 2nd divide step */
295 addc retreg,retreg,retreg /* shift retreg with/into carry */
296 ds temp,arg1,temp /* 3rd divide step */
297 addc retreg,retreg,retreg /* shift retreg with/into carry */
298 ds temp,arg1,temp /* 4th divide step */
299 addc retreg,retreg,retreg /* shift retreg with/into carry */
300 ds temp,arg1,temp /* 5th divide step */
301 addc retreg,retreg,retreg /* shift retreg with/into carry */
302 ds temp,arg1,temp /* 6th divide step */
303 addc retreg,retreg,retreg /* shift retreg with/into carry */
304 ds temp,arg1,temp /* 7th divide step */
305 addc retreg,retreg,retreg /* shift retreg with/into carry */
306 ds temp,arg1,temp /* 8th divide step */
307 addc retreg,retreg,retreg /* shift retreg with/into carry */
308 ds temp,arg1,temp /* 9th divide step */
309 addc retreg,retreg,retreg /* shift retreg with/into carry */
310 ds temp,arg1,temp /* 10th divide step */
311 addc retreg,retreg,retreg /* shift retreg with/into carry */
312 ds temp,arg1,temp /* 11th divide step */
313 addc retreg,retreg,retreg /* shift retreg with/into carry */
314 ds temp,arg1,temp /* 12th divide step */
315 addc retreg,retreg,retreg /* shift retreg with/into carry */
316 ds temp,arg1,temp /* 13th divide step */
317 addc retreg,retreg,retreg /* shift retreg with/into carry */
318 ds temp,arg1,temp /* 14th divide step */
319 addc retreg,retreg,retreg /* shift retreg with/into carry */
320 ds temp,arg1,temp /* 15th divide step */
321 addc retreg,retreg,retreg /* shift retreg with/into carry */
322 ds temp,arg1,temp /* 16th divide step */
323 addc retreg,retreg,retreg /* shift retreg with/into carry */
324 ds temp,arg1,temp /* 17th divide step */
325 addc retreg,retreg,retreg /* shift retreg with/into carry */
326 ds temp,arg1,temp /* 18th divide step */
327 addc retreg,retreg,retreg /* shift retreg with/into carry */
328 ds temp,arg1,temp /* 19th divide step */
329 addc retreg,retreg,retreg /* shift retreg with/into carry */
330 ds temp,arg1,temp /* 20th divide step */
331 addc retreg,retreg,retreg /* shift retreg with/into carry */
332 ds temp,arg1,temp /* 21st divide step */
333 addc retreg,retreg,retreg /* shift retreg with/into carry */
334 ds temp,arg1,temp /* 22nd divide step */
335 addc retreg,retreg,retreg /* shift retreg with/into carry */
336 ds temp,arg1,temp /* 23rd divide step */
337 addc retreg,retreg,retreg /* shift retreg with/into carry */
338 ds temp,arg1,temp /* 24th divide step */
339 addc retreg,retreg,retreg /* shift retreg with/into carry */
340 ds temp,arg1,temp /* 25th divide step */
341 addc retreg,retreg,retreg /* shift retreg with/into carry */
342 ds temp,arg1,temp /* 26th divide step */
343 addc retreg,retreg,retreg /* shift retreg with/into carry */
344 ds temp,arg1,temp /* 27th divide step */
345 addc retreg,retreg,retreg /* shift retreg with/into carry */
346 ds temp,arg1,temp /* 28th divide step */
347 addc retreg,retreg,retreg /* shift retreg with/into carry */
348 ds temp,arg1,temp /* 29th divide step */
349 addc retreg,retreg,retreg /* shift retreg with/into carry */
350 ds temp,arg1,temp /* 30th divide step */
351 addc retreg,retreg,retreg /* shift retreg with/into carry */
352 ds temp,arg1,temp /* 31st divide step */
353 addc retreg,retreg,retreg /* shift retreg with/into carry */
354 ds temp,arg1,temp /* 32nd divide step, */
355 addc retreg,retreg,retreg /* shift last retreg bit into retreg */
356 xor,>= arg0,arg1,0 /* get correct sign of quotient */
357 sub 0,retreg,retreg /* based on operand signs */
358 MILLIRETN
359 nop
360
361LSYM(small_divisor)
362
363#if defined(CONFIG_64BIT)
364/* Clear the upper 32 bits of the arg1 register. We are working with */
365/* small divisors (and 32-bit integers). We must not be misled */
366/* by "1" bits left in the upper 32 bits. */
367 depd %r0,31,32,%r25
368#endif
369 blr,n arg1,r0
370 nop
371/* table for divisor == 0,1, ... ,15 */
372 addit,= 0,arg1,r0 /* trap if divisor == 0 */
373 nop
374 MILLIRET /* divisor == 1 */
375 copy arg0,retreg
376 MILLI_BEN($$divI_2) /* divisor == 2 */
377 nop
378 MILLI_BEN($$divI_3) /* divisor == 3 */
379 nop
380 MILLI_BEN($$divI_4) /* divisor == 4 */
381 nop
382 MILLI_BEN($$divI_5) /* divisor == 5 */
383 nop
384 MILLI_BEN($$divI_6) /* divisor == 6 */
385 nop
386 MILLI_BEN($$divI_7) /* divisor == 7 */
387 nop
388 MILLI_BEN($$divI_8) /* divisor == 8 */
389 nop
390 MILLI_BEN($$divI_9) /* divisor == 9 */
391 nop
392 MILLI_BEN($$divI_10) /* divisor == 10 */
393 nop
394 b LREF(normal) /* divisor == 11 */
395 add,>= 0,arg0,retreg
396 MILLI_BEN($$divI_12) /* divisor == 12 */
397 nop
398 b LREF(normal) /* divisor == 13 */
399 add,>= 0,arg0,retreg
400 MILLI_BEN($$divI_14) /* divisor == 14 */
401 nop
402 MILLI_BEN($$divI_15) /* divisor == 15 */
403 nop
404
405LSYM(negative1)
406 sub 0,arg0,retreg /* result is negation of dividend */
407 MILLIRET
408 addo arg0,arg1,r0 /* trap iff dividend==0x80000000 && divisor==-1 */
409 .exit
410 .procend
411 .end
412#endif
413
414#ifdef L_divU
415/* ROUTINE: $$divU
416 .
417 . Single precision divide for unsigned integers.
418 .
419 . Quotient is truncated towards zero.
420 . Traps on divide by zero.
421
422 INPUT REGISTERS:
423 . arg0 == dividend
424 . arg1 == divisor
425 . mrp == return pc
426 . sr0 == return space when called externally
427
428 OUTPUT REGISTERS:
429 . arg0 = undefined
430 . arg1 = undefined
431 . ret1 = quotient
432
433 OTHER REGISTERS AFFECTED:
434 . r1 = undefined
435
436 SIDE EFFECTS:
437 . Causes a trap under the following conditions:
438 . divisor is zero
439 . Changes memory at the following places:
440 . NONE
441
442 PERMISSIBLE CONTEXT:
443 . Unwindable.
444 . Does not create a stack frame.
445 . Suitable for internal or external millicode.
446 . Assumes the special millicode register conventions.
447
448 DISCUSSION:
449 . Branches to other millicode routines using BE:
450 . $$divU_# for 3,5,6,7,9,10,12,14,15
451 .
452 . For selected small divisors calls the special divide by constant
453 . routines written by Karl Pettis. These are: 3,5,6,7,9,10,12,14,15. */
454
455RDEFINE(temp,r1)
456RDEFINE(retreg,ret1) /* r29 */
457RDEFINE(temp1,arg0)
458 SUBSPA_MILLI_DIV
459 ATTR_MILLI
460 .export $$divU,millicode
461 .import $$divU_3,millicode
462 .import $$divU_5,millicode
463 .import $$divU_6,millicode
464 .import $$divU_7,millicode
465 .import $$divU_9,millicode
466 .import $$divU_10,millicode
467 .import $$divU_12,millicode
468 .import $$divU_14,millicode
469 .import $$divU_15,millicode
470 .proc
471 .callinfo millicode
472 .entry
473GSYM($$divU)
474/* The subtract is not nullified since it does no harm and can be used
475 by the two cases that branch back to "normal". */
476 ldo -1(arg1),temp /* is there at most one bit set ? */
477 and,= arg1,temp,r0 /* if so, denominator is power of 2 */
478 b LREF(regular_seq) /* otherwise take the general path */
479 addit,= 0,arg1,0 /* trap for zero dvr */
480 copy arg0,retreg /* power of 2: quotient = dividend >> log2(divisor) */
/* The tests below narrow down the shift amount 16, 8, 4, 2 and 1 bits at a
   time; each conditional extru is skipped when the tested divisor bits are
   all zero. */
481 extru,= arg1,15,16,temp /* test denominator with 0xffff0000 */
482 extru retreg,15,16,retreg /* retreg = retreg >> 16 */
483 or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 16) */
484 ldi 0xcc,temp1 /* setup 0xcc in temp1 (temp1 is arg0, already copied to retreg) */
485 extru,= arg1,23,8,temp /* test denominator with 0xff00 */
486 extru retreg,23,24,retreg /* retreg = retreg >> 8 */
487 or arg1,temp,arg1 /* arg1 = arg1 | (arg1 >> 8) */
488 ldi 0xaa,temp /* setup 0xaa in temp */
489 extru,= arg1,27,4,r0 /* test denominator with 0xf0 */
490 extru retreg,27,28,retreg /* retreg = retreg >> 4 */
491 and,= arg1,temp1,r0 /* test denominator with 0xcc */
492 extru retreg,29,30,retreg /* retreg = retreg >> 2 */
493 and,= arg1,temp,r0 /* test denominator with 0xaa */
494 extru retreg,30,31,retreg /* retreg = retreg >> 1 */
495 MILLIRETN
496 nop
497LSYM(regular_seq)
498 comib,>= 15,arg1,LREF(special_divisor) /* divisor <= 15 when compared as signed */
499 subi 0,arg1,temp /* clear carry, negate the divisor */
500 ds r0,temp,r0 /* set V-bit to 1 */
/* General case: 32 divide steps; each addc shifts one quotient bit into
   retreg while temp holds the partial remainder. */
501LSYM(normal)
502 add arg0,arg0,retreg /* shift msb bit into carry */
503 ds r0,arg1,temp /* 1st divide step, if no carry */
504 addc retreg,retreg,retreg /* shift retreg with/into carry */
505 ds temp,arg1,temp /* 2nd divide step */
506 addc retreg,retreg,retreg /* shift retreg with/into carry */
507 ds temp,arg1,temp /* 3rd divide step */
508 addc retreg,retreg,retreg /* shift retreg with/into carry */
509 ds temp,arg1,temp /* 4th divide step */
510 addc retreg,retreg,retreg /* shift retreg with/into carry */
511 ds temp,arg1,temp /* 5th divide step */
512 addc retreg,retreg,retreg /* shift retreg with/into carry */
513 ds temp,arg1,temp /* 6th divide step */
514 addc retreg,retreg,retreg /* shift retreg with/into carry */
515 ds temp,arg1,temp /* 7th divide step */
516 addc retreg,retreg,retreg /* shift retreg with/into carry */
517 ds temp,arg1,temp /* 8th divide step */
518 addc retreg,retreg,retreg /* shift retreg with/into carry */
519 ds temp,arg1,temp /* 9th divide step */
520 addc retreg,retreg,retreg /* shift retreg with/into carry */
521 ds temp,arg1,temp /* 10th divide step */
522 addc retreg,retreg,retreg /* shift retreg with/into carry */
523 ds temp,arg1,temp /* 11th divide step */
524 addc retreg,retreg,retreg /* shift retreg with/into carry */
525 ds temp,arg1,temp /* 12th divide step */
526 addc retreg,retreg,retreg /* shift retreg with/into carry */
527 ds temp,arg1,temp /* 13th divide step */
528 addc retreg,retreg,retreg /* shift retreg with/into carry */
529 ds temp,arg1,temp /* 14th divide step */
530 addc retreg,retreg,retreg /* shift retreg with/into carry */
531 ds temp,arg1,temp /* 15th divide step */
532 addc retreg,retreg,retreg /* shift retreg with/into carry */
533 ds temp,arg1,temp /* 16th divide step */
534 addc retreg,retreg,retreg /* shift retreg with/into carry */
535 ds temp,arg1,temp /* 17th divide step */
536 addc retreg,retreg,retreg /* shift retreg with/into carry */
537 ds temp,arg1,temp /* 18th divide step */
538 addc retreg,retreg,retreg /* shift retreg with/into carry */
539 ds temp,arg1,temp /* 19th divide step */
540 addc retreg,retreg,retreg /* shift retreg with/into carry */
541 ds temp,arg1,temp /* 20th divide step */
542 addc retreg,retreg,retreg /* shift retreg with/into carry */
543 ds temp,arg1,temp /* 21st divide step */
544 addc retreg,retreg,retreg /* shift retreg with/into carry */
545 ds temp,arg1,temp /* 22nd divide step */
546 addc retreg,retreg,retreg /* shift retreg with/into carry */
547 ds temp,arg1,temp /* 23rd divide step */
548 addc retreg,retreg,retreg /* shift retreg with/into carry */
549 ds temp,arg1,temp /* 24th divide step */
550 addc retreg,retreg,retreg /* shift retreg with/into carry */
551 ds temp,arg1,temp /* 25th divide step */
552 addc retreg,retreg,retreg /* shift retreg with/into carry */
553 ds temp,arg1,temp /* 26th divide step */
554 addc retreg,retreg,retreg /* shift retreg with/into carry */
555 ds temp,arg1,temp /* 27th divide step */
556 addc retreg,retreg,retreg /* shift retreg with/into carry */
557 ds temp,arg1,temp /* 28th divide step */
558 addc retreg,retreg,retreg /* shift retreg with/into carry */
559 ds temp,arg1,temp /* 29th divide step */
560 addc retreg,retreg,retreg /* shift retreg with/into carry */
561 ds temp,arg1,temp /* 30th divide step */
562 addc retreg,retreg,retreg /* shift retreg with/into carry */
563 ds temp,arg1,temp /* 31st divide step */
564 addc retreg,retreg,retreg /* shift retreg with/into carry */
565 ds temp,arg1,temp /* 32nd divide step, */
566 MILLIRET
567 addc retreg,retreg,retreg /* shift last retreg bit into retreg */
568
569/* Handle the cases where divisor is a small constant or has high bit on. */
570LSYM(special_divisor)
571/* blr arg1,r0 */
572/* comib,>,n 0,arg1,LREF(big_divisor) ; nullify previous instruction */
573
574/* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from
575 generating such a blr, comib sequence. A problem in nullification. So I
576 rewrote this code. */
577
578#if defined(CONFIG_64BIT)
579/* Clear the upper 32 bits of the arg1 register. We are working with
580 small divisors (and 32-bit unsigned integers). We must not be misled
581 by "1" bits left in the upper 32 bits. */
582 depd %r0,31,32,%r25
583#endif
584 comib,> 0,arg1,LREF(big_divisor) /* divisor is negative as signed, i.e. high bit on */
585 nop
586 blr arg1,r0 /* dispatch into the table below, indexed by divisor */
587 nop
588
/* Dispatch table for divisor == 0 .. 15, one entry per divisor, in order. */
589LSYM(zero_divisor) /* this label is here to provide external visibility */
590 addit,= 0,arg1,0 /* trap for zero dvr */
591 nop
592 MILLIRET /* divisor == 1 */
593 copy arg0,retreg
594 MILLIRET /* divisor == 2 */
595 extru arg0,30,31,retreg
596 MILLI_BEN($$divU_3) /* divisor == 3 */
597 nop
598 MILLIRET /* divisor == 4 */
599 extru arg0,29,30,retreg
600 MILLI_BEN($$divU_5) /* divisor == 5 */
601 nop
602 MILLI_BEN($$divU_6) /* divisor == 6 */
603 nop
604 MILLI_BEN($$divU_7) /* divisor == 7 */
605 nop
606 MILLIRET /* divisor == 8 */
607 extru arg0,28,29,retreg
608 MILLI_BEN($$divU_9) /* divisor == 9 */
609 nop
610 MILLI_BEN($$divU_10) /* divisor == 10 */
611 nop
612 b LREF(normal) /* divisor == 11 */
613 ds r0,temp,r0 /* set V-bit to 1 */
614 MILLI_BEN($$divU_12) /* divisor == 12 */
615 nop
616 b LREF(normal) /* divisor == 13 */
617 ds r0,temp,r0 /* set V-bit to 1 */
618 MILLI_BEN($$divU_14) /* divisor == 14 */
619 nop
620 MILLI_BEN($$divU_15) /* divisor == 15 */
621 nop
622
623/* Handle the case where the high bit is on in the divisor.
624 Compute: if( dividend>=divisor) quotient=1; else quotient=0;
625 Note: dividend>=divisor iff dividend-divisor does not borrow
626 and not borrow iff carry. */
627LSYM(big_divisor)
628 sub arg0,arg1,r0 /* result discarded; only the carry is wanted */
629 MILLIRET
630 addc r0,r0,retreg /* quotient = carry (0 or 1) */
631 .exit
632 .procend
633 .end
634#endif
635
636#ifdef L_remI
637/* ROUTINE: $$remI
638
639 DESCRIPTION:
640 . $$remI returns the remainder of the division of two signed 32-bit
641 . integers. The sign of the remainder is the same as the sign of
642 . the dividend.
643
644
645 INPUT REGISTERS:
646 . arg0 == dividend
647 . arg1 == divisor
648 . mrp == return pc
649 . sr0 == return space when called externally
650
651 OUTPUT REGISTERS:
652 . arg0 = destroyed
653 . arg1 = destroyed
654 . ret1 = remainder
655
656 OTHER REGISTERS AFFECTED:
657 . r1 = undefined
658
659 SIDE EFFECTS:
660 . Causes a trap under the following conditions: DIVIDE BY ZERO
661 . Changes memory at the following places: NONE
662
663 PERMISSIBLE CONTEXT:
664 . Unwindable
665 . Does not create a stack frame
666 . Is usable for internal or external millicode
667
668 DISCUSSION:
669 . Calls other millicode routines via mrp: NONE
670 . Calls other millicode routines: NONE */
671
672RDEFINE(tmp,r1)
673RDEFINE(retreg,ret1)
674
675 SUBSPA_MILLI
676 ATTR_MILLI
677 .proc
678 .callinfo millicode
679 .entry
680GSYM($$remI)
681GSYM($$remoI) /* second exported name at the same address */
682 .export $$remI,MILLICODE
683 .export $$remoI,MILLICODE
684 ldo -1(arg1),tmp /* is there at most one bit set ? */
685 and,<> arg1,tmp,r0 /* if not, don't use power of 2 */
686 addi,> 0,arg1,r0 /* if denominator > 0, use power */
687 /* of 2 */
688 b,n LREF(neg_denom) /* not a positive power of 2 */
/* Positive power-of-2 divisor: tmp = divisor-1 is the remainder mask. */
689LSYM(pow2)
690 comb,>,n 0,arg0,LREF(neg_num) /* is numerator < 0 ? */
691 and arg0,tmp,retreg /* get the result */
692 MILLIRETN
693LSYM(neg_num)
694 subi 0,arg0,arg0 /* negate numerator */
695 and arg0,tmp,retreg /* get the result */
696 subi 0,retreg,retreg /* negate result */
697 MILLIRETN
698LSYM(neg_denom)
699 addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power */
700 /* of 2 */
701 b,n LREF(regular_seq)
702 sub r0,arg1,tmp /* make denominator positive */
703 comb,=,n arg1,tmp,LREF(regular_seq) /* test against 0x80000000 and 0 */
704 ldo -1(tmp),retreg /* is there at most one bit set ? */
705 and,= tmp,retreg,r0 /* if not, go to regular_seq */
706 b,n LREF(regular_seq)
707 comb,>,n 0,arg0,LREF(neg_num_2) /* if arg0 < 0, negate it */
708 and arg0,retreg,retreg /* mask with (-divisor)-1 */
709 MILLIRETN
710LSYM(neg_num_2)
711 subi 0,arg0,tmp /* tmp = -arg0 (negate numerator) */
712 and tmp,retreg,retreg /* mask with (-divisor)-1 */
713 subi 0,retreg,retreg /* negate result */
714 MILLIRETN
/* General case: divide |dividend| by the divisor in 32 divide steps, with
   the partial remainder in tmp, then fix up the remainder and its sign. */
715LSYM(regular_seq)
716 addit,= 0,arg1,0 /* trap if div by zero */
717 add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
718 sub 0,retreg,retreg /* make it positive */
719 sub 0,arg1, tmp /* clear carry, */
720 /* negate the divisor */
721 ds 0, tmp,0 /* set V-bit to the comple- */
722 /* ment of the divisor sign */
723 or 0,0, tmp /* clear tmp */
724 add retreg,retreg,retreg /* shift msb bit into carry */
725 ds tmp,arg1, tmp /* 1st divide step, if no carry */
726 /* out, msb of quotient = 0 */
727 addc retreg,retreg,retreg /* shift retreg with/into carry */
728LSYM(t1)
729 ds tmp,arg1, tmp /* 2nd divide step */
730 addc retreg,retreg,retreg /* shift retreg with/into carry */
731 ds tmp,arg1, tmp /* 3rd divide step */
732 addc retreg,retreg,retreg /* shift retreg with/into carry */
733 ds tmp,arg1, tmp /* 4th divide step */
734 addc retreg,retreg,retreg /* shift retreg with/into carry */
735 ds tmp,arg1, tmp /* 5th divide step */
736 addc retreg,retreg,retreg /* shift retreg with/into carry */
737 ds tmp,arg1, tmp /* 6th divide step */
738 addc retreg,retreg,retreg /* shift retreg with/into carry */
739 ds tmp,arg1, tmp /* 7th divide step */
740 addc retreg,retreg,retreg /* shift retreg with/into carry */
741 ds tmp,arg1, tmp /* 8th divide step */
742 addc retreg,retreg,retreg /* shift retreg with/into carry */
743 ds tmp,arg1, tmp /* 9th divide step */
744 addc retreg,retreg,retreg /* shift retreg with/into carry */
745 ds tmp,arg1, tmp /* 10th divide step */
746 addc retreg,retreg,retreg /* shift retreg with/into carry */
747 ds tmp,arg1, tmp /* 11th divide step */
748 addc retreg,retreg,retreg /* shift retreg with/into carry */
749 ds tmp,arg1, tmp /* 12th divide step */
750 addc retreg,retreg,retreg /* shift retreg with/into carry */
751 ds tmp,arg1, tmp /* 13th divide step */
752 addc retreg,retreg,retreg /* shift retreg with/into carry */
753 ds tmp,arg1, tmp /* 14th divide step */
754 addc retreg,retreg,retreg /* shift retreg with/into carry */
755 ds tmp,arg1, tmp /* 15th divide step */
756 addc retreg,retreg,retreg /* shift retreg with/into carry */
757 ds tmp,arg1, tmp /* 16th divide step */
758 addc retreg,retreg,retreg /* shift retreg with/into carry */
759 ds tmp,arg1, tmp /* 17th divide step */
760 addc retreg,retreg,retreg /* shift retreg with/into carry */
761 ds tmp,arg1, tmp /* 18th divide step */
762 addc retreg,retreg,retreg /* shift retreg with/into carry */
763 ds tmp,arg1, tmp /* 19th divide step */
764 addc retreg,retreg,retreg /* shift retreg with/into carry */
765 ds tmp,arg1, tmp /* 20th divide step */
766 addc retreg,retreg,retreg /* shift retreg with/into carry */
767 ds tmp,arg1, tmp /* 21st divide step */
768 addc retreg,retreg,retreg /* shift retreg with/into carry */
769 ds tmp,arg1, tmp /* 22nd divide step */
770 addc retreg,retreg,retreg /* shift retreg with/into carry */
771 ds tmp,arg1, tmp /* 23rd divide step */
772 addc retreg,retreg,retreg /* shift retreg with/into carry */
773 ds tmp,arg1, tmp /* 24th divide step */
774 addc retreg,retreg,retreg /* shift retreg with/into carry */
775 ds tmp,arg1, tmp /* 25th divide step */
776 addc retreg,retreg,retreg /* shift retreg with/into carry */
777 ds tmp,arg1, tmp /* 26th divide step */
778 addc retreg,retreg,retreg /* shift retreg with/into carry */
779 ds tmp,arg1, tmp /* 27th divide step */
780 addc retreg,retreg,retreg /* shift retreg with/into carry */
781 ds tmp,arg1, tmp /* 28th divide step */
782 addc retreg,retreg,retreg /* shift retreg with/into carry */
783 ds tmp,arg1, tmp /* 29th divide step */
784 addc retreg,retreg,retreg /* shift retreg with/into carry */
785 ds tmp,arg1, tmp /* 30th divide step */
786 addc retreg,retreg,retreg /* shift retreg with/into carry */
787 ds tmp,arg1, tmp /* 31st divide step */
788 addc retreg,retreg,retreg /* shift retreg with/into carry */
789 ds tmp,arg1, tmp /* 32nd divide step, */
790 addc retreg,retreg,retreg /* shift last bit into retreg */
791 movb,>=,n tmp,retreg,LREF(finish) /* branch if pos. tmp */
792 add,< arg1,0,0 /* if arg1 > 0, add arg1 */
793 add,tr tmp,arg1,retreg /* for correcting remainder tmp */
794 sub tmp,arg1,retreg /* else add absolute value arg1 */
795LSYM(finish)
796 add,>= arg0,0,0 /* set sign of remainder */
797 sub 0,retreg,retreg /* to sign of dividend */
798 MILLIRET
799 nop
800 .exit
801 .procend
802#ifdef milliext
803 .origin 0x00000200
804#endif
805 .end
806#endif
807
808#ifdef L_remU
809/* ROUTINE: $$remU
810 . Single precision divide for remainder with unsigned binary integers.
811 .
812 . The remainder must be dividend-(dividend/divisor)*divisor.
813 . Divide by zero is trapped.
814
815 INPUT REGISTERS:
816 . arg0 == dividend
817 . arg1 == divisor
818 . mrp == return pc
819 . sr0 == return space when called externally
820
821 OUTPUT REGISTERS:
822 . arg0 = undefined
823 . arg1 = undefined
824 . ret1 = remainder
825
826 OTHER REGISTERS AFFECTED:
827 . r1 = undefined
828
829 SIDE EFFECTS:
830 . Causes a trap under the following conditions: DIVIDE BY ZERO
831 . Changes memory at the following places: NONE
832
833 PERMISSIBLE CONTEXT:
834 . Unwindable.
835 . Does not create a stack frame.
836 . Suitable for internal or external millicode.
837 . Assumes the special millicode register conventions.
838
839 DISCUSSION:
840 . Calls other millicode routines using mrp: NONE
841 . Calls other millicode routines: NONE */
842
843
844RDEFINE(temp,r1)
845RDEFINE(rmndr,ret1) /* r29 */
846 SUBSPA_MILLI
847 ATTR_MILLI
848 .export $$remU,millicode
849 .proc
850 .callinfo millicode
851 .entry
852GSYM($$remU)
853 ldo -1(arg1),temp /* is there at most one bit set ? */
854 and,= arg1,temp,r0 /* if not, don't use power of 2 */
855 b LREF(regular_seq) /* skipped when the divisor is a power of 2 */
856 addit,= 0,arg1,r0 /* trap on div by zero */
857 and arg0,temp,rmndr /* get the result for power of 2 */
858 MILLIRETN
859LSYM(regular_seq)
860 comib,>=,n 0,arg1,LREF(special_case) /* divisor <= 0 when compared as signed */
861 subi 0,arg1,rmndr /* clear carry, negate the divisor */
862 ds r0,rmndr,r0 /* set V-bit to 1 */
/* 32 divide steps: rmndr holds the partial remainder, temp collects the
   bits shifted through carry. */
863 add arg0,arg0,temp /* shift msb bit into carry */
864 ds r0,arg1,rmndr /* 1st divide step, if no carry */
865 addc temp,temp,temp /* shift temp with/into carry */
866 ds rmndr,arg1,rmndr /* 2nd divide step */
867 addc temp,temp,temp /* shift temp with/into carry */
868 ds rmndr,arg1,rmndr /* 3rd divide step */
869 addc temp,temp,temp /* shift temp with/into carry */
870 ds rmndr,arg1,rmndr /* 4th divide step */
871 addc temp,temp,temp /* shift temp with/into carry */
872 ds rmndr,arg1,rmndr /* 5th divide step */
873 addc temp,temp,temp /* shift temp with/into carry */
874 ds rmndr,arg1,rmndr /* 6th divide step */
875 addc temp,temp,temp /* shift temp with/into carry */
876 ds rmndr,arg1,rmndr /* 7th divide step */
877 addc temp,temp,temp /* shift temp with/into carry */
878 ds rmndr,arg1,rmndr /* 8th divide step */
879 addc temp,temp,temp /* shift temp with/into carry */
880 ds rmndr,arg1,rmndr /* 9th divide step */
881 addc temp,temp,temp /* shift temp with/into carry */
882 ds rmndr,arg1,rmndr /* 10th divide step */
883 addc temp,temp,temp /* shift temp with/into carry */
884 ds rmndr,arg1,rmndr /* 11th divide step */
885 addc temp,temp,temp /* shift temp with/into carry */
886 ds rmndr,arg1,rmndr /* 12th divide step */
887 addc temp,temp,temp /* shift temp with/into carry */
888 ds rmndr,arg1,rmndr /* 13th divide step */
889 addc temp,temp,temp /* shift temp with/into carry */
890 ds rmndr,arg1,rmndr /* 14th divide step */
891 addc temp,temp,temp /* shift temp with/into carry */
892 ds rmndr,arg1,rmndr /* 15th divide step */
893 addc temp,temp,temp /* shift temp with/into carry */
894 ds rmndr,arg1,rmndr /* 16th divide step */
895 addc temp,temp,temp /* shift temp with/into carry */
896 ds rmndr,arg1,rmndr /* 17th divide step */
897 addc temp,temp,temp /* shift temp with/into carry */
898 ds rmndr,arg1,rmndr /* 18th divide step */
899 addc temp,temp,temp /* shift temp with/into carry */
900 ds rmndr,arg1,rmndr /* 19th divide step */
901 addc temp,temp,temp /* shift temp with/into carry */
902 ds rmndr,arg1,rmndr /* 20th divide step */
903 addc temp,temp,temp /* shift temp with/into carry */
904 ds rmndr,arg1,rmndr /* 21st divide step */
905 addc temp,temp,temp /* shift temp with/into carry */
906 ds rmndr,arg1,rmndr /* 22nd divide step */
907 addc temp,temp,temp /* shift temp with/into carry */
908 ds rmndr,arg1,rmndr /* 23rd divide step */
909 addc temp,temp,temp /* shift temp with/into carry */
910 ds rmndr,arg1,rmndr /* 24th divide step */
911 addc temp,temp,temp /* shift temp with/into carry */
912 ds rmndr,arg1,rmndr /* 25th divide step */
913 addc temp,temp,temp /* shift temp with/into carry */
914 ds rmndr,arg1,rmndr /* 26th divide step */
915 addc temp,temp,temp /* shift temp with/into carry */
916 ds rmndr,arg1,rmndr /* 27th divide step */
917 addc temp,temp,temp /* shift temp with/into carry */
918 ds rmndr,arg1,rmndr /* 28th divide step */
919 addc temp,temp,temp /* shift temp with/into carry */
920 ds rmndr,arg1,rmndr /* 29th divide step */
921 addc temp,temp,temp /* shift temp with/into carry */
922 ds rmndr,arg1,rmndr /* 30th divide step */
923 addc temp,temp,temp /* shift temp with/into carry */
924 ds rmndr,arg1,rmndr /* 31st divide step */
925 addc temp,temp,temp /* shift temp with/into carry */
926 ds rmndr,arg1,rmndr /* 32nd divide step, */
927 comiclr,<= 0,rmndr,r0 /* skip the correction when rmndr >= 0 */
928 add rmndr,arg1,rmndr /* correction */
929 MILLIRETN
930 nop
931
932/* Putting >= on the last DS and deleting COMICLR does not work! */
/* special_case: if arg0 >= arg1 (unsigned) the remainder is arg0-arg1 and
   the copy is skipped; otherwise the remainder is arg0 itself. */
933LSYM(special_case)
934 sub,>>= arg0,arg1,rmndr
935 copy arg0,rmndr
936 MILLIRETN
937 nop
938 .exit
939 .procend
940 .end
941#endif
942
943#ifdef L_div_const
944/* ROUTINE: $$divI_2
945 . $$divI_3 $$divU_3
946 . $$divI_4
947 . $$divI_5 $$divU_5
948 . $$divI_6 $$divU_6
949 . $$divI_7 $$divU_7
950 . $$divI_8
951 . $$divI_9 $$divU_9
952 . $$divI_10 $$divU_10
953 .
954 . $$divI_12 $$divU_12
955 .
956 . $$divI_14 $$divU_14
957 . $$divI_15 $$divU_15
958 . $$divI_16
959 . $$divI_17 $$divU_17
960 .
961 . Divide by selected constants for single precision binary integers.
962
963 INPUT REGISTERS:
964 . arg0 == dividend
965 . mrp == return pc
966 . sr0 == return space when called externally
967
968 OUTPUT REGISTERS:
969 . arg0 = undefined
970 . arg1 = undefined
971 . ret1 = quotient
972
973 OTHER REGISTERS AFFECTED:
974 . r1 = undefined
975
976 SIDE EFFECTS:
977 . Causes a trap under the following conditions: NONE
978 . Changes memory at the following places: NONE
979
980 PERMISSIBLE CONTEXT:
981 . Unwindable.
982 . Does not create a stack frame.
983 . Suitable for internal or external millicode.
984 . Assumes the special millicode register conventions.
985
986 DISCUSSION:
987 . Calls other millicode routines using mrp: NONE
988 . Calls other millicode routines: NONE */
989
990
991/* TRUNCATED DIVISION BY SMALL INTEGERS
992
993 We are interested in q(x) = floor(x/y), where x >= 0 and y > 0
994 (with y fixed).
995
996 Let a = floor(z/y), for some choice of z. Note that z will be
997 chosen so that division by z is cheap.
998
999 Let r be the remainder(z/y). In other words, r = z - ay.
1000
1001 Now, our method is to choose a value for b such that
1002
1003 q'(x) = floor((ax+b)/z)
1004
1005 is equal to q(x) over as large a range of x as possible. If the
1006 two are equal over a sufficiently large range, and if it is easy to
1007 form the product (ax), and it is easy to divide by z, then we can
1008 perform the division much faster than the general division algorithm.
1009
1010 So, we want the following to be true:
1011
1012 . For x in the following range:
1013 .
1014 . ky <= x < (k+1)y
1015 .
1016 . implies that
1017 .
1018 . k <= (ax+b)/z < (k+1)
1019
1020 We want to determine b such that this is true for all k in the
1021 range {0..K} for some maximum K.
1022
1023 Since (ax+b) is an increasing function of x, we can take each
1024 bound separately to determine the "best" value for b.
1025
1026 (ax+b)/z < (k+1) implies
1027
1028 a((k+1)y-1)+b < (k+1)z implies
1029
1030 b < a + (k+1)(z-ay) implies
1031
1032 b < a + (k+1)r
1033
1034 This needs to be true for all k in the range {0..K}. In
1035 particular, it is true for k = 0 and this leads to a maximum
1036 acceptable value for b.
1037
1038 b < a+r or b <= a+r-1
1039
1040 Taking the other bound, we have
1041
1042 k <= (ax+b)/z implies
1043
1044 k <= (aky+b)/z implies
1045
1046 k(z-ay) <= b implies
1047
1048 kr <= b
1049
1050 Clearly, the largest range for k will be achieved by maximizing b,
1051 when r is not zero. When r is zero, then the simplest choice for b
1052 is 0. When r is not 0, set
1053
1054 . b = a+r-1
1055
1056 Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y)
1057 for all x in the range:
1058
1059 . 0 <= x < (K+1)y
1060
1061 We need to determine what K is. Of our two bounds,
1062
1063 . b < a+(k+1)r is satisfied for all k >= 0, by construction.
1064
1065 The other bound is
1066
1067 . kr <= b
1068
1069 This is always true if r = 0. If r is not 0 (the usual case), then
1070 K = floor((a+r-1)/r), is the maximum value for k.
1071
1072 Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct
1073 answer for q(x) = floor(x/y) when x is in the range
1074
1075 [0,(K+1)y-1] where K = floor((a+r-1)/r)
1076
1077 To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that
1078 the formula for q'(x) yields the correct value of q(x) for all x
1079 representable by a single word in HPPA.
1080
1081 We are also constrained in that computing the product (ax), adding
1082 b, and dividing by z must all be done quickly, otherwise we will be
1083 better off going through the general algorithm using the DS
1084 instruction, which uses approximately 70 cycles.
1085
1086 For each y, there is a choice of z which satisfies the constraints
1087 for (K+1)y >= 2**32. We may not, however, be able to satisfy the
1088 timing constraints for arbitrary y. It seems that z being equal to
1089 a power of 2 or a power of 2 minus 1 is as good as we can do, since
1090 it minimizes the time to do division by z. We want the choice of z
1091 to also result in a value for (a) that minimizes the computation of
1092 the product (ax). This is best achieved if (a) has a regular bit
1093 pattern (so the multiplication can be done with shifts and adds).
1094 The value of (a) also needs to be less than 2**32 so the product is
1095 always guaranteed to fit in 2 words.
1096
1097 In actual practice, the following should be done:
1098
1099 1) For negative x, you should take the absolute value and remember
1100 . the fact so that the result can be negated. This obviously does
1101 . not apply in the unsigned case.
1102 2) For even y, you should factor out the power of 2 that divides y
1103 . and divide x by it. You can then proceed by dividing by the
1104 . odd factor of y.
1105
1106 Here is a table of some odd values of y, and corresponding choices
1107 for z which are "good".
1108
1109 y z r a (hex) max x (hex)
1110
1111 3 2**32 1 55555555 100000001
1112 5 2**32 1 33333333 100000003
1113 7 2**24-1 0 249249 (infinite)
1114 9 2**24-1 0 1c71c7 (infinite)
1115 11 2**20-1 0 1745d (infinite)
1116 13 2**24-1 0 13b13b (infinite)
1117 15 2**32 1 11111111 10000000d
1118 17 2**32 1 f0f0f0f 10000000f
1119
1120 If r is 1, then b = a+r-1 = a. This simplifies the computation
1121 of (ax+b), since you can compute (x+1)(a) instead. If r is 0,
1122 then b = 0 is ok to use which simplifies (ax+b).
1123
1124 The bit patterns for 55555555, 33333333, and 11111111 are obviously
1125 very regular. The bit patterns for the other values of a above are:
1126
1127 y (hex) (binary)
1128
1129 7 249249 001001001001001001001001 << regular >>
1130 9 1c71c7 000111000111000111000111 << regular >>
1131 11 1745d 000000010111010001011101 << irregular >>
1132 13 13b13b 000100111011000100111011 << irregular >>
1133
1134 The bit patterns for (a) corresponding to (y) of 11 and 13 may be
1135 too irregular to warrant using this method.
1136
1137 When z is a power of 2 minus 1, then the division by z is slightly
1138 more complicated, involving an iterative solution.
1139
1140 The code presented here solves division by 1 through 17, except for
1141 11 and 13. There are algorithms for both signed and unsigned
1142 quantities given.
1143
1144 TIMINGS (cycles)
1145
1146 divisor positive negative unsigned
1147
1148 . 1 2 2 2
1149 . 2 4 4 2
1150 . 3 19 21 19
1151 . 4 4 4 2
1152 . 5 18 22 19
1153 . 6 19 22 19
1154 . 8 4 4 2
1155 . 10 18 19 17
1156 . 12 18 20 18
1157 . 15 16 18 16
1158 . 16 4 4 2
1159 . 17 16 18 16
1160
1161 Now, the algorithm for 7, 9, and 14 is an iterative one. That is,
1162 a loop body is executed until the tentative quotient is 0. The
1163 number of times the loop body is executed varies depending on the
1164 dividend, but is never more than two times. If the dividend is
1165 less than the divisor, then the loop body is not executed at all.
1166 Each iteration adds 4 cycles to the timings.
1167
1168 divisor positive negative unsigned
1169
1170 . 7 19+4n 20+4n 20+4n n = number of iterations
1171 . 9 21+4n 22+4n 21+4n
1172 . 14 21+4n 22+4n 20+4n
1173
1174 To give an idea of how the number of iterations varies, here is a
1175 table of dividend versus number of iterations when dividing by 7.
1176
1177 smallest largest required
1178 dividend dividend iterations
1179
1180 . 0 6 0
1181 . 7 0x6ffffff 1
1182 0x1000006 0xffffffff 2
1183
1184 There is some overlap in the range of numbers requiring 1 and 2
1185 iterations. */
1186
1187RDEFINE(t2,r1)
1188RDEFINE(x2,arg0) /* r26 */
1189RDEFINE(t1,arg1) /* r25 */
1190RDEFINE(x1,ret1) /* r29 */
1191
1192 SUBSPA_MILLI_DIV
1193 ATTR_MILLI
1194
1195 .proc
1196 .callinfo millicode
1197 .entry
1198/* NONE of these routines require a stack frame
1199 ALL of these routines are unwindable from millicode */
1200
1201GSYM($$divide_by_constant)
1202 .export $$divide_by_constant,millicode
1203/* Provides a "nice" label for the code covered by the unwind descriptor
1204 for things like gprof. */
1205
1206/* DIVISION BY 2 (shift by 1) */
/* Signed divide by 2: a negative dividend is biased by 1 before the
   arithmetic shift so that the quotient truncates toward zero. */
1207GSYM($$divI_2)
1208 .export $$divI_2,millicode
1209 comclr,>= arg0,0,0 /* skip the bias when arg0 >= 0 */
1210 addi 1,arg0,arg0 /* bias negative dividend by divisor-1 */
1211 MILLIRET
1212 extrs arg0,30,31,ret1 /* ret1 = arg0 >> 1 (sign-extended) */
1213
1214
1215/* DIVISION BY 4 (shift by 2) */
/* Signed divide by 4: a negative dividend is biased by 3 before the
   arithmetic shift so that the quotient truncates toward zero. */
1216GSYM($$divI_4)
1217 .export $$divI_4,millicode
1218 comclr,>= arg0,0,0 /* skip the bias when arg0 >= 0 */
1219 addi 3,arg0,arg0 /* bias negative dividend by divisor-1 */
1220 MILLIRET
1221 extrs arg0,29,30,ret1 /* ret1 = arg0 >> 2 (sign-extended) */
1222
1223
1224/* DIVISION BY 8 (shift by 3) */
/* Signed divide by 8: a negative dividend is biased by 7 before the
   arithmetic shift so that the quotient truncates toward zero. */
1225GSYM($$divI_8)
1226 .export $$divI_8,millicode
1227 comclr,>= arg0,0,0 /* skip the bias when arg0 >= 0 */
1228 addi 7,arg0,arg0 /* bias negative dividend by divisor-1 */
1229 MILLIRET
1230 extrs arg0,28,29,ret1 /* ret1 = arg0 >> 3 (sign-extended) */
1231
1232/* DIVISION BY 16 (shift by 4) */
/* Signed divide by 16: a negative dividend is biased by 15 before the
   arithmetic shift so that the quotient truncates toward zero. */
1233GSYM($$divI_16)
1234 .export $$divI_16,millicode
1235 comclr,>= arg0,0,0 /* skip the bias when arg0 >= 0 */
1236 addi 15,arg0,arg0 /* bias negative dividend by divisor-1 */
1237 MILLIRET
1238 extrs arg0,27,28,ret1 /* ret1 = arg0 >> 4 (sign-extended) */
1239
1240/****************************************************************************
1241*
1242* DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these
1243*
1244* includes 3,5,15,17 and also 6,10,12
1245*
1246****************************************************************************/
1247
1248/* DIVISION BY 3 (use z = 2**32; a = 55555555) */
1249
1250GSYM($$divI_3)
1251 .export $$divI_3,millicode
1252 comb,<,N x2,0,LREF(neg3) /* negative dividend: take the neg3 path */
1253
1254 addi 1,x2,x2 /* this cannot overflow */
1255 extru x2,1,2,x1 /* multiply by 5 to get started */
1256 sh2add x2,x2,x2 /* x2 = 4*x2 + x2 (low word); x1 already holds the two bits shifted out */
1257 b LREF(pos) /* pos multiplies by 0x11, 0x101 and 0x10001; 5 * 0x11 * 0x101 * 0x10001 = 0x55555555 */
1258 addc x1,0,x1 /* fold the carry of the add into the high word */
1259
1260LSYM(neg3)
1261 subi 1,x2,x2 /* this cannot overflow */
1262 extru x2,1,2,x1 /* multiply by 5 to get started */
1263 sh2add x2,x2,x2 /* x2 = 4*x2 + x2 (low word) */
1264 b LREF(neg) /* same multiply chain as pos, but neg negates the quotient before returning */
1265 addc x1,0,x1 /* fold the carry of the add into the high word */
1266
1267GSYM($$divU_3)
1268 .export $$divU_3,millicode
1269 addi 1,x2,x2 /* this CAN overflow */
1270 addc 0,0,x1 /* x1 = carry out of the increment, so <x1,x2> is the 33-bit dividend+1 */
1271 shd x1,x2,30,t1 /* multiply by 5 to get started */
1272 sh2add x2,x2,x2 /* x2 = 4*x2 + x2 (low word) */
1273 b LREF(pos)
1274 addc x1,t1,x1 /* high word: x1 + bits shifted out (t1) + carry */
1275
1276/* DIVISION BY 5 (use z = 2**32; a = 33333333) */
1277
1278GSYM($$divI_5)
1279 .export $$divI_5,millicode
1280 comb,<,N x2,0,LREF(neg5) /* negative dividend: take the neg5 path */
1281
1282 addi 3,x2,t1 /* this cannot overflow */
1283 sh1add x2,t1,x2 /* multiply by 3 to get started */
1284 b LREF(pos) /* pos multiplies by 0x11, 0x101 and 0x10001; 3 * 0x11 * 0x101 * 0x10001 = 0x33333333 */
1285 addc 0,0,x1 /* high word = carry out of the 3*(x2+1) computation */
1286
1287LSYM(neg5)
1288 sub 0,x2,x2 /* negate x2 */
1289 addi 1,x2,x2 /* this cannot overflow */
1290 shd 0,x2,31,x1 /* get top bit (can be 1) */
1291 sh1add x2,x2,x2 /* multiply by 3 to get started */
1292 b LREF(neg) /* same multiply chain as pos, but neg negates the quotient before returning */
1293 addc x1,0,x1 /* fold the carry of the add into the high word */
1294
1295GSYM($$divU_5)
1296 .export $$divU_5,millicode
1297 addi 1,x2,x2 /* this CAN overflow */
1298 addc 0,0,x1 /* x1 = carry out of the increment */
1299 shd x1,x2,31,t1 /* multiply by 3 to get started */
1300 sh1add x2,x2,x2 /* x2 = 2*x2 + x2 (low word) */
1301 b LREF(pos)
1302 addc t1,x1,x1 /* high word: x1 + bits shifted out (t1) + carry */
1303
1304/* DIVISION BY 6 (shift to divide by 2 then divide by 3) */
1305GSYM($$divI_6)
1306 .export $$divI_6,millicode
1307 comb,<,N x2,0,LREF(neg6) /* negative dividend: take the neg6 path */
1308 extru x2,30,31,x2 /* divide by 2 */
1309 addi 5,x2,t1 /* compute 5*(x2+1) = 5*x2+5 */
1310 sh2add x2,t1,x2 /* multiply by 5 to get started */
1311 b LREF(pos) /* finish the multiply by 0x55555555 exactly as for division by 3 */
1312 addc 0,0,x1 /* high word = carry out of the add */
1313
1314LSYM(neg6)
1315 subi 2,x2,x2 /* negate, divide by 2, and add 1 */
1316 /* negation and adding 1 are done */
1317 /* at the same time by the SUBI */
1318 extru x2,30,31,x2 /* the halving step: (2 - x) >> 1 = |x|/2 + 1 */
1319 shd 0,x2,30,x1 /* x1 = the two bits that x2 << 2 shifts out */
1320 sh2add x2,x2,x2 /* multiply by 5 to get started */
1321 b LREF(neg) /* same multiply chain as pos, but neg negates the quotient before returning */
1322 addc x1,0,x1 /* fold the carry of the add into the high word */
1323
1324GSYM($$divU_6)
1325 .export $$divU_6,millicode
1326 extru x2,30,31,x2 /* divide by 2 */
1327 addi 1,x2,x2 /* cannot carry */
1328 shd 0,x2,30,x1 /* multiply by 5 to get started */
1329 sh2add x2,x2,x2 /* x2 = 4*x2 + x2 (low word) */
1330 b LREF(pos)
1331 addc x1,0,x1 /* fold the carry of the add into the high word */
1332
1333/* DIVISION BY 10 (shift to divide by 2 then divide by 5) */
1334GSYM($$divU_10)
1335 .export $$divU_10,millicode
1336 extru x2,30,31,x2 /* divide by 2 */
1337 addi 3,x2,t1 /* compute 3*(x2+1) = (3*x2)+3 */
1338 sh1add x2,t1,x2 /* multiply by 3 to get started */
1339 addc 0,0,x1 /* high word = carry out of the add; then fall through into pos */
1340LSYM(pos) /* shared tail for non-negative dividends: <x1,x2> *= 0x11111111, quotient is left in x1 */
1341 shd x1,x2,28,t1 /* multiply by 0x11 */
1342 shd x2,0,28,t2 /* t2 = x2 << 4 (low word); t1 above is the matching high word */
1343 add x2,t2,x2
1344 addc x1,t1,x1 /* 64-bit add: <x1,x2> += <x1,x2> << 4 */
1345LSYM(pos_for_17) /* entry used by the divide-by-17 routines, which skip the 0x11 step */
1346 shd x1,x2,24,t1 /* multiply by 0x101 */
1347 shd x2,0,24,t2
1348 add x2,t2,x2
1349 addc x1,t1,x1 /* <x1,x2> += <x1,x2> << 8 */
1350
1351 shd x1,x2,16,t1 /* multiply by 0x10001 */
1352 shd x2,0,16,t2
1353 add x2,t2,x2
1354 MILLIRET
1355 addc x1,t1,x1 /* final high-word add completes the quotient in x1 (ret1) */
1356
1357GSYM($$divI_10)
1358 .export $$divI_10,millicode
1359 comb,< x2,0,LREF(neg10) /* negative dividend: take the neg10 path */
1360 copy 0,x1 /* not nullified, so the high word is cleared on both paths */
1361 extru x2,30,31,x2 /* divide by 2 */
1362 addib,TR 1,x2,LREF(pos) /* add 1 (cannot overflow) */
1363 sh1add x2,x2,x2 /* multiply by 3 to get started */
1364
1365LSYM(neg10)
1366 subi 2,x2,x2 /* negate, divide by 2, and add 1 */
1367 /* negation and adding 1 are done */
1368 /* at the same time by the SUBI */
1369 extru x2,30,31,x2 /* the halving step: (2 - x) >> 1 = |x|/2 + 1 */
1370 sh1add x2,x2,x2 /* multiply by 3 to get started */
1371LSYM(neg) /* shared tail for negated dividends: same multiply as pos, then the quotient is negated */
1372 shd x1,x2,28,t1 /* multiply by 0x11 */
1373 shd x2,0,28,t2
1374 add x2,t2,x2
1375 addc x1,t1,x1
1376LSYM(neg_for_17) /* entry used by neg17, which skips the 0x11 step */
1377 shd x1,x2,24,t1 /* multiply by 0x101 */
1378 shd x2,0,24,t2
1379 add x2,t2,x2
1380 addc x1,t1,x1
1381
1382 shd x1,x2,16,t1 /* multiply by 0x10001 */
1383 shd x2,0,16,t2
1384 add x2,t2,x2
1385 addc x1,t1,x1
1386 MILLIRET
1387 sub 0,x1,x1 /* negate result: the magnitude was computed from the negated dividend */
1388
1389/* DIVISION BY 12 (shift to divide by 4 then divide by 3) */
1390GSYM($$divI_12)
1391 .export $$divI_12,millicode
1392 comb,< x2,0,LREF(neg12) /* negative dividend: take the neg12 path */
1393 copy 0,x1 /* not nullified, so the high word is cleared on both paths */
1394 extru x2,29,30,x2 /* divide by 4 */
1395 addib,tr 1,x2,LREF(pos) /* compute 5*(x2+1) = 5*x2+5 */
1396 sh2add x2,x2,x2 /* multiply by 5 to get started */
1397
1398LSYM(neg12)
1399 subi 4,x2,x2 /* negate, divide by 4, and add 1 */
1400 /* negation and adding 1 are done */
1401 /* at the same time by the SUBI */
1402 extru x2,29,30,x2 /* the divide-by-4 step: (4 - x) >> 2 = |x|/4 + 1 */
1403 b LREF(neg) /* same multiply chain as pos, but neg negates the quotient before returning */
1404 sh2add x2,x2,x2 /* multiply by 5 to get started */
1405
1406GSYM($$divU_12)
1407 .export $$divU_12,millicode
1408 extru x2,29,30,x2 /* divide by 4 */
1409 addi 5,x2,t1 /* cannot carry */
1410 sh2add x2,t1,x2 /* multiply by 5 to get started */
1411 b LREF(pos)
1412 addc 0,0,x1 /* high word = carry out of the add */
1413
1414/* DIVISION BY 15 (use z = 2**32; a = 11111111) */
1415GSYM($$divI_15)
1416 .export $$divI_15,millicode
1417 comb,< x2,0,LREF(neg15) /* negative dividend: take the neg15 path */
1418 copy 0,x1 /* not nullified, so the high word is cleared on both paths */
1419 addib,tr 1,x2,LREF(pos)+4 /* add 1, then enter pos at its second instruction ... */
1420 shd x1,x2,28,t1 /* ... because this line is a copy of the first instruction of pos */
1421
1422LSYM(neg15)
1423 b LREF(neg) /* neg multiplies by 0x11111111 and negates the quotient */
1424 subi 1,x2,x2 /* x2 = 1 - x2: negate and add 1 in one step */
1425
1426GSYM($$divU_15)
1427 .export $$divU_15,millicode
1428 addi 1,x2,x2 /* this CAN overflow */
1429 b LREF(pos) /* no start-up multiply needed: pos alone supplies a = 0x11111111 */
1430 addc 0,0,x1 /* x1 = carry out of the increment */
1431
1432/* DIVISION BY 17 (use z = 2**32; a = f0f0f0f) */
1433GSYM($$divI_17)
1434 .export $$divI_17,millicode
1435 comb,<,n x2,0,LREF(neg17) /* negative dividend: take the neg17 path */
1436 addi 1,x2,x2 /* this cannot overflow */
1437 shd 0,x2,28,t1 /* multiply by 0xf to get started */
1438 shd x2,0,28,t2 /* <t1,t2> = x2 << 4 as a 64-bit value */
1439 sub t2,x2,x2 /* low word of 16*x2 - x2 */
1440 b LREF(pos_for_17) /* skip the 0x11 step: 0xf * 0x101 * 0x10001 = 0x0f0f0f0f */
1441 subb t1,0,x1 /* high word, with the borrow from the subtract above */
1442
1443LSYM(neg17)
1444 subi 1,x2,x2 /* this cannot overflow */
1445 shd 0,x2,28,t1 /* multiply by 0xf to get started */
1446 shd x2,0,28,t2
1447 sub t2,x2,x2 /* low word of 16*x2 - x2 */
1448 b LREF(neg_for_17) /* same as pos_for_17, but the quotient is negated before returning */
1449 subb t1,0,x1 /* high word, with the borrow from the subtract above */
1450
1451GSYM($$divU_17)
1452 .export $$divU_17,millicode
1453 addi 1,x2,x2 /* this CAN overflow */
1454 addc 0,0,x1 /* x1 = carry out of the increment */
1455 shd x1,x2,28,t1 /* multiply by 0xf to get started */
1456LSYM(u17)
1457 shd x2,0,28,t2
1458 sub t2,x2,x2 /* low word of 16*x2 - x2 */
1459 b LREF(pos_for_17)
1460 subb t1,x1,x1 /* high word: t1 - x1 - borrow */
1461
1462
1463/* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these
1464 includes 7,9 and also 14
1465
1466
1467 z = 2**24-1
1468 r = z mod x = 0
1469
1470 so choose b = 0
1471
1472 Also, in order to divide by z = 2**24-1, we approximate by dividing
1473 by (z+1) = 2**24 (which is easy), and then correcting.
1474
1475 (ax) = (z+1)q' + r
1476 . = zq' + (q'+r)
1477
1478 So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)
1479 Then the true remainder of (ax)/z is (q'+r). Repeat the process
1480 with this new remainder, adding the tentative quotients together,
1481 until a tentative quotient is 0 (and then we are done). There is
1482 one last correction to be done. It is possible that (q'+r) = z.
1483 If so, then (q'+r)/(z+1) = 0 and it looks like we are done. But,
1484 in fact, we need to add 1 more to the quotient. Now, it turns
1485 out that this happens if and only if the original value x is
1486 an exact multiple of y. So, to avoid a three instruction test at
1487 the end, instead use 1 instruction to add 1 to x at the beginning. */
1488
1489/* DIVISION BY 7 (use z = 2**24-1; a = 249249) */
1490GSYM($$divI_7)
1491 .export $$divI_7,millicode
1492 comb,<,n x2,0,LREF(neg7) /* negative dividend: take the neg7 path */
1493LSYM(7) /* also the entry for division by 14 once the dividend has been halved */
1494 addi 1,x2,x2 /* cannot overflow */
1495 shd 0,x2,29,x1 /* x1 = the three bits that x2 << 3 shifts out */
1496 sh3add x2,x2,x2 /* x2 = 8*x2 + x2: multiply by 9 to get started */
1497 addc x1,0,x1 /* fold the carry of the add into the high word */
1498LSYM(pos7) /* shared by the 7 and 9 routines: multiply <x1,x2> by 0x41 * 0x1001, then divide by 2**24-1 */
1499 shd x1,x2,26,t1 /* multiply by 0x41: <x1,x2> += <x1,x2> << 6 */
1500 shd x2,0,26,t2
1501 add x2,t2,x2
1502 addc x1,t1,x1
1503
1504 shd x1,x2,20,t1 /* multiply by 0x1001: <x1,x2> += <x1,x2> << 12 */
1505 shd x2,0,20,t2
1506 add x2,t2,x2
1507 addc x1,t1,t1 /* high word of the product is left in t1, not x1 */
1508
1509 /* computed <t1,x2>. Now divide it by (2**24 - 1) */
1510
1511 copy 0,x1 /* x1 now accumulates the quotient */
1512 shd,= t1,x2,24,t1 /* tentative quotient */
1513LSYM(1)
1514 addb,tr t1,x1,LREF(2) /* add to previous quotient */
1515 extru x2,31,24,x2 /* new remainder (unadjusted) */
1516
1517 MILLIRETN /* reached once a tentative quotient of 0 has skipped the addb at 1 */
1518
1519LSYM(2)
1520 addb,tr t1,x2,LREF(1) /* adjust remainder */
1521 extru,= x2,7,8,t1 /* new quotient */
1522
1523LSYM(neg7)
1524 subi 1,x2,x2 /* negate x2 and add 1 */
1525LSYM(8) /* also the entry for negative division by 14 (see neg14) */
1526 shd 0,x2,29,x1 /* x1 = the three bits that x2 << 3 shifts out */
1527 sh3add x2,x2,x2 /* x2 = 8*x2 + x2: multiply by 9 to get started */
1528 addc x1,0,x1 /* fold the carry of the add into the high word */
1529
1530LSYM(neg7_shift) /* negative-dividend twin of pos7; also used by neg9 */
1531 shd x1,x2,26,t1 /* multiply by 0x41: <x1,x2> += <x1,x2> << 6 */
1532 shd x2,0,26,t2
1533 add x2,t2,x2
1534 addc x1,t1,x1
1535
1536 shd x1,x2,20,t1 /* multiply by 0x1001: <x1,x2> += <x1,x2> << 12 */
1537 shd x2,0,20,t2
1538 add x2,t2,x2
1539 addc x1,t1,t1 /* high word of the product is left in t1, not x1 */
1540
1541 /* computed <t1,x2>. Now divide it by (2**24 - 1) */
1542
1543 copy 0,x1 /* x1 now accumulates the quotient */
1544 shd,= t1,x2,24,t1 /* tentative quotient */
1545LSYM(3)
1546 addb,tr t1,x1,LREF(4) /* add to previous quotient */
1547 extru x2,31,24,x2 /* new remainder (unadjusted) */
1548
1549 MILLIRET
1550 sub 0,x1,x1 /* negate result */
1551
1552LSYM(4)
1553 addb,tr t1,x2,LREF(3) /* adjust remainder */
1554 extru,= x2,7,8,t1 /* new quotient */
1555
1556GSYM($$divU_7)
1557 .export $$divU_7,millicode
1558 addi 1,x2,x2 /* can carry */
1559 addc 0,0,x1 /* x1 = carry out of the increment */
1560 shd x1,x2,29,t1 /* t1 = high word of <x1,x2> << 3 */
1561 sh3add x2,x2,x2 /* x2 = 8*x2 + x2: multiply by 9 to get started */
1562 b LREF(pos7)
1563 addc t1,x1,x1 /* high word: x1 + bits shifted out (t1) + carry */
1564
1565/* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */
1566GSYM($$divI_9)
1567 .export $$divI_9,millicode
1568 comb,<,n x2,0,LREF(neg9) /* negative dividend: take the neg9 path */
1569 addi 1,x2,x2 /* cannot overflow */
1570 shd 0,x2,29,t1 /* <t1,t2> = x2 << 3 as a 64-bit value */
1571 shd x2,0,29,t2
1572 sub t2,x2,x2 /* low word of 8*x2 - x2: multiply by 7 to get started */
1573 b LREF(pos7) /* pos7 supplies 0x41 * 0x1001; 7 * 0x41 * 0x1001 = 0x1c71c7 */
1574 subb t1,0,x1 /* high word, with the borrow from the subtract above */
1575
1576LSYM(neg9)
1577 subi 1,x2,x2 /* negate and add 1 */
1578 shd 0,x2,29,t1
1579 shd x2,0,29,t2
1580 sub t2,x2,x2 /* low word of 8*x2 - x2: multiply by 7 to get started */
1581 b LREF(neg7_shift) /* same as pos7, but the quotient is negated before returning */
1582 subb t1,0,x1 /* high word, with the borrow from the subtract above */
1583
1584GSYM($$divU_9)
1585 .export $$divU_9,millicode
1586 addi 1,x2,x2 /* can carry */
1587 addc 0,0,x1 /* x1 = carry out of the increment */
1588 shd x1,x2,29,t1 /* t1 = high word of <x1,x2> << 3 */
1589 shd x2,0,29,t2
1590 sub t2,x2,x2 /* low word of 8*x2 - x2: multiply by 7 to get started */
1591 b LREF(pos7)
1592 subb t1,x1,x1 /* high word: t1 - x1 - borrow */
1593
1594/* DIVISION BY 14 (shift to divide by 2 then divide by 7) */
1595GSYM($$divI_14)
1596 .export $$divI_14,millicode
1597 comb,<,n x2,0,LREF(neg14) /* negative dividend: take the neg14 path; otherwise fall into $$divU_14 */
1598GSYM($$divU_14)
1599 .export $$divU_14,millicode
1600 b LREF(7) /* go to 7 case */
1601 extru x2,30,31,x2 /* divide by 2 */
1602
1603LSYM(neg14)
1604 subi 2,x2,x2 /* negate (and add 2) */
1605 b LREF(8) /* label 8 is just past the subi in neg7: the +1 was already applied by the subi/halve pair */
1606 extru x2,30,31,x2 /* divide by 2 */
1607 .exit
1608 .procend
1609 .end
1610#endif
1611
1612#ifdef L_mulI
1613/* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */
1614/******************************************************************************
1615This routine is used on PA2.0 processors when gcc -mno-fpregs is used
1616
1617ROUTINE: $$mulI
1618
1619
1620DESCRIPTION:
1621
1622 $$mulI multiplies two single word integers, giving a single
1623 word result.
1624
1625
1626INPUT REGISTERS:
1627
1628 arg0 = Operand 1
1629 arg1 = Operand 2
1630 r31 == return pc
1631 sr0 == return space when called externally
1632
1633
1634OUTPUT REGISTERS:
1635
1636 arg0 = undefined
1637 arg1 = undefined
1638 ret1 = result
1639
1640OTHER REGISTERS AFFECTED:
1641
1642 r1 = undefined
1643
1644SIDE EFFECTS:
1645
1646 Causes a trap under the following conditions: NONE
1647 Changes memory at the following places: NONE
1648
1649PERMISSIBLE CONTEXT:
1650
1651 Unwindable
1652 Does not create a stack frame
1653 Is usable for internal or external microcode
1654
1655DISCUSSION:
1656
1657 Calls other millicode routines via mrp: NONE
1658 Calls other millicode routines: NONE
1659
1660***************************************************************************/
1661
1662
1663#define a0 %arg0 /* operand that is shifted left 8 bits per pass */
1664#define a1 %arg1 /* operand that is consumed 8 bits per pass */
1665#define t0 %r1 /* scratch: table index, then the current partial product */
1666#define r %ret1 /* result accumulator */
1667
1668#define a0__128a0 zdep a0,24,25,a0 /* naming: X__Y means X = Y, so this is a0 = 128*a0 */
1669#define a0__256a0 zdep a0,23,24,a0
1670#define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0) /* if a1 != 0 branch to l0 (likewise l1 and l2 below) */
1671#define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1)
1672#define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2)
1673#define b_n_ret_t0 b,n LREF(ret_t0) /* branch to ret_t0 with the following instruction nullified */
1674#define b_e_shift b LREF(e_shift) /* b_e_*: plain branch to the matching e_* tail after the table */
1675#define b_e_t0ma0 b LREF(e_t0ma0)
1676#define b_e_t0 b LREF(e_t0)
1677#define b_e_t0a0 b LREF(e_t0a0)
1678#define b_e_t02a0 b LREF(e_t02a0)
1679#define b_e_t04a0 b LREF(e_t04a0)
1680#define b_e_2t0 b LREF(e_2t0)
1681#define b_e_2t0a0 b LREF(e_2t0a0)
1682#define b_e_2t04a0 b LREF(e2t04a0) /* note: the label is spelled e2t04a0, without the first underscore */
1683#define b_e_3t0 b LREF(e_3t0)
1684#define b_e_4t0 b LREF(e_4t0)
1685#define b_e_4t0a0 b LREF(e_4t0a0)
1686#define b_e_4t08a0 b LREF(e4t08a0) /* note: the label is spelled e4t08a0, without the first underscore */
1687#define b_e_5t0 b LREF(e_5t0)
1688#define b_e_8t0 b LREF(e_8t0)
1689#define b_e_8t0a0 b LREF(e_8t0a0)
1690#define r__r_a0 add r,a0,r /* r__r_*: accumulate into the result, here r = r + a0 */
1691#define r__r_2a0 sh1add a0,r,r
1692#define r__r_4a0 sh2add a0,r,r
1693#define r__r_8a0 sh3add a0,r,r
1694#define r__r_t0 add r,t0,r
1695#define r__r_2t0 sh1add t0,r,r
1696#define r__r_4t0 sh2add t0,r,r
1697#define r__r_8t0 sh3add t0,r,r
1698#define t0__3a0 sh1add a0,a0,t0 /* t0__*: build a multiple in t0, here t0 = 2*a0 + a0 */
1699#define t0__4a0 sh2add a0,0,t0
1700#define t0__5a0 sh2add a0,a0,t0
1701#define t0__8a0 sh3add a0,0,t0
1702#define t0__9a0 sh3add a0,a0,t0
1703#define t0__16a0 zdep a0,27,28,t0
1704#define t0__32a0 zdep a0,26,27,t0
1705#define t0__64a0 zdep a0,25,26,t0
1706#define t0__128a0 zdep a0,24,25,t0
1707#define t0__t0ma0 sub t0,a0,t0 /* "m" means minus: t0 = t0 - a0 */
1708#define t0__t0_a0 add t0,a0,t0 /* "_" between terms means plus: t0 = t0 + a0 */
1709#define t0__t0_2a0 sh1add a0,t0,t0
1710#define t0__t0_4a0 sh2add a0,t0,t0
1711#define t0__t0_8a0 sh3add a0,t0,t0
1712#define t0__2t0_a0 sh1add t0,a0,t0
1713#define t0__3t0 sh1add t0,t0,t0
1714#define t0__4t0 sh2add t0,0,t0
1715#define t0__4t0_a0 sh2add t0,a0,t0
1716#define t0__5t0 sh2add t0,t0,t0
1717#define t0__8t0 sh3add t0,0,t0
1718#define t0__8t0_a0 sh3add t0,a0,t0
1719#define t0__9t0 sh3add t0,t0,t0
1720#define t0__16t0 zdep t0,27,28,t0
1721#define t0__32t0 zdep t0,26,27,t0
1722#define t0__256a0 zdep a0,23,24,t0
1723
1724
1725 SUBSPA_MILLI
1726 ATTR_MILLI
1727 .align 16
1728 .proc
1729 .callinfo millicode
1730 .export $$mulI,millicode
1731GSYM($$mulI)
1732 combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */
1733 copy 0,r /* zero out the result */
1734 xor a0,a1,a0 /* swap a0 & a1 using the */
1735 xor a0,a1,a1 /* old xor trick */
1736 xor a0,a1,a0
1737LSYM(l4)
1738 combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */
1739 zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
1740 sub,> 0,a1,t0 /* otherwise negate both and */
1741 combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */
1742 sub 0,a0,a1
1743 movb,tr,n t0,a0,LREF(l2) /* 10th inst. */
1744
1745LSYM(l0) r__r_t0 /* add in this partial product */
1746LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */
1747LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
1748LSYM(l3) blr t0,0 /* case on these 8 bits: t0 is twice the byte value and selects one four-instruction entry of the x0..x255 table that follows */
1749 extru a1,23,24,a1 /* a1 >>= 8 ****************** */
1750
1751/*16 insts before this. */ /* Jump table: entry xN is exactly four instructions (separated by !) and adds N*a0 into r. */
1752/* a0 <<= 8 ************************** */
1753LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop /* byte is 0: nothing to add; loop with a0 <<= 8, or return when a1 is exhausted */
1754LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop /* r += a0, then loop through l1 (a0 <<= 8) or return */
1755LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop
1756LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0 /* t0 = 3*a0; l0 adds t0 into r when looping, the last slot adds it when returning */
1757LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop
1758LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0
1759LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1760LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0 /* r += 4*a0 here; the remaining 3*a0 in t0 is added at l0 or ret_t0 */
1761LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop
1762LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0
1763LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1764LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
1765LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1766LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
1767LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0 /* t0 = 7*a0, r += 2*t0; entries needing more than four instructions finish in an e_* tail */
1768LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0
1769LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1770LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0
1771LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
1772LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0
1773LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1774LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1775LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1776LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
1777LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1778LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
1779LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1780LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0
1781LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1782LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1783LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
1784LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1785LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1786LSYM(x33) t0__8a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1787LSYM(x34) t0__16a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1788LSYM(x35) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__t0_8a0
1789LSYM(x36) t0__9a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
1790LSYM(x37) t0__9a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
1791LSYM(x38) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
1792LSYM(x39) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
1793LSYM(x40) t0__5a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1794LSYM(x41) t0__5a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
1795LSYM(x42) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1796LSYM(x43) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1797LSYM(x44) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1798LSYM(x45) t0__9a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
1799LSYM(x46) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_a0
1800LSYM(x47) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_2a0
1801LSYM(x48) t0__3a0 ! a1_ne_0_b_l0 ! t0__16t0 ! b_n_ret_t0
1802LSYM(x49) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_4a0
1803LSYM(x50) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
1804LSYM(x51) t0__9a0 ! t0__t0_8a0 ! b_e_t0 ! t0__3t0
1805LSYM(x52) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1806LSYM(x53) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1807LSYM(x54) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
1808LSYM(x55) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__2t0_a0
1809LSYM(x56) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1810LSYM(x57) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__3t0
1811LSYM(x58) t0__3a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1812LSYM(x59) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__3t0
1813LSYM(x60) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1814LSYM(x61) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1815LSYM(x62) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
1816LSYM(x63) t0__64a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1817LSYM(x64) t0__64a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1818LSYM(x65) t0__8a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
1819LSYM(x66) t0__32a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1820LSYM(x67) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1821LSYM(x68) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1822LSYM(x69) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1823LSYM(x70) t0__64a0 ! t0__t0_4a0 ! b_e_t0 ! t0__t0_2a0
1824LSYM(x71) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__t0ma0
1825LSYM(x72) t0__9a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
1826LSYM(x73) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_t0
1827LSYM(x74) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
1828LSYM(x75) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
1829LSYM(x76) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
1830LSYM(x77) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
1831LSYM(x78) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__2t0_a0
1832LSYM(x79) t0__16a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
1833LSYM(x80) t0__16a0 ! t0__5t0 ! b_e_shift ! r__r_t0
1834LSYM(x81) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_t0
1835LSYM(x82) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
1836LSYM(x83) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1837LSYM(x84) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1838LSYM(x85) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
1839LSYM(x86) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1840LSYM(x87) t0__9a0 ! t0__9t0 ! b_e_t02a0 ! t0__t0_4a0
1841LSYM(x88) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1842LSYM(x89) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1843LSYM(x90) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
1844LSYM(x91) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__2t0_a0
1845LSYM(x92) t0__5a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
1846LSYM(x93) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__3t0
1847LSYM(x94) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__t0_2a0
1848LSYM(x95) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
1849LSYM(x96) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1850LSYM(x97) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1851LSYM(x98) t0__32a0 ! t0__3t0 ! b_e_t0 ! t0__t0_2a0
1852LSYM(x99) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
1853LSYM(x100) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
1854LSYM(x101) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
1855LSYM(x102) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
1856LSYM(x103) t0__5a0 ! t0__5t0 ! b_e_t02a0 ! t0__4t0_a0
1857LSYM(x104) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
1858LSYM(x105) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
1859LSYM(x106) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__4t0_a0
1860LSYM(x107) t0__9a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__8t0_a0
1861LSYM(x108) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
1862LSYM(x109) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
1863LSYM(x110) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__2t0_a0
1864LSYM(x111) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
1865LSYM(x112) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__16t0
1866LSYM(x113) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__3t0
1867LSYM(x114) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__3t0
1868LSYM(x115) t0__9a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__3t0
1869LSYM(x116) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__4t0_a0
1870LSYM(x117) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
1871LSYM(x118) t0__3a0 ! t0__4t0_a0 ! b_e_t0a0 ! t0__9t0
1872LSYM(x119) t0__3a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__9t0
1873LSYM(x120) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
1874LSYM(x121) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
1875LSYM(x122) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
1876LSYM(x123) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
1877LSYM(x124) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
1878LSYM(x125) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
1879LSYM(x126) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
1880LSYM(x127) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
1881LSYM(x128) t0__128a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
1882LSYM(x129) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0_a0 ! b_n_ret_t0
1883LSYM(x130) t0__64a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
1884LSYM(x131) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1885LSYM(x132) t0__8a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1886LSYM(x133) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1887LSYM(x134) t0__8a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1888LSYM(x135) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__3t0
1889LSYM(x136) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1890LSYM(x137) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1891LSYM(x138) t0__8a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1892LSYM(x139) t0__8a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__4t0_a0
1893LSYM(x140) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__5t0
1894LSYM(x141) t0__8a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__2t0_a0
1895LSYM(x142) t0__9a0 ! t0__8t0 ! b_e_2t0 ! t0__t0ma0
1896LSYM(x143) t0__16a0 ! t0__9t0 ! b_e_t0 ! t0__t0ma0
1897LSYM(x144) t0__9a0 ! t0__8t0 ! b_e_shift ! r__r_2t0
1898LSYM(x145) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__2t0_a0
1899LSYM(x146) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
1900LSYM(x147) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
1901LSYM(x148) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
1902LSYM(x149) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
1903LSYM(x150) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
1904LSYM(x151) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
1905LSYM(x152) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
1906LSYM(x153) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
1907LSYM(x154) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
1908LSYM(x155) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__5t0
1909LSYM(x156) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
1910LSYM(x157) t0__32a0 ! t0__t0ma0 ! b_e_t02a0 ! t0__5t0
1911LSYM(x158) t0__16a0 ! t0__5t0 ! b_e_2t0 ! t0__t0ma0
1912LSYM(x159) t0__32a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
1913LSYM(x160) t0__5a0 ! t0__4t0 ! b_e_shift ! r__r_8t0
1914LSYM(x161) t0__8a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
1915LSYM(x162) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_2t0
1916LSYM(x163) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__2t0_a0
1917LSYM(x164) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_4t0
1918LSYM(x165) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
1919LSYM(x166) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__2t0_a0
1920LSYM(x167) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
1921LSYM(x168) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
1922LSYM(x169) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__8t0_a0
1923LSYM(x170) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__5t0
1924LSYM(x171) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__9t0
1925LSYM(x172) t0__5a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__2t0_a0
1926LSYM(x173) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__9t0
1927LSYM(x174) t0__32a0 ! t0__t0_2a0 ! b_e_t04a0 ! t0__5t0
1928LSYM(x175) t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0
1929LSYM(x176) t0__5a0 ! t0__4t0_a0 ! b_e_8t0 ! t0__t0_a0
1930LSYM(x177) t0__5a0 ! t0__4t0_a0 ! b_e_8t0a0 ! t0__t0_a0
1931LSYM(x178) t0__5a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__8t0_a0
1932LSYM(x179) t0__5a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__8t0_a0
1933LSYM(x180) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
1934LSYM(x181) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
1935LSYM(x182) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__2t0_a0
1936LSYM(x183) t0__9a0 ! t0__5t0 ! b_e_2t0a0 ! t0__2t0_a0
1937LSYM(x184) t0__5a0 ! t0__9t0 ! b_e_4t0 ! t0__t0_a0
1938LSYM(x185) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
1939LSYM(x186) t0__32a0 ! t0__t0ma0 ! b_e_2t0 ! t0__3t0
1940LSYM(x187) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__5t0
1941LSYM(x188) t0__9a0 ! t0__5t0 ! b_e_4t0 ! t0__t0_2a0
1942LSYM(x189) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
1943LSYM(x190) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__5t0
1944LSYM(x191) t0__64a0 ! t0__3t0 ! b_e_t0 ! t0__t0ma0
1945LSYM(x192) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
1946LSYM(x193) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
1947LSYM(x194) t0__8a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
1948LSYM(x195) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
1949LSYM(x196) t0__8a0 ! t0__3t0 ! b_e_4t0 ! t0__2t0_a0
1950LSYM(x197) t0__8a0 ! t0__3t0 ! b_e_4t0a0 ! t0__2t0_a0
1951LSYM(x198) t0__64a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
1952LSYM(x199) t0__8a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
1953LSYM(x200) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_8t0
1954LSYM(x201) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__8t0_a0
1955LSYM(x202) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__4t0_a0
1956LSYM(x203) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__4t0_a0
1957LSYM(x204) t0__8a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
1958LSYM(x205) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__5t0
1959LSYM(x206) t0__64a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__3t0
1960LSYM(x207) t0__8a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
1961LSYM(x208) t0__5a0 ! t0__5t0 ! b_e_8t0 ! t0__t0_a0
1962LSYM(x209) t0__5a0 ! t0__5t0 ! b_e_8t0a0 ! t0__t0_a0
1963LSYM(x210) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__5t0
1964LSYM(x211) t0__5a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__5t0
1965LSYM(x212) t0__3a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__4t0_a0
1966LSYM(x213) t0__3a0 ! t0__4t0_a0 ! b_e_4t0a0 ! t0__4t0_a0
1967LSYM(x214) t0__9a0 ! t0__t0_4a0 ! b_e_2t04a0 ! t0__8t0_a0
1968LSYM(x215) t0__5a0 ! t0__4t0_a0 ! b_e_5t0 ! t0__2t0_a0
1969LSYM(x216) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
1970LSYM(x217) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
1971LSYM(x218) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
1972LSYM(x219) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
1973LSYM(x220) t0__3a0 ! t0__9t0 ! b_e_4t0 ! t0__2t0_a0
1974LSYM(x221) t0__3a0 ! t0__9t0 ! b_e_4t0a0 ! t0__2t0_a0
1975LSYM(x222) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__3t0
1976LSYM(x223) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
1977LSYM(x224) t0__9a0 ! t0__3t0 ! b_e_8t0 ! t0__t0_a0
1978LSYM(x225) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
1979LSYM(x226) t0__3a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__32t0
1980LSYM(x227) t0__9a0 ! t0__5t0 ! b_e_t02a0 ! t0__5t0
1981LSYM(x228) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
1982LSYM(x229) t0__9a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__3t0
1983LSYM(x230) t0__9a0 ! t0__5t0 ! b_e_5t0 ! t0__t0_a0
1984LSYM(x231) t0__9a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
1985LSYM(x232) t0__3a0 ! t0__2t0_a0 ! b_e_8t0 ! t0__4t0_a0
1986LSYM(x233) t0__3a0 ! t0__2t0_a0 ! b_e_8t0a0 ! t0__4t0_a0
1987LSYM(x234) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__9t0
1988LSYM(x235) t0__3a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__9t0
1989LSYM(x236) t0__9a0 ! t0__2t0_a0 ! b_e_4t08a0 ! t0__3t0
1990LSYM(x237) t0__16a0 ! t0__5t0 ! b_e_3t0 ! t0__t0ma0
1991LSYM(x238) t0__3a0 ! t0__4t0_a0 ! b_e_2t04a0 ! t0__9t0
1992LSYM(x239) t0__16a0 ! t0__5t0 ! b_e_t0ma0 ! t0__3t0
1993LSYM(x240) t0__9a0 ! t0__t0_a0 ! b_e_8t0 ! t0__3t0
1994LSYM(x241) t0__9a0 ! t0__t0_a0 ! b_e_8t0a0 ! t0__3t0
1995LSYM(x242) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__8t0_a0
1996LSYM(x243) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__3t0
1997LSYM(x244) t0__5a0 ! t0__3t0 ! b_e_4t0 ! t0__4t0_a0
1998LSYM(x245) t0__8a0 ! t0__3t0 ! b_e_5t0 ! t0__2t0_a0
1999LSYM(x246) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__3t0
2000LSYM(x247) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__3t0
2001LSYM(x248) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_8t0
2002LSYM(x249) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__8t0_a0
2003LSYM(x250) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__5t0
2004LSYM(x251) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__5t0
2005LSYM(x252) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
2006LSYM(x253) t0__64a0 ! t0__t0ma0 ! b_e_t0 ! t0__4t0_a0
2007LSYM(x254) t0__128a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
2008LSYM(x255) t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
2009/*1040 insts before this. */ /* Shared tails: each finishes a table entry, then loops while a1 != 0 or returns. */
2010LSYM(ret_t0) MILLIRET /* return; the r += t0 on the next line supplies the final add */
2011LSYM(e_t0) r__r_t0 /* r += t0, then fall into e_shift */
2012LSYM(e_shift) a1_ne_0_b_l2 /* more bytes left in a1: go round again via l2 */
2013 a0__256a0 /* a0 <<= 8 *********** */
2014 MILLIRETN
2015LSYM(e_t0ma0) a1_ne_0_b_l0 /* finish with t0 -= a0; l0 or the last line below adds t0 into r */
2016 t0__t0ma0
2017 MILLIRET
2018 r__r_t0
2019LSYM(e_t0a0) a1_ne_0_b_l0 /* finish with t0 += a0 */
2020 t0__t0_a0
2021 MILLIRET
2022 r__r_t0
2023LSYM(e_t02a0) a1_ne_0_b_l0 /* finish with t0 += 2*a0 */
2024 t0__t0_2a0
2025 MILLIRET
2026 r__r_t0
2027LSYM(e_t04a0) a1_ne_0_b_l0 /* finish with t0 += 4*a0 */
2028 t0__t0_4a0
2029 MILLIRET
2030 r__r_t0
2031LSYM(e_2t0) a1_ne_0_b_l1 /* finish with r += 2*t0; loops via l1 because r is already updated */
2032 r__r_2t0
2033 MILLIRETN
2034LSYM(e_2t0a0) a1_ne_0_b_l0 /* finish with t0 = 2*t0 + a0 */
2035 t0__2t0_a0
2036 MILLIRET
2037 r__r_t0
2038LSYM(e2t04a0) t0__t0_2a0 /* t0 += 2*a0, then r += 2*t0: adds 2*t0 + 4*a0 in total */
2039 a1_ne_0_b_l1
2040 r__r_2t0
2041 MILLIRETN
2042LSYM(e_3t0) a1_ne_0_b_l0 /* finish with t0 = 3*t0 */
2043 t0__3t0
2044 MILLIRET
2045 r__r_t0
2046LSYM(e_4t0) a1_ne_0_b_l1 /* finish with r += 4*t0 */
2047 r__r_4t0
2048 MILLIRETN
2049LSYM(e_4t0a0) a1_ne_0_b_l0 /* finish with t0 = 4*t0 + a0 */
2050 t0__4t0_a0
2051 MILLIRET
2052 r__r_t0
2053LSYM(e4t08a0) t0__t0_2a0 /* t0 += 2*a0, then r += 4*t0: adds 4*t0 + 8*a0 in total */
2054 a1_ne_0_b_l1
2055 r__r_4t0
2056 MILLIRETN
2057LSYM(e_5t0) a1_ne_0_b_l0 /* finish with t0 = 5*t0 */
2058 t0__5t0
2059 MILLIRET
2060 r__r_t0
2061LSYM(e_8t0) a1_ne_0_b_l1 /* finish with r += 8*t0 */
2062 r__r_8t0
2063 MILLIRETN
2064LSYM(e_8t0a0) a1_ne_0_b_l0 /* finish with t0 = 8*t0 + a0 */
2065 t0__8t0_a0
2066 MILLIRET
2067 r__r_t0
2068
2069 .procend
2070 .end
2071#endif
diff --git a/arch/parisc/lib/milli/milli.h b/arch/parisc/lib/milli/milli.h
new file mode 100644
index 000000000000..19ac79f336de
--- /dev/null
+++ b/arch/parisc/lib/milli/milli.h
@@ -0,0 +1,165 @@
1/* 32 and 64-bit millicode, original author Hewlett-Packard
2 adapted for gcc by Paul Bame <bame@debian.org>
3 and Alan Modra <alan@linuxcare.com.au>.
4
5 Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
6
7 This file is part of GCC and is released under the terms
8 of the GNU General Public License as published by the Free Software
9 Foundation; either version 2, or (at your option) any later version.
10 See the file COPYING in the top-level GCC source directory for a copy
11 of the license. */
12
13#ifndef _PA_MILLI_H_
14#define _PA_MILLI_H_
15
16#define L_dyncall
17#define L_divI
18#define L_divU
19#define L_remI
20#define L_remU
21#define L_div_const
22#define L_mulI
23
24#ifdef CONFIG_64BIT
25 .level 2.0w
26#endif
27
28/* Hardware General Registers. */
29r0: .reg %r0
30r1: .reg %r1
31r2: .reg %r2
32r3: .reg %r3
33r4: .reg %r4
34r5: .reg %r5
35r6: .reg %r6
36r7: .reg %r7
37r8: .reg %r8
38r9: .reg %r9
39r10: .reg %r10
40r11: .reg %r11
41r12: .reg %r12
42r13: .reg %r13
43r14: .reg %r14
44r15: .reg %r15
45r16: .reg %r16
46r17: .reg %r17
47r18: .reg %r18
48r19: .reg %r19
49r20: .reg %r20
50r21: .reg %r21
51r22: .reg %r22
52r23: .reg %r23
53r24: .reg %r24
54r25: .reg %r25
55r26: .reg %r26
56r27: .reg %r27
57r28: .reg %r28
58r29: .reg %r29
59r30: .reg %r30
60r31: .reg %r31
61
62/* Hardware Space Registers. */
63sr0: .reg %sr0
64sr1: .reg %sr1
65sr2: .reg %sr2
66sr3: .reg %sr3
67sr4: .reg %sr4
68sr5: .reg %sr5
69sr6: .reg %sr6
70sr7: .reg %sr7
71
72/* Hardware Floating Point Registers. */
73fr0: .reg %fr0
74fr1: .reg %fr1
75fr2: .reg %fr2
76fr3: .reg %fr3
77fr4: .reg %fr4
78fr5: .reg %fr5
79fr6: .reg %fr6
80fr7: .reg %fr7
81fr8: .reg %fr8
82fr9: .reg %fr9
83fr10: .reg %fr10
84fr11: .reg %fr11
85fr12: .reg %fr12
86fr13: .reg %fr13
87fr14: .reg %fr14
88fr15: .reg %fr15
89
90/* Hardware Control Registers. */
91cr11: .reg %cr11
92sar: .reg %cr11 /* Shift Amount Register */
93
94/* Software Architecture General Registers. */
95rp: .reg r2 /* return pointer */
96#ifdef CONFIG_64BIT
97mrp: .reg r2 /* millicode return pointer */
98#else
99mrp: .reg r31 /* millicode return pointer */
100#endif
101ret0: .reg r28 /* return value */
102ret1: .reg r29 /* return value (high part of double) */
103sp: .reg r30 /* stack pointer */
104dp: .reg r27 /* data pointer */
105arg0: .reg r26 /* argument */
106arg1: .reg r25 /* argument or high part of double argument */
107arg2: .reg r24 /* argument */
108arg3: .reg r23 /* argument or high part of double argument */
109
110/* Software Architecture Space Registers. */
111/* sr0 ; return link from BLE */
112sret: .reg sr1 /* return value */
113sarg: .reg sr1 /* argument */
114/* sr4 ; PC SPACE tracker */
115/* sr5 ; process private data */
116
117/* Frame Offsets (millicode convention!) Used when calling other
118 millicode routines. Stack unwinding is dependent upon these
119 definitions. */
120r31_slot: .equ -20 /* "current RP" slot */
121sr0_slot: .equ -16 /* "static link" slot */
122#if defined(CONFIG_64BIT)
123mrp_slot: .equ -16 /* "current RP" slot */
124psp_slot: .equ -8 /* "previous SP" slot */
125#else
126mrp_slot: .equ -20 /* "current RP" slot (replacing "r31_slot") */
127#endif
128
129
/* DEFINE and RDEFINE emit a label followed by an .EQU or .REG directive,
   i.e. "name: .EQU value" and "name: .REG value".  */
130#define DEFINE(name,value)name: .EQU value
131#define RDEFINE(name,value)name: .REG value
/* Branch, call and return macros used by the millicode routines.
   With milliext defined they expand to BE/BLE forms that carry explicit
   space-register operands; otherwise they expand to plain B/BL/BV forms.
   In both variants the call macros link through mrp and the return macros
   branch through mrp.  The ",n" spellings add the nullify completer.
   MILLIRET/MILLI_RET and MILLIRETN/MILLI_RETN are two spellings of the
   same expansion.  */
132#ifdef milliext
133#define MILLI_BE(lbl) BE lbl(sr7,r0)
134#define MILLI_BEN(lbl) BE,n lbl(sr7,r0)
135#define MILLI_BLE(lbl) BLE lbl(sr7,r0)
136#define MILLI_BLEN(lbl) BLE,n lbl(sr7,r0)
137#define MILLIRETN BE,n 0(sr0,mrp)
138#define MILLIRET BE 0(sr0,mrp)
139#define MILLI_RETN BE,n 0(sr0,mrp)
140#define MILLI_RET BE 0(sr0,mrp)
141#else
142#define MILLI_BE(lbl) B lbl
143#define MILLI_BEN(lbl) B,n lbl
144#define MILLI_BLE(lbl) BL lbl,mrp
145#define MILLI_BLEN(lbl) BL,n lbl,mrp
146#define MILLIRETN BV,n 0(mrp)
147#define MILLIRET BV 0(mrp)
148#define MILLI_RETN BV,n 0(mrp)
149#define MILLI_RET BV 0(mrp)
150#endif
151
/* CAT pastes its two arguments into a single token.  */
152#define CAT(a,b) a##b
153
/* Section selection.  The SUBSPA_* macros expand to .section directives;
   the DIV and MUL variants also request 16-byte alignment.  ATTR_MILLI and
   ATTR_DATA expand to nothing here.  */
154#define SUBSPA_MILLI .section .text
155#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16
156#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16
157#define ATTR_MILLI
158#define SUBSPA_DATA .section .data
159#define ATTR_DATA
160#define GLOBAL $global$
/* Label helpers: GSYM(sym) defines the label "sym:", LSYM(sym) defines the
   label ".Lsym:" and LREF(sym) names that same ".Lsym" label in an operand.
   The leading '!' is the statement separator that the routines also use
   between instructions written on one line.  */
161#define GSYM(sym) !sym:
162#define LSYM(sym) !CAT(.L,sym:)
163#define LREF(sym) CAT(.L,sym)
164
165#endif /*_PA_MILLI_H_*/
diff --git a/arch/parisc/lib/milli/mulI.S b/arch/parisc/lib/milli/mulI.S
new file mode 100644
index 000000000000..4c7e0c36d15e
--- /dev/null
+++ b/arch/parisc/lib/milli/mulI.S
@@ -0,0 +1,474 @@
1/* 32 and 64-bit millicode, original author Hewlett-Packard
2 adapted for gcc by Paul Bame <bame@debian.org>
3 and Alan Modra <alan@linuxcare.com.au>.
4
5 Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
6
7 This file is part of GCC and is released under the terms
8 of the GNU General Public License as published by the Free Software
9 Foundation; either version 2, or (at your option) any later version.
10 See the file COPYING in the top-level GCC source directory for a copy
11 of the license. */
12
13#include "milli.h"
14
15#ifdef L_mulI
16/* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */
17/******************************************************************************
18This routine is used on PA2.0 processors when gcc -mno-fpregs is used
19
20ROUTINE: $$mulI
21
22
23DESCRIPTION:
24
25 $$mulI multiplies two single word integers, giving a single
26 word result.
27
28
29INPUT REGISTERS:
30
31 arg0 = Operand 1
32 arg1 = Operand 2
33 mrp == return pc (r31, or r2 when CONFIG_64BIT is defined)
34 sr0 == return space when called externally
35
36
37OUTPUT REGISTERS:
38
39 arg0 = undefined
40 arg1 = undefined
41 ret1 = result
42
43OTHER REGISTERS AFFECTED:
44
45 r1 = undefined
46
47SIDE EFFECTS:
48
49 Causes a trap under the following conditions: NONE
50 Changes memory at the following places: NONE
51
52PERMISSIBLE CONTEXT:
53
54 Unwindable
55 Does not create a stack frame
56 Is usable for internal or external millicode
57
58DISCUSSION:
59
60 Calls other millicode routines via mrp: NONE
61 Calls other millicode routines: NONE
62
63***************************************************************************/
64
65
/* Register aliases used by $$mulI: a0 and a1 are the two operands, t0 is
   a scratch register and r receives the result.  */
66#define a0 %arg0
67#define a1 %arg1
68#define t0 %r1
69#define r %ret1
70
/* Naming convention for the one-instruction macros below:
     dst__expr      means "dst = expr"; a leading number is a multiplier,
                    '_' between terms is addition and 'm' is subtraction.
                    Examples: t0__3a0 is t0 = 3*a0, r__r_2t0 is
                    r = r + 2*t0, t0__t0ma0 is t0 = t0 - a0.
     a1_ne_0_b_lN   compares a1 with 0 and branches to label lN when they
                    differ.
     b_e_NAME       branches to the exit stub for NAME; b_n_ret_t0 is the
                    nullifying branch to ret_t0.
   Note that b_e_2t04a0 and b_e_4t08a0 target the labels e2t04a0 and
   e4t08a0, which have no underscore after the 'e'.  */
71#define a0__128a0 zdep a0,24,25,a0
72#define a0__256a0 zdep a0,23,24,a0
73#define a1_ne_0_b_l0 comb,<> a1,0,LREF(l0)
74#define a1_ne_0_b_l1 comb,<> a1,0,LREF(l1)
75#define a1_ne_0_b_l2 comb,<> a1,0,LREF(l2)
76#define b_n_ret_t0 b,n LREF(ret_t0)
77#define b_e_shift b LREF(e_shift)
78#define b_e_t0ma0 b LREF(e_t0ma0)
79#define b_e_t0 b LREF(e_t0)
80#define b_e_t0a0 b LREF(e_t0a0)
81#define b_e_t02a0 b LREF(e_t02a0)
82#define b_e_t04a0 b LREF(e_t04a0)
83#define b_e_2t0 b LREF(e_2t0)
84#define b_e_2t0a0 b LREF(e_2t0a0)
85#define b_e_2t04a0 b LREF(e2t04a0)
86#define b_e_3t0 b LREF(e_3t0)
87#define b_e_4t0 b LREF(e_4t0)
88#define b_e_4t0a0 b LREF(e_4t0a0)
89#define b_e_4t08a0 b LREF(e4t08a0)
90#define b_e_5t0 b LREF(e_5t0)
91#define b_e_8t0 b LREF(e_8t0)
92#define b_e_8t0a0 b LREF(e_8t0a0)
/* Accumulate a multiple of a0 or t0 into the result register r.  */
93#define r__r_a0 add r,a0,r
94#define r__r_2a0 sh1add a0,r,r
95#define r__r_4a0 sh2add a0,r,r
96#define r__r_8a0 sh3add a0,r,r
97#define r__r_t0 add r,t0,r
98#define r__r_2t0 sh1add t0,r,r
99#define r__r_4t0 sh2add t0,r,r
100#define r__r_8t0 sh3add t0,r,r
/* Load t0 with a small multiple of a0.  */
101#define t0__3a0 sh1add a0,a0,t0
102#define t0__4a0 sh2add a0,0,t0
103#define t0__5a0 sh2add a0,a0,t0
104#define t0__8a0 sh3add a0,0,t0
105#define t0__9a0 sh3add a0,a0,t0
106#define t0__16a0 zdep a0,27,28,t0
107#define t0__32a0 zdep a0,26,27,t0
108#define t0__64a0 zdep a0,25,26,t0
109#define t0__128a0 zdep a0,24,25,t0
/* Update t0 in place from its previous value and a0.  */
110#define t0__t0ma0 sub t0,a0,t0
111#define t0__t0_a0 add t0,a0,t0
112#define t0__t0_2a0 sh1add a0,t0,t0
113#define t0__t0_4a0 sh2add a0,t0,t0
114#define t0__t0_8a0 sh3add a0,t0,t0
115#define t0__2t0_a0 sh1add t0,a0,t0
116#define t0__3t0 sh1add t0,t0,t0
117#define t0__4t0 sh2add t0,0,t0
118#define t0__4t0_a0 sh2add t0,a0,t0
119#define t0__5t0 sh2add t0,t0,t0
120#define t0__8t0 sh3add t0,0,t0
121#define t0__8t0_a0 sh3add t0,a0,t0
122#define t0__9t0 sh3add t0,t0,t0
123#define t0__16t0 zdep t0,27,28,t0
124#define t0__32t0 zdep t0,26,27,t0
125#define t0__256a0 zdep a0,23,24,t0
126
127
/* $$mulI entry and dispatch loop.  The product is accumulated in r
   (%ret1), which is cleared on entry.  The multiplier a1 is consumed
   eight bits per pass: l2 loads t0 with twice the low byte of a1, the blr
   at l3 dispatches on t0 into the LSYM(xN) table that follows, and the
   extru after the blr shifts a1 right by eight bits.  Table entries and
   exit stubs branch back to l0, l1 or l2 while a1 is still non-zero.  */
128 SUBSPA_MILLI
129 ATTR_MILLI
130 .align 16
131 .proc
132 .callinfo millicode
133 .export $$mulI,millicode
134GSYM($$mulI)
135 combt,<<= a1,a0,LREF(l4) /* swap args if unsigned a1>a0 */
136 copy 0,r /* zero out the result */
137 xor a0,a1,a0 /* swap a0 & a1 using the */
138 xor a0,a1,a1 /* old xor trick */
139 xor a0,a1,a0
140LSYM(l4)
141 combt,<= 0,a0,LREF(l3) /* if a0>=0 then proceed like unsigned */
142 zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
143 sub,> 0,a1,t0 /* otherwise negate both and */
144 combt,<=,n a0,t0,LREF(l2) /* swap back if |a0|<|a1| */
145 sub 0,a0,a1
146 movb,tr,n t0,a0,LREF(l2) /* 10th inst. */
147
/* Loop re-entry points: l0 first adds the partial product held in t0,
   l1 shifts a0 left by eight bits for the next byte of a1, and l2 extracts
   that byte.  NOTE(review): blr scales its index register by 8 bytes, so
   t0 = 2 * byte selects 16-byte (four-instruction) table entries; confirm
   against the PA-RISC instruction set manual.  */
148LSYM(l0) r__r_t0 /* add in this partial product */
149LSYM(l1) a0__256a0 /* a0 <<= 8 ****************** */
150LSYM(l2) zdep a1,30,8,t0 /* t0 = (a1&0xff)<<1 ********* */
151LSYM(l3) blr t0,0 /* case on these 8 bits ****** */
152 extru a1,23,24,a1 /* a1 >>= 8 ****************** */
153
154/*16 insts before this. */
155/* a0 <<= 8 ************************** */
156LSYM(x0) a1_ne_0_b_l2 ! a0__256a0 ! MILLIRETN ! nop
157LSYM(x1) a1_ne_0_b_l1 ! r__r_a0 ! MILLIRETN ! nop
158LSYM(x2) a1_ne_0_b_l1 ! r__r_2a0 ! MILLIRETN ! nop
159LSYM(x3) a1_ne_0_b_l0 ! t0__3a0 ! MILLIRET ! r__r_t0
160LSYM(x4) a1_ne_0_b_l1 ! r__r_4a0 ! MILLIRETN ! nop
161LSYM(x5) a1_ne_0_b_l0 ! t0__5a0 ! MILLIRET ! r__r_t0
162LSYM(x6) t0__3a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
163LSYM(x7) t0__3a0 ! a1_ne_0_b_l0 ! r__r_4a0 ! b_n_ret_t0
164LSYM(x8) a1_ne_0_b_l1 ! r__r_8a0 ! MILLIRETN ! nop
165LSYM(x9) a1_ne_0_b_l0 ! t0__9a0 ! MILLIRET ! r__r_t0
166LSYM(x10) t0__5a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
167LSYM(x11) t0__3a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
168LSYM(x12) t0__3a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
169LSYM(x13) t0__5a0 ! a1_ne_0_b_l0 ! r__r_8a0 ! b_n_ret_t0
170LSYM(x14) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
171LSYM(x15) t0__5a0 ! a1_ne_0_b_l0 ! t0__3t0 ! b_n_ret_t0
172LSYM(x16) t0__16a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
173LSYM(x17) t0__9a0 ! a1_ne_0_b_l0 ! t0__t0_8a0 ! b_n_ret_t0
174LSYM(x18) t0__9a0 ! a1_ne_0_b_l1 ! r__r_2t0 ! MILLIRETN
175LSYM(x19) t0__9a0 ! a1_ne_0_b_l0 ! t0__2t0_a0 ! b_n_ret_t0
176LSYM(x20) t0__5a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
177LSYM(x21) t0__5a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
178LSYM(x22) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
179LSYM(x23) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
180LSYM(x24) t0__3a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
181LSYM(x25) t0__5a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
182LSYM(x26) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
183LSYM(x27) t0__3a0 ! a1_ne_0_b_l0 ! t0__9t0 ! b_n_ret_t0
184LSYM(x28) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
185LSYM(x29) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
186LSYM(x30) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
187LSYM(x31) t0__32a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
188LSYM(x32) t0__32a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
189LSYM(x33) t0__8a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
190LSYM(x34) t0__16a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
191LSYM(x35) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__t0_8a0
192LSYM(x36) t0__9a0 ! a1_ne_0_b_l1 ! r__r_4t0 ! MILLIRETN
193LSYM(x37) t0__9a0 ! a1_ne_0_b_l0 ! t0__4t0_a0 ! b_n_ret_t0
194LSYM(x38) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_2t0
195LSYM(x39) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__2t0_a0
196LSYM(x40) t0__5a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
197LSYM(x41) t0__5a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
198LSYM(x42) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
199LSYM(x43) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
200LSYM(x44) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
201LSYM(x45) t0__9a0 ! a1_ne_0_b_l0 ! t0__5t0 ! b_n_ret_t0
202LSYM(x46) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_a0
203LSYM(x47) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_2a0
204LSYM(x48) t0__3a0 ! a1_ne_0_b_l0 ! t0__16t0 ! b_n_ret_t0
205LSYM(x49) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__t0_4a0
206LSYM(x50) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
207LSYM(x51) t0__9a0 ! t0__t0_8a0 ! b_e_t0 ! t0__3t0
208LSYM(x52) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
209LSYM(x53) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
210LSYM(x54) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_2t0
211LSYM(x55) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__2t0_a0
212LSYM(x56) t0__3a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
213LSYM(x57) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__3t0
214LSYM(x58) t0__3a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
215LSYM(x59) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__3t0
216LSYM(x60) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
217LSYM(x61) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
218LSYM(x62) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
219LSYM(x63) t0__64a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
220LSYM(x64) t0__64a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
221LSYM(x65) t0__8a0 ! a1_ne_0_b_l0 ! t0__8t0_a0 ! b_n_ret_t0
222LSYM(x66) t0__32a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
223LSYM(x67) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
224LSYM(x68) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
225LSYM(x69) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
226LSYM(x70) t0__64a0 ! t0__t0_4a0 ! b_e_t0 ! t0__t0_2a0
227LSYM(x71) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__t0ma0
228LSYM(x72) t0__9a0 ! a1_ne_0_b_l1 ! r__r_8t0 ! MILLIRETN
229LSYM(x73) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_t0
230LSYM(x74) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_2t0
231LSYM(x75) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__2t0_a0
232LSYM(x76) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_4t0
233LSYM(x77) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__4t0_a0
234LSYM(x78) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__2t0_a0
235LSYM(x79) t0__16a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
236LSYM(x80) t0__16a0 ! t0__5t0 ! b_e_shift ! r__r_t0
237LSYM(x81) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_t0
238LSYM(x82) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
239LSYM(x83) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
240LSYM(x84) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
241LSYM(x85) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
242LSYM(x86) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
243LSYM(x87) t0__9a0 ! t0__9t0 ! b_e_t02a0 ! t0__t0_4a0
244LSYM(x88) t0__5a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
245LSYM(x89) t0__5a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
246LSYM(x90) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_2t0
247LSYM(x91) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__2t0_a0
248LSYM(x92) t0__5a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
249LSYM(x93) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__3t0
250LSYM(x94) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__t0_2a0
251LSYM(x95) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__5t0
252LSYM(x96) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
253LSYM(x97) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
254LSYM(x98) t0__32a0 ! t0__3t0 ! b_e_t0 ! t0__t0_2a0
255LSYM(x99) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
256LSYM(x100) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
257LSYM(x101) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
258LSYM(x102) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
259LSYM(x103) t0__5a0 ! t0__5t0 ! b_e_t02a0 ! t0__4t0_a0
260LSYM(x104) t0__3a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
261LSYM(x105) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
262LSYM(x106) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__4t0_a0
263LSYM(x107) t0__9a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__8t0_a0
264LSYM(x108) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_4t0
265LSYM(x109) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__4t0_a0
266LSYM(x110) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__2t0_a0
267LSYM(x111) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__3t0
268LSYM(x112) t0__3a0 ! t0__2t0_a0 ! b_e_t0 ! t0__16t0
269LSYM(x113) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__3t0
270LSYM(x114) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__3t0
271LSYM(x115) t0__9a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__3t0
272LSYM(x116) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__4t0_a0
273LSYM(x117) t0__3a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
274LSYM(x118) t0__3a0 ! t0__4t0_a0 ! b_e_t0a0 ! t0__9t0
275LSYM(x119) t0__3a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__9t0
276LSYM(x120) t0__5a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
277LSYM(x121) t0__5a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
278LSYM(x122) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
279LSYM(x123) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
280LSYM(x124) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
281LSYM(x125) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
282LSYM(x126) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
283LSYM(x127) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
284LSYM(x128) t0__128a0 ! a1_ne_0_b_l1 ! r__r_t0 ! MILLIRETN
285LSYM(x129) t0__128a0 ! a1_ne_0_b_l0 ! t0__t0_a0 ! b_n_ret_t0
286LSYM(x130) t0__64a0 ! t0__t0_a0 ! b_e_shift ! r__r_2t0
287LSYM(x131) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
288LSYM(x132) t0__8a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
289LSYM(x133) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
290LSYM(x134) t0__8a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
291LSYM(x135) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__3t0
292LSYM(x136) t0__8a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
293LSYM(x137) t0__8a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
294LSYM(x138) t0__8a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
295LSYM(x139) t0__8a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__4t0_a0
296LSYM(x140) t0__3a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__5t0
297LSYM(x141) t0__8a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__2t0_a0
298LSYM(x142) t0__9a0 ! t0__8t0 ! b_e_2t0 ! t0__t0ma0
299LSYM(x143) t0__16a0 ! t0__9t0 ! b_e_t0 ! t0__t0ma0
300LSYM(x144) t0__9a0 ! t0__8t0 ! b_e_shift ! r__r_2t0
301LSYM(x145) t0__9a0 ! t0__8t0 ! b_e_t0 ! t0__2t0_a0
302LSYM(x146) t0__9a0 ! t0__8t0_a0 ! b_e_shift ! r__r_2t0
303LSYM(x147) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__2t0_a0
304LSYM(x148) t0__9a0 ! t0__4t0_a0 ! b_e_shift ! r__r_4t0
305LSYM(x149) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__4t0_a0
306LSYM(x150) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__2t0_a0
307LSYM(x151) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
308LSYM(x152) t0__9a0 ! t0__2t0_a0 ! b_e_shift ! r__r_8t0
309LSYM(x153) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__8t0_a0
310LSYM(x154) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__4t0_a0
311LSYM(x155) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__5t0
312LSYM(x156) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__2t0_a0
313LSYM(x157) t0__32a0 ! t0__t0ma0 ! b_e_t02a0 ! t0__5t0
314LSYM(x158) t0__16a0 ! t0__5t0 ! b_e_2t0 ! t0__t0ma0
315LSYM(x159) t0__32a0 ! t0__5t0 ! b_e_t0 ! t0__t0ma0
316LSYM(x160) t0__5a0 ! t0__4t0 ! b_e_shift ! r__r_8t0
317LSYM(x161) t0__8a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
318LSYM(x162) t0__9a0 ! t0__9t0 ! b_e_shift ! r__r_2t0
319LSYM(x163) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__2t0_a0
320LSYM(x164) t0__5a0 ! t0__8t0_a0 ! b_e_shift ! r__r_4t0
321LSYM(x165) t0__8a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
322LSYM(x166) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__2t0_a0
323LSYM(x167) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__2t0_a0
324LSYM(x168) t0__5a0 ! t0__4t0_a0 ! b_e_shift ! r__r_8t0
325LSYM(x169) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__8t0_a0
326LSYM(x170) t0__32a0 ! t0__t0_2a0 ! b_e_t0 ! t0__5t0
327LSYM(x171) t0__9a0 ! t0__2t0_a0 ! b_e_t0 ! t0__9t0
328LSYM(x172) t0__5a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__2t0_a0
329LSYM(x173) t0__9a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__9t0
330LSYM(x174) t0__32a0 ! t0__t0_2a0 ! b_e_t04a0 ! t0__5t0
331LSYM(x175) t0__8a0 ! t0__2t0_a0 ! b_e_5t0 ! t0__2t0_a0
332LSYM(x176) t0__5a0 ! t0__4t0_a0 ! b_e_8t0 ! t0__t0_a0
333LSYM(x177) t0__5a0 ! t0__4t0_a0 ! b_e_8t0a0 ! t0__t0_a0
334LSYM(x178) t0__5a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__8t0_a0
335LSYM(x179) t0__5a0 ! t0__2t0_a0 ! b_e_2t0a0 ! t0__8t0_a0
336LSYM(x180) t0__9a0 ! t0__5t0 ! b_e_shift ! r__r_4t0
337LSYM(x181) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__4t0_a0
338LSYM(x182) t0__9a0 ! t0__5t0 ! b_e_2t0 ! t0__2t0_a0
339LSYM(x183) t0__9a0 ! t0__5t0 ! b_e_2t0a0 ! t0__2t0_a0
340LSYM(x184) t0__5a0 ! t0__9t0 ! b_e_4t0 ! t0__t0_a0
341LSYM(x185) t0__9a0 ! t0__4t0_a0 ! b_e_t0 ! t0__5t0
342LSYM(x186) t0__32a0 ! t0__t0ma0 ! b_e_2t0 ! t0__3t0
343LSYM(x187) t0__9a0 ! t0__4t0_a0 ! b_e_t02a0 ! t0__5t0
344LSYM(x188) t0__9a0 ! t0__5t0 ! b_e_4t0 ! t0__t0_2a0
345LSYM(x189) t0__5a0 ! t0__4t0_a0 ! b_e_t0 ! t0__9t0
346LSYM(x190) t0__9a0 ! t0__2t0_a0 ! b_e_2t0 ! t0__5t0
347LSYM(x191) t0__64a0 ! t0__3t0 ! b_e_t0 ! t0__t0ma0
348LSYM(x192) t0__8a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
349LSYM(x193) t0__8a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
350LSYM(x194) t0__8a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
351LSYM(x195) t0__8a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
352LSYM(x196) t0__8a0 ! t0__3t0 ! b_e_4t0 ! t0__2t0_a0
353LSYM(x197) t0__8a0 ! t0__3t0 ! b_e_4t0a0 ! t0__2t0_a0
354LSYM(x198) t0__64a0 ! t0__t0_2a0 ! b_e_t0 ! t0__3t0
355LSYM(x199) t0__8a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
356LSYM(x200) t0__5a0 ! t0__5t0 ! b_e_shift ! r__r_8t0
357LSYM(x201) t0__5a0 ! t0__5t0 ! b_e_t0 ! t0__8t0_a0
358LSYM(x202) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__4t0_a0
359LSYM(x203) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__4t0_a0
360LSYM(x204) t0__8a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
361LSYM(x205) t0__5a0 ! t0__8t0_a0 ! b_e_t0 ! t0__5t0
362LSYM(x206) t0__64a0 ! t0__t0_4a0 ! b_e_t02a0 ! t0__3t0
363LSYM(x207) t0__8a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
364LSYM(x208) t0__5a0 ! t0__5t0 ! b_e_8t0 ! t0__t0_a0
365LSYM(x209) t0__5a0 ! t0__5t0 ! b_e_8t0a0 ! t0__t0_a0
366LSYM(x210) t0__5a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__5t0
367LSYM(x211) t0__5a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__5t0
368LSYM(x212) t0__3a0 ! t0__4t0_a0 ! b_e_4t0 ! t0__4t0_a0
369LSYM(x213) t0__3a0 ! t0__4t0_a0 ! b_e_4t0a0 ! t0__4t0_a0
370LSYM(x214) t0__9a0 ! t0__t0_4a0 ! b_e_2t04a0 ! t0__8t0_a0
371LSYM(x215) t0__5a0 ! t0__4t0_a0 ! b_e_5t0 ! t0__2t0_a0
372LSYM(x216) t0__9a0 ! t0__3t0 ! b_e_shift ! r__r_8t0
373LSYM(x217) t0__9a0 ! t0__3t0 ! b_e_t0 ! t0__8t0_a0
374LSYM(x218) t0__9a0 ! t0__3t0 ! b_e_2t0 ! t0__4t0_a0
375LSYM(x219) t0__9a0 ! t0__8t0_a0 ! b_e_t0 ! t0__3t0
376LSYM(x220) t0__3a0 ! t0__9t0 ! b_e_4t0 ! t0__2t0_a0
377LSYM(x221) t0__3a0 ! t0__9t0 ! b_e_4t0a0 ! t0__2t0_a0
378LSYM(x222) t0__9a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__3t0
379LSYM(x223) t0__9a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__3t0
380LSYM(x224) t0__9a0 ! t0__3t0 ! b_e_8t0 ! t0__t0_a0
381LSYM(x225) t0__9a0 ! t0__5t0 ! b_e_t0 ! t0__5t0
382LSYM(x226) t0__3a0 ! t0__2t0_a0 ! b_e_t02a0 ! t0__32t0
383LSYM(x227) t0__9a0 ! t0__5t0 ! b_e_t02a0 ! t0__5t0
384LSYM(x228) t0__9a0 ! t0__2t0_a0 ! b_e_4t0 ! t0__3t0
385LSYM(x229) t0__9a0 ! t0__2t0_a0 ! b_e_4t0a0 ! t0__3t0
386LSYM(x230) t0__9a0 ! t0__5t0 ! b_e_5t0 ! t0__t0_a0
387LSYM(x231) t0__9a0 ! t0__2t0_a0 ! b_e_3t0 ! t0__4t0_a0
388LSYM(x232) t0__3a0 ! t0__2t0_a0 ! b_e_8t0 ! t0__4t0_a0
389LSYM(x233) t0__3a0 ! t0__2t0_a0 ! b_e_8t0a0 ! t0__4t0_a0
390LSYM(x234) t0__3a0 ! t0__4t0_a0 ! b_e_2t0 ! t0__9t0
391LSYM(x235) t0__3a0 ! t0__4t0_a0 ! b_e_2t0a0 ! t0__9t0
392LSYM(x236) t0__9a0 ! t0__2t0_a0 ! b_e_4t08a0 ! t0__3t0
393LSYM(x237) t0__16a0 ! t0__5t0 ! b_e_3t0 ! t0__t0ma0
394LSYM(x238) t0__3a0 ! t0__4t0_a0 ! b_e_2t04a0 ! t0__9t0
395LSYM(x239) t0__16a0 ! t0__5t0 ! b_e_t0ma0 ! t0__3t0
396LSYM(x240) t0__9a0 ! t0__t0_a0 ! b_e_8t0 ! t0__3t0
397LSYM(x241) t0__9a0 ! t0__t0_a0 ! b_e_8t0a0 ! t0__3t0
398LSYM(x242) t0__5a0 ! t0__3t0 ! b_e_2t0 ! t0__8t0_a0
399LSYM(x243) t0__9a0 ! t0__9t0 ! b_e_t0 ! t0__3t0
400LSYM(x244) t0__5a0 ! t0__3t0 ! b_e_4t0 ! t0__4t0_a0
401LSYM(x245) t0__8a0 ! t0__3t0 ! b_e_5t0 ! t0__2t0_a0
402LSYM(x246) t0__5a0 ! t0__8t0_a0 ! b_e_2t0 ! t0__3t0
403LSYM(x247) t0__5a0 ! t0__8t0_a0 ! b_e_2t0a0 ! t0__3t0
404LSYM(x248) t0__32a0 ! t0__t0ma0 ! b_e_shift ! r__r_8t0
405LSYM(x249) t0__32a0 ! t0__t0ma0 ! b_e_t0 ! t0__8t0_a0
406LSYM(x250) t0__5a0 ! t0__5t0 ! b_e_2t0 ! t0__5t0
407LSYM(x251) t0__5a0 ! t0__5t0 ! b_e_2t0a0 ! t0__5t0
408LSYM(x252) t0__64a0 ! t0__t0ma0 ! b_e_shift ! r__r_4t0
409LSYM(x253) t0__64a0 ! t0__t0ma0 ! b_e_t0 ! t0__4t0_a0
410LSYM(x254) t0__128a0 ! t0__t0ma0 ! b_e_shift ! r__r_2t0
411LSYM(x255) t0__256a0 ! a1_ne_0_b_l0 ! t0__t0ma0 ! b_n_ret_t0
412/*1040 insts before this. */
/* Shared exit stubs, reached from the table through the b_e_* and
   b_n_ret_t0 macros.  Each stub finishes the partial product for the
   current byte and then either loops (a1 != 0: branch back to l0, l1 or
   l2) or returns through MILLIRET / MILLIRETN.  The instruction written
   after a non-nullified branch occupies its delay slot, so it takes part
   in both the loop path and the return path.  */
413LSYM(ret_t0) MILLIRET
/* e_t0 doubles as the delay slot of the MILLIRET above (return with
   r += t0) and as an entry point that adds t0 and falls into e_shift.  */
414LSYM(e_t0) r__r_t0
415LSYM(e_shift) a1_ne_0_b_l2
416 a0__256a0 /* a0 <<= 8 *********** */
417 MILLIRETN
/* Stubs of the form "branch to l0 / adjust t0 / MILLIRET / r += t0":
   when looping, l0 adds the adjusted t0; when returning, the instruction
   after MILLIRET adds it.  */
418LSYM(e_t0ma0) a1_ne_0_b_l0
419 t0__t0ma0
420 MILLIRET
421 r__r_t0
422LSYM(e_t0a0) a1_ne_0_b_l0
423 t0__t0_a0
424 MILLIRET
425 r__r_t0
426LSYM(e_t02a0) a1_ne_0_b_l0
427 t0__t0_2a0
428 MILLIRET
429 r__r_t0
430LSYM(e_t04a0) a1_ne_0_b_l0
431 t0__t0_4a0
432 MILLIRET
433 r__r_t0
/* Stubs of the form "branch to l1 / r += k*t0 / MILLIRETN": the scaled
   add is done once, directly into r, before looping or returning.  */
434LSYM(e_2t0) a1_ne_0_b_l1
435 r__r_2t0
436 MILLIRETN
437LSYM(e_2t0a0) a1_ne_0_b_l0
438 t0__2t0_a0
439 MILLIRET
440 r__r_t0
/* e2t04a0 adds 2*t0 + 4*a0: t0 is first bumped by 2*a0, then doubled
   into r.  */
441LSYM(e2t04a0) t0__t0_2a0
442 a1_ne_0_b_l1
443 r__r_2t0
444 MILLIRETN
445LSYM(e_3t0) a1_ne_0_b_l0
446 t0__3t0
447 MILLIRET
448 r__r_t0
449LSYM(e_4t0) a1_ne_0_b_l1
450 r__r_4t0
451 MILLIRETN
452LSYM(e_4t0a0) a1_ne_0_b_l0
453 t0__4t0_a0
454 MILLIRET
455 r__r_t0
/* e4t08a0 adds 4*t0 + 8*a0: t0 is first bumped by 2*a0, then scaled by
   four into r.  */
456LSYM(e4t08a0) t0__t0_2a0
457 a1_ne_0_b_l1
458 r__r_4t0
459 MILLIRETN
460LSYM(e_5t0) a1_ne_0_b_l0
461 t0__5t0
462 MILLIRET
463 r__r_t0
464LSYM(e_8t0) a1_ne_0_b_l1
465 r__r_8t0
466 MILLIRETN
467LSYM(e_8t0a0) a1_ne_0_b_l0
468 t0__8t0_a0
469 MILLIRET
470 r__r_t0
471
472 .procend
473 .end
474#endif
diff --git a/arch/parisc/lib/milli/remI.S b/arch/parisc/lib/milli/remI.S
new file mode 100644
index 000000000000..63bc094471e2
--- /dev/null
+++ b/arch/parisc/lib/milli/remI.S
@@ -0,0 +1,185 @@
1/* 32 and 64-bit millicode, original author Hewlett-Packard
2 adapted for gcc by Paul Bame <bame@debian.org>
3 and Alan Modra <alan@linuxcare.com.au>.
4
5 Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
6
7 This file is part of GCC and is released under the terms
8 of the GNU General Public License as published by the Free Software
9 Foundation; either version 2, or (at your option) any later version.
10 See the file COPYING in the top-level GCC source directory for a copy
11 of the license. */
12
13#include "milli.h"
14
15#ifdef L_remI
16/* ROUTINE: $$remI
17
18 DESCRIPTION:
19 . $$remI returns the remainder of the division of two signed 32-bit
20 . integers. The sign of the remainder is the same as the sign of
21 . the dividend.
22
23
24 INPUT REGISTERS:
25 . arg0 == dividend
26 . arg1 == divisor
27 . mrp == return pc
28 . sr0 == return space when called externally
29
30 OUTPUT REGISTERS:
31 . arg0 = destroyed
32 . arg1 = destroyed
33 . ret1 = remainder
34
35 OTHER REGISTERS AFFECTED:
36 . r1 = undefined
37
38 SIDE EFFECTS:
39 . Causes a trap under the following conditions: DIVIDE BY ZERO
40 . Changes memory at the following places: NONE
41
42 PERMISSIBLE CONTEXT:
43 . Unwindable
44 . Does not create a stack frame
45 . Is usable for internal or external millicode
46
47 DISCUSSION:
48 . Calls other millicode routines via mrp: NONE
49 . Calls other millicode routines: NONE */
50
/* $$remI / $$remoI entry: fast paths for divisors whose magnitude is a
   power of two.  tmp (r1) is scratch and retreg (ret1) receives the
   remainder.  For such a divisor the remainder is |dividend| masked with
   (|divisor| - 1), negated again when the dividend was negative.  Every
   other divisor, including 0 and 0x80000000, is handed to regular_seq.  */
51RDEFINE(tmp,r1)
52RDEFINE(retreg,ret1)
53
54 SUBSPA_MILLI
55 ATTR_MILLI
56 .proc
57 .callinfo millicode
58 .entry
/* Both names label the same entry point.  */
59GSYM($$remI)
60GSYM($$remoI)
61 .export $$remI,MILLICODE
62 .export $$remoI,MILLICODE
63 ldo -1(arg1),tmp /* is there at most one bit set ? */
64 and,<> arg1,tmp,r0 /* if not, don't use power of 2 */
65 addi,> 0,arg1,r0 /* if denominator > 0, use power */
66 /* of 2 */
67 b,n LREF(neg_denom)
/* Positive power-of-two divisor: tmp already holds the mask arg1 - 1.  */
68LSYM(pow2)
69 comb,>,n 0,arg0,LREF(neg_num) /* is numerator < 0 ? */
70 and arg0,tmp,retreg /* get the result */
71 MILLIRETN
72LSYM(neg_num)
73 subi 0,arg0,arg0 /* negate numerator */
74 and arg0,tmp,retreg /* get the result */
75 subi 0,retreg,retreg /* negate result */
76 MILLIRETN
/* Divisor is not a positive power of two: check whether its negation is
   one, building the mask in retreg this time.  */
77LSYM(neg_denom)
78 addi,< 0,arg1,r0 /* if arg1 >= 0, it's not power */
79 /* of 2 */
80 b,n LREF(regular_seq)
81 sub r0,arg1,tmp /* make denominator positive */
82 comb,=,n arg1,tmp,LREF(regular_seq) /* test against 0x80000000 and 0 */
83 ldo -1(tmp),retreg /* is there at most one bit set ? */
84 and,= tmp,retreg,r0 /* if not, go to regular_seq */
85 b,n LREF(regular_seq)
86 comb,>,n 0,arg0,LREF(neg_num_2) /* if arg0 < 0, negate it */
87 and arg0,retreg,retreg
88 MILLIRETN
89LSYM(neg_num_2)
90 subi 0,arg0,tmp /* negate numerator into tmp */
91 and tmp,retreg,retreg /* mask to get the magnitude */
92 subi 0,retreg,retreg /* negate result */
93 MILLIRETN
/* General path of $$remI.  Traps on a zero divisor, takes the absolute
   value of the dividend into retreg, then runs 32 unrolled ds (divide
   step) / addc pairs: each addc shifts retreg left through the carry
   while tmp carries the partial remainder.  After the last step tmp is
   copied over retreg, corrected by arg1 if it came out negative, and
   finally given the sign of the original dividend in arg0.  */
94LSYM(regular_seq)
95 addit,= 0,arg1,0 /* trap if div by zero */
96 add,>= 0,arg0,retreg /* move dividend, if retreg < 0, */
97 sub 0,retreg,retreg /* make it positive */
98 sub 0,arg1, tmp /* clear carry, */
99 /* negate the divisor */
100 ds 0, tmp,0 /* set V-bit to the comple- */
101 /* ment of the divisor sign */
102 or 0,0, tmp /* clear tmp */
103 add retreg,retreg,retreg /* shift msb bit into carry */
104 ds tmp,arg1, tmp /* 1st divide step, if no carry */
105 /* out, msb of quotient = 0 */
106 addc retreg,retreg,retreg /* shift retreg with/into carry */
107LSYM(t1)
108 ds tmp,arg1, tmp /* 2nd divide step */
109 addc retreg,retreg,retreg /* shift retreg with/into carry */
110 ds tmp,arg1, tmp /* 3rd divide step */
111 addc retreg,retreg,retreg /* shift retreg with/into carry */
112 ds tmp,arg1, tmp /* 4th divide step */
113 addc retreg,retreg,retreg /* shift retreg with/into carry */
114 ds tmp,arg1, tmp /* 5th divide step */
115 addc retreg,retreg,retreg /* shift retreg with/into carry */
116 ds tmp,arg1, tmp /* 6th divide step */
117 addc retreg,retreg,retreg /* shift retreg with/into carry */
118 ds tmp,arg1, tmp /* 7th divide step */
119 addc retreg,retreg,retreg /* shift retreg with/into carry */
120 ds tmp,arg1, tmp /* 8th divide step */
121 addc retreg,retreg,retreg /* shift retreg with/into carry */
122 ds tmp,arg1, tmp /* 9th divide step */
123 addc retreg,retreg,retreg /* shift retreg with/into carry */
124 ds tmp,arg1, tmp /* 10th divide step */
125 addc retreg,retreg,retreg /* shift retreg with/into carry */
126 ds tmp,arg1, tmp /* 11th divide step */
127 addc retreg,retreg,retreg /* shift retreg with/into carry */
128 ds tmp,arg1, tmp /* 12th divide step */
129 addc retreg,retreg,retreg /* shift retreg with/into carry */
130 ds tmp,arg1, tmp /* 13th divide step */
131 addc retreg,retreg,retreg /* shift retreg with/into carry */
132 ds tmp,arg1, tmp /* 14th divide step */
133 addc retreg,retreg,retreg /* shift retreg with/into carry */
134 ds tmp,arg1, tmp /* 15th divide step */
135 addc retreg,retreg,retreg /* shift retreg with/into carry */
136 ds tmp,arg1, tmp /* 16th divide step */
137 addc retreg,retreg,retreg /* shift retreg with/into carry */
138 ds tmp,arg1, tmp /* 17th divide step */
139 addc retreg,retreg,retreg /* shift retreg with/into carry */
140 ds tmp,arg1, tmp /* 18th divide step */
141 addc retreg,retreg,retreg /* shift retreg with/into carry */
142 ds tmp,arg1, tmp /* 19th divide step */
143 addc retreg,retreg,retreg /* shift retreg with/into carry */
144 ds tmp,arg1, tmp /* 20th divide step */
145 addc retreg,retreg,retreg /* shift retreg with/into carry */
146 ds tmp,arg1, tmp /* 21st divide step */
147 addc retreg,retreg,retreg /* shift retreg with/into carry */
148 ds tmp,arg1, tmp /* 22nd divide step */
149 addc retreg,retreg,retreg /* shift retreg with/into carry */
150 ds tmp,arg1, tmp /* 23rd divide step */
151 addc retreg,retreg,retreg /* shift retreg with/into carry */
152 ds tmp,arg1, tmp /* 24th divide step */
153 addc retreg,retreg,retreg /* shift retreg with/into carry */
154 ds tmp,arg1, tmp /* 25th divide step */
155 addc retreg,retreg,retreg /* shift retreg with/into carry */
156 ds tmp,arg1, tmp /* 26th divide step */
157 addc retreg,retreg,retreg /* shift retreg with/into carry */
158 ds tmp,arg1, tmp /* 27th divide step */
159 addc retreg,retreg,retreg /* shift retreg with/into carry */
160 ds tmp,arg1, tmp /* 28th divide step */
161 addc retreg,retreg,retreg /* shift retreg with/into carry */
162 ds tmp,arg1, tmp /* 29th divide step */
163 addc retreg,retreg,retreg /* shift retreg with/into carry */
164 ds tmp,arg1, tmp /* 30th divide step */
165 addc retreg,retreg,retreg /* shift retreg with/into carry */
166 ds tmp,arg1, tmp /* 31st divide step */
167 addc retreg,retreg,retreg /* shift retreg with/into carry */
168 ds tmp,arg1, tmp /* 32nd divide step, */
169 addc retreg,retreg,retreg /* shift last bit into retreg */
/* From here on retreg is overwritten with the remainder taken from tmp;
   the shifted-in quotient bits are discarded.  */
170 movb,>=,n tmp,retreg,LREF(finish) /* branch if pos. tmp */
171 add,< arg1,0,0 /* if arg1 > 0, add arg1 */
172 add,tr tmp,arg1,retreg /* for correcting remainder tmp */
173 sub tmp,arg1,retreg /* else add absolute value arg1 */
174LSYM(finish)
175 add,>= arg0,0,0 /* set sign of remainder */
176 sub 0,retreg,retreg /* to sign of dividend */
177 MILLIRET
178 nop
179 .exit
180 .procend
181#ifdef milliext
182 .origin 0x00000200
183#endif
184 .end
185#endif
diff --git a/arch/parisc/lib/milli/remU.S b/arch/parisc/lib/milli/remU.S
new file mode 100644
index 000000000000..c0a2d6e247c3
--- /dev/null
+++ b/arch/parisc/lib/milli/remU.S
@@ -0,0 +1,148 @@
1/* 32 and 64-bit millicode, original author Hewlett-Packard
2 adapted for gcc by Paul Bame <bame@debian.org>
3 and Alan Modra <alan@linuxcare.com.au>.
4
5 Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
6
7 This file is part of GCC and is released under the terms of
8 the GNU General Public License as published by the Free Software
9 Foundation; either version 2, or (at your option) any later version.
10 See the file COPYING in the top-level GCC source directory for a copy
11 of the license. */
12
13#include "milli.h"
14
15#ifdef L_remU
16/* ROUTINE: $$remU
17 . Single precision divide for remainder with unsigned binary integers.
18 .
19 . The remainder must be dividend-(dividend/divisor)*divisor.
20 . Divide by zero is trapped.
21
22 INPUT REGISTERS:
23 . arg0 == dividend
24 . arg1 == divisor
25 . mrp == return pc
26 . sr0 == return space when called externally
27
28 OUTPUT REGISTERS:
29 . arg0 = undefined
30 . arg1 = undefined
31 . ret1 = remainder
32
33 OTHER REGISTERS AFFECTED:
34 . r1 = undefined
35
36 SIDE EFFECTS:
37 . Causes a trap under the following conditions: DIVIDE BY ZERO
38 . Changes memory at the following places: NONE
39
40 PERMISSIBLE CONTEXT:
41 . Unwindable.
42 . Does not create a stack frame.
43 . Suitable for internal or external millicode.
44 . Assumes the special millicode register conventions.
45
46 DISCUSSION:
47 . Calls other millicode routines using mrp: NONE
48 . Calls other millicode routines: NONE */
49
50
51RDEFINE(temp,r1) /* scratch: holds divisor-1, then the dividend being shifted out */
52RDEFINE(rmndr,ret1) /* r29; partial remainder and final result */
53 SUBSPA_MILLI
54 ATTR_MILLI
55 .export $$remU,millicode
56 .proc
57 .callinfo millicode
58 .entry
59GSYM($$remU) /* entry: rmndr = arg0 % arg1 (unsigned); traps when arg1 == 0 */
60 ldo -1(arg1),temp /* temp = arg1 - 1; is there at most one bit set ? */
61 and,= arg1,temp,r0 /* if not, don't use power of 2 (nullifies the branch when arg1 & temp == 0) */
62 b LREF(regular_seq) /* taken only when arg1 has more than one bit set */
63 addit,= 0,arg1,r0 /* trap on div by zero; runs on both paths (branch delay slot / fall-through) */
64 and arg0,temp,rmndr /* get the result for power of 2: arg0 & (arg1 - 1) */
65 MILLIRETN /* return; macro presumably defined in milli.h - NOTE(review): confirm it nullifies the following slot */
66LSYM(regular_seq)
67 comib,>=,n 0,arg1,LREF(special_case) /* 0 >= arg1 as signed, i.e. divisor has its top bit set: handle separately */
68 subi 0,arg1,rmndr /* clear carry, negate the divisor */
69 ds r0,rmndr,r0 /* set V-bit to 1 */
70 add arg0,arg0,temp /* temp = dividend << 1, msb goes into carry */
71 ds r0,arg1,rmndr /* 1st divide step, if no carry */
72 addc temp,temp,temp /* shift temp with/into carry */
73 ds rmndr,arg1,rmndr /* 2nd divide step */
74 addc temp,temp,temp /* shift temp with/into carry */
75 ds rmndr,arg1,rmndr /* 3rd divide step */
76 addc temp,temp,temp /* shift temp with/into carry */
77 ds rmndr,arg1,rmndr /* 4th divide step */
78 addc temp,temp,temp /* shift temp with/into carry */
79 ds rmndr,arg1,rmndr /* 5th divide step */
80 addc temp,temp,temp /* shift temp with/into carry */
81 ds rmndr,arg1,rmndr /* 6th divide step */
82 addc temp,temp,temp /* shift temp with/into carry */
83 ds rmndr,arg1,rmndr /* 7th divide step */
84 addc temp,temp,temp /* shift temp with/into carry */
85 ds rmndr,arg1,rmndr /* 8th divide step */
86 addc temp,temp,temp /* shift temp with/into carry */
87 ds rmndr,arg1,rmndr /* 9th divide step */
88 addc temp,temp,temp /* shift temp with/into carry */
89 ds rmndr,arg1,rmndr /* 10th divide step */
90 addc temp,temp,temp /* shift temp with/into carry */
91 ds rmndr,arg1,rmndr /* 11th divide step */
92 addc temp,temp,temp /* shift temp with/into carry */
93 ds rmndr,arg1,rmndr /* 12th divide step */
94 addc temp,temp,temp /* shift temp with/into carry */
95 ds rmndr,arg1,rmndr /* 13th divide step */
96 addc temp,temp,temp /* shift temp with/into carry */
97 ds rmndr,arg1,rmndr /* 14th divide step */
98 addc temp,temp,temp /* shift temp with/into carry */
99 ds rmndr,arg1,rmndr /* 15th divide step */
100 addc temp,temp,temp /* shift temp with/into carry */
101 ds rmndr,arg1,rmndr /* 16th divide step */
102 addc temp,temp,temp /* shift temp with/into carry */
103 ds rmndr,arg1,rmndr /* 17th divide step */
104 addc temp,temp,temp /* shift temp with/into carry */
105 ds rmndr,arg1,rmndr /* 18th divide step */
106 addc temp,temp,temp /* shift temp with/into carry */
107 ds rmndr,arg1,rmndr /* 19th divide step */
108 addc temp,temp,temp /* shift temp with/into carry */
109 ds rmndr,arg1,rmndr /* 20th divide step */
110 addc temp,temp,temp /* shift temp with/into carry */
111 ds rmndr,arg1,rmndr /* 21st divide step */
112 addc temp,temp,temp /* shift temp with/into carry */
113 ds rmndr,arg1,rmndr /* 22nd divide step */
114 addc temp,temp,temp /* shift temp with/into carry */
115 ds rmndr,arg1,rmndr /* 23rd divide step */
116 addc temp,temp,temp /* shift temp with/into carry */
117 ds rmndr,arg1,rmndr /* 24th divide step */
118 addc temp,temp,temp /* shift temp with/into carry */
119 ds rmndr,arg1,rmndr /* 25th divide step */
120 addc temp,temp,temp /* shift temp with/into carry */
121 ds rmndr,arg1,rmndr /* 26th divide step */
122 addc temp,temp,temp /* shift temp with/into carry */
123 ds rmndr,arg1,rmndr /* 27th divide step */
124 addc temp,temp,temp /* shift temp with/into carry */
125 ds rmndr,arg1,rmndr /* 28th divide step */
126 addc temp,temp,temp /* shift temp with/into carry */
127 ds rmndr,arg1,rmndr /* 29th divide step */
128 addc temp,temp,temp /* shift temp with/into carry */
129 ds rmndr,arg1,rmndr /* 30th divide step */
130 addc temp,temp,temp /* shift temp with/into carry */
131 ds rmndr,arg1,rmndr /* 31st divide step */
132 addc temp,temp,temp /* shift temp with/into carry */
133 ds rmndr,arg1,rmndr /* 32nd divide step, */
134 comiclr,<= 0,rmndr,r0 /* skip the correction when 0 <= rmndr (signed) */
135 add rmndr,arg1,rmndr /* correction: negative partial remainder, add the divisor back */
136 MILLIRETN /* return; macro presumably defined in milli.h */
137 nop /* NOTE(review): looks like filler after the return - confirm against MILLIRETN */
138
139/* Putting >= on the last DS and deleting COMICLR does not work! */
140LSYM(special_case) /* reached from regular_seq when the divisor's top bit is set */
141 sub,>>= arg0,arg1,rmndr /* rmndr = arg0 - arg1; skip the copy when arg0 >= arg1 (unsigned) */
142 copy arg0,rmndr /* otherwise arg0 < arg1, so the remainder is arg0 itself */
143 MILLIRETN /* return; macro presumably defined in milli.h */
144 nop /* NOTE(review): looks like filler after the return - confirm against MILLIRETN */
145 .exit
146 .procend
147 .end
148#endif