aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sh/lib64
diff options
context:
space:
mode:
author: Paul Mundt <lethal@linux-sh.org>, 2008-12-12 02:53:14 -0500
committer: Paul Mundt <lethal@linux-sh.org>, 2008-12-22 04:44:05 -0500
commit: 180ae2037f5bc33b0597ddbb76d36b08a74a238a (patch)
tree: 623c8e77a7593811124726e669e701443b83e47e /arch/sh/lib64
parent: 209aa4fdc39eacc145a7f9c32a4b9ffcc68912c6 (diff)
sh: Provide sdivsi3/udivsi3/udivdi3 for sh64, kill off libgcc linking.
This moves in the necessary libgcc bits and kills off the libgcc linking for sh64 kernels as well. Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch/sh/lib64')
-rw-r--r-- arch/sh/lib64/Makefile 3
-rw-r--r-- arch/sh/lib64/sdivsi3.S 131
-rw-r--r-- arch/sh/lib64/udivdi3.S 120
-rw-r--r-- arch/sh/lib64/udivsi3.S 59
4 files changed, 313 insertions, 0 deletions
diff --git a/arch/sh/lib64/Makefile b/arch/sh/lib64/Makefile
index 1d932e7d0ca0..4bacb9e83478 100644
--- a/arch/sh/lib64/Makefile
+++ b/arch/sh/lib64/Makefile
@@ -12,3 +12,6 @@
12# Panic should really be compiled as PIC 12# Panic should really be compiled as PIC
13lib-y := udelay.o c-checksum.o dbg.o panic.o memcpy.o memset.o \ 13lib-y := udelay.o c-checksum.o dbg.o panic.o memcpy.o memset.o \
14 copy_user_memcpy.o copy_page.o clear_page.o strcpy.o strlen.o 14 copy_user_memcpy.o copy_page.o clear_page.o strcpy.o strlen.o
15
16# Extracted from libgcc
17lib-y += udivsi3.o udivdi3.o sdivsi3.o
diff --git a/arch/sh/lib64/sdivsi3.S b/arch/sh/lib64/sdivsi3.S
new file mode 100644
index 000000000000..6a800c6a4904
--- /dev/null
+++ b/arch/sh/lib64/sdivsi3.S
@@ -0,0 +1,131 @@
/*
 * __sdivsi3 - 32-bit signed divide helper taken from libgcc (sh64, SHmedia).
 *
 * Computes r4 / r5 without a divide instruction: the divisor is normalized,
 * a reciprocal estimate is looked up in __div_table, the estimate is refined
 * twice (see the "11 bit inverse" and "22 bit inverse" steps), and the
 * result is formed by multiplying with the dividend.
 *
 * The routine is straight-line code with no branches; in particular a zero
 * divisor is not checked for.
 */
1 .global __sdivsi3
2 .section .text..SHmedia32,"ax"
3 .align 2
4
5 /* inputs: r4 (dividend), r5 (divisor) */
6 /* clobbered: r1,r18,r19,r20,r21,r25,tr0 */
7 /* result in r0 */
8__sdivsi3:
9 ptb __div_table,tr0 /* tr0 = table address; read back with gettr below */
10
11 nsb r5, r1 /* r1 = left-shift count that normalizes the divisor */
12 shlld r5, r1, r25 /* normalize; [-2 ..1, 1..2) in s2.62 */
13 shari r25, 58, r21 /* extract 5(6) bit index (s2.4 with hole -1..1) */
14 /* bubble */
15 gettr tr0,r20 /* r20 = base address of __div_table */
16 ldx.ub r20, r21, r19 /* u0.8 */
17 shari r25, 32, r25 /* normalize to s2.30 */
18 shlli r21, 1, r21 /* byte index -> halfword offset for the ldx.w below */
19 muls.l r25, r19, r19 /* s2.38 */
20 ldx.w r20, r21, r21 /* s2.14 */
/*
 * The return address has to be moved from r18 into tr0 here, because r18
 * is overwritten as a scratch register by the muls.l a few lines below.
 */
21 ptabs r18, tr0
22 shari r19, 24, r19 /* truncate to s2.14 */
23 sub r21, r19, r19 /* some 11 bit inverse in s1.14 */
24 muls.l r19, r19, r21 /* u0.28 */
25 sub r63, r1, r1 /* r1 = -r1 (r63 reads as zero) */
26 addi r1, 92, r1 /* r1 = 92 - normalization shift: count for the final shard */
27 muls.l r25, r21, r18 /* s2.58 */
28 shlli r19, 45, r19 /* multiply by two and convert to s2.58 */
29 /* bubble */
30 sub r19, r18, r18
31 shari r18, 28, r18 /* some 22 bit inverse in s1.30 */
32 muls.l r18, r25, r0 /* s2.60 */
33 muls.l r18, r4, r25 /* s32.30 */
34 /* bubble */
35 shari r0, 16, r19 /* s-16.44 */
36 muls.l r19, r18, r19 /* s-16.74 */
37 shari r25, 63, r0 /* r0 = 0 or -1: sign mask of the dividend * inverse product */
38 shari r4, 14, r18 /* s19.-14 */
39 shari r19, 30, r19 /* s-16.44 */
40 muls.l r19, r18, r19 /* s15.30 */
41 xor r21, r0, r21 /* You could also use the constant 1 << 27. */
42 add r21, r25, r21
43 sub r21, r19, r21
44 shard r21, r1, r21 /* arithmetic right shift by the count prepared in r1 */
45 sub r21, r0, r0 /* result = r21 - sign mask (i.e. +1 when the mask is -1) */
46 blink tr0, r63 /* return through tr0; link value discarded into r63 */
47
48/* This table has been generated by divtab.c .
49Defects for bias -330:
50 Max defect: 6.081536e-07 at -1.000000e+00
51 Min defect: 2.849516e-08 at 1.030651e+00
52 Max 2nd step defect: 9.606539e-12 at -1.000000e+00
53 Min 2nd step defect: 0.000000e+00 at 0.000000e+00
54 Defect at 1: 1.238659e-07
55 Defect at -2: 1.061708e-07 */
56
/*
 * Lookup table used by __sdivsi3.  The __div_table label sits in the middle
 * of the 128-byte object so that it can be addressed with a signed index.
 * Byte offsets relative to the label:
 *
 *   -64 .. -33   negative division constants (16 halfwords)
 *   -32 .. -17   negative division factors   (16 bytes)
 *   -16 ..  -1   padding (.skip 16)
 *     0 ..  15   padding (.skip 16)
 *    16 ..  31   positive division factors   (16 bytes)
 *    32 ..  63   positive division constants (16 halfwords)
 *
 * __sdivsi3 loads a factor with ldx.ub at (base + index) and a constant with
 * ldx.w at (base + 2 * index); the two padding areas correspond to the index
 * "hole" mentioned in that routine's comments.
 */
57 .balign 2
58 .type __div_table,@object
59 .size __div_table,128
60/* negative division constants */
61 .word -16638
62 .word -17135
63 .word -17737
64 .word -18433
65 .word -19103
66 .word -19751
67 .word -20583
68 .word -21383
69 .word -22343
70 .word -23353
71 .word -24407
72 .word -25582
73 .word -26863
74 .word -28382
75 .word -29965
76 .word -31800
77/* negative division factors */
78 .byte 66
79 .byte 70
80 .byte 75
81 .byte 81
82 .byte 87
83 .byte 93
84 .byte 101
85 .byte 109
86 .byte 119
87 .byte 130
88 .byte 142
89 .byte 156
90 .byte 172
91 .byte 192
92 .byte 214
93 .byte 241
94 .skip 16
95 .global __div_table
96__div_table:
97 .skip 16
98/* positive division factors */
99 .byte 241
100 .byte 214
101 .byte 192
102 .byte 172
103 .byte 156
104 .byte 142
105 .byte 130
106 .byte 119
107 .byte 109
108 .byte 101
109 .byte 93
110 .byte 87
111 .byte 81
112 .byte 75
113 .byte 70
114 .byte 66
115/* positive division constants */
116 .word 31801
117 .word 29966
118 .word 28383
119 .word 26864
120 .word 25583
121 .word 24408
122 .word 23354
123 .word 22344
124 .word 21384
125 .word 20584
126 .word 19752
127 .word 19104
128 .word 18434
129 .word 17738
130 .word 17136
131 .word 16639
diff --git a/arch/sh/lib64/udivdi3.S b/arch/sh/lib64/udivdi3.S
new file mode 100644
index 000000000000..6895c0225b85
--- /dev/null
+++ b/arch/sh/lib64/udivdi3.S
@@ -0,0 +1,120 @@
/*
 * __udivdi3 - 64-bit unsigned divide helper taken from libgcc (sh64, SHmedia).
 *
 * Register usage as seen in the code below:
 *   r2  dividend on entry, quotient on return
 *   r3  divisor (normalized into r6; not checked for zero)
 *   r18 return address (moved to tr0 with ptabs before each blink)
 *   written: r0,r1,r2,r4,r5,r6,r7,r8,r9,r19,r20,r21,r22,r25,tr0
 * NOTE(review): confirm the argument registers against the SH-5 calling
 * convention; the roles above are inferred from how the code uses them.
 *
 * A reciprocal of the normalized divisor is built up in r1, then the
 * quotient is accumulated in r8 from partial divide steps.  Two paths exist:
 * the fall-through path performs three partial steps, and large_divisor
 * performs two followed by a final adjustment.
 */
1 .section .text..SHmedia32,"ax"
2 .align 2
3 .global __udivdi3
4__udivdi3:
5 shlri r3,1,r4
6 nsb r4,r22 /* r22 = normalization shift for the divisor */
7 shlld r3,r22,r6 /* r6 = divisor shifted left by r22 */
8 shlri r6,49,r5
9 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
10 sub r21,r5,r1
11 mmulfx.w r1,r1,r4
12 mshflo.w r1,r63,r1
13 sub r63,r22,r20 // r63 == 64 % 64
14 mmulfx.w r5,r4,r4
15 pta large_divisor,tr0
16 addi r20,32,r9 /* r9 = 32 - r22 */
17 msub.w r1,r4,r1
18 madd.w r1,r1,r1
19 mmulfx.w r1,r1,r4
20 shlri r6,32,r7 /* r7 = upper 32 bits of the normalized divisor */
/* Taken when r9 > 0, i.e. when the normalization shift is below 32. */
21 bgt/u r9,r63,tr0 // large_divisor
22 mmulfx.w r5,r4,r4
23 shlri r2,32+14,r19
24 addi r22,-31,r0
25 msub.w r1,r4,r1
26
27 mulu.l r1,r7,r4
28 addi r1,-3,r5
29 mulu.l r5,r19,r5
30 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
31 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
32 the case may be, %0000000000000000 000.11111111111, still */
33 muls.l r1,r4,r4 /* leaving at least one sign bit. */
34 mulu.l r5,r3,r8
35 mshalds.l r1,r21,r1
36 shari r4,26,r4
37 shlld r8,r0,r8
38 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
39 sub r2,r8,r2
40 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
41
42 shlri r2,22,r21
43 mulu.l r21,r1,r21
44 shlld r5,r0,r8 /* r8 = first partial quotient, shifted into place */
45 addi r20,30-22,r0
46 shlrd r21,r0,r21
47 mulu.l r21,r3,r5
48 add r8,r21,r8 /* accumulate second partial quotient */
49 mcmpgt.l r21,r63,r21 // See Note 1
50 addi r20,30,r0
51 mshfhi.l r63,r21,r21
52 sub r2,r5,r2
53 andc r2,r21,r2
54
55 /* small divisor: need a third divide step */
56 mulu.l r2,r1,r7
57 ptabs r18,tr0 /* tr0 = return address */
58 addi r2,1,r2
59 shlrd r7,r0,r7
60 mulu.l r7,r3,r5
61 add r8,r7,r8 /* accumulate third partial quotient */
62 sub r2,r3,r2
/* r5 = 1 when (rest + 1 - divisor) > r7 * divisor, i.e. one more fits. */
63 cmpgt r2,r5,r5
64 add r8,r5,r2 /* quotient = accumulated value plus that correction */
65 /* could test r3 here to check for divide by zero. */
66 blink tr0,r63
67
68large_divisor:
69 mmulfx.w r5,r4,r4
70 shlrd r2,r9,r25 /* r25 = dividend shifted right by r9 */
71 shlri r25,32,r8
72 msub.w r1,r4,r1
73
74 mulu.l r1,r7,r4
75 addi r1,-3,r5
76 mulu.l r5,r8,r5
77 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
78 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
79 the case may be, %0000000000000000 000.11111111111, still */
80 muls.l r1,r4,r4 /* leaving at least one sign bit. */
81 shlri r5,14-1,r8
82 mulu.l r8,r7,r5
83 mshalds.l r1,r21,r1
84 shari r4,26,r4
85 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
86 sub r25,r5,r25
87 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
88
89 shlri r25,22,r21
90 mulu.l r21,r1,r21
91 pta no_lo_adj,tr0
92 addi r22,32,r0
93 shlri r21,40,r21
94 mulu.l r21,r7,r5
95 add r8,r21,r8
96 shlld r2,r0,r2
97 sub r25,r5,r25
/* Skip the +1 adjustment when r7 > r25 (unsigned). */
98 bgtu/u r7,r25,tr0 // no_lo_adj
99 addi r8,1,r8
100 sub r25,r7,r25
101no_lo_adj:
102 mextr4 r2,r25,r2
103
104 /* large_divisor: only needs a few adjustments. */
105 mulu.l r8,r6,r5
106 ptabs r18,tr0 /* tr0 = return address */
107 /* bubble */
108 cmpgtu r5,r2,r5 /* r5 = 1 when the estimate in r8 is one too large */
109 sub r8,r5,r2 /* quotient = r8 - r5 */
110 blink tr0,r63
111
112/* Note 1: To shift the result of the second divide stage so that the result
113 always fits into 32 bits, yet we still reduce the rest sufficiently
114 would require a lot of instructions to do the shifts just right. Using
115 the full 64 bit shift result to multiply with the divisor would require
116 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
117 Fortunately, if the upper 32 bits of the shift result are nonzero, we
118 know that the rest after taking this partial result into account will
119 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
120 upper 32 bits of the partial result are nonzero. */
diff --git a/arch/sh/lib64/udivsi3.S b/arch/sh/lib64/udivsi3.S
new file mode 100644
index 000000000000..e68120e4b847
--- /dev/null
+++ b/arch/sh/lib64/udivsi3.S
@@ -0,0 +1,59 @@
/*
 * __udivsi3 - 32-bit unsigned divide helper taken from libgcc (sh64, SHmedia).
 *
 * Builds a reciprocal estimate of the normalized divisor, then accumulates
 * the quotient in r18 over three multiply/subtract rounds and applies a
 * final +1 correction.  The code is branch-free and does not check for a
 * zero divisor.
 */
1 .global __udivsi3
2 .section .text..SHmedia32,"ax"
3 .align 2
4
5/*
6 inputs: r4 (dividend), r5 (divisor)
7 clobbered: r18,r19,r20,r21,r22,r25,tr0
8 result in r0.
9 */
10__udivsi3:
11 addz.l r5,r63,r22 /* r22 = divisor, zero-extended from 32 bits */
12 nsb r22,r0 /* r0 = normalization shift for the divisor */
13 shlld r22,r0,r25
14 shlri r25,48,r25
15 movi 0xffffffffffffbb0c,r20 /* shift count equiv. 76 */
16 sub r20,r25,r21
17 mmulfx.w r21,r21,r19
18 mshflo.w r21,r63,r21
/*
 * The return address is moved from r18 to tr0 this early because r18 is
 * reused below as the quotient accumulator.
 */
19 ptabs r18,tr0
20 mmulfx.w r25,r19,r19
21 sub r20,r0,r0 /* r0 = shift count for the shlrd steps below */
22 /* bubble */
23 msub.w r21,r19,r19
24
25 /*
26 * It would be nice for scheduling to do this add to r21 before
27 * the msub.w, but we need a different value for r19 to keep
28 * errors under control.
29 */
30 addi r19,-2,r21
31 mulu.l r4,r21,r18
32 mmulfx.w r19,r19,r19
33 shlli r21,15,r21
34 shlrd r18,r0,r18 /* r18 = first partial quotient */
35 mulu.l r18,r22,r20
36 mmacnfx.wl r25,r19,r21
37 /* bubble */
38 sub r4,r20,r25 /* r25 = dividend - r18 * divisor */
39
40 mulu.l r25,r21,r19
41 addi r0,14,r0
42 /* bubble */
43 shlrd r19,r0,r19 /* r19 = second partial quotient */
44 mulu.l r19,r22,r20
45 add r18,r19,r18
46 /* bubble */
47 sub.l r25,r20,r25 /* remove r19 * divisor from the remainder */
48
49 mulu.l r25,r21,r19
50 addz.l r25,r63,r25
51 sub r25,r22,r25
52 shlrd r19,r0,r19 /* r19 = third partial quotient */
53 mulu.l r19,r22,r20
54 addi r25,1,r25 /* r25 = remainder - divisor + 1 */
55 add r18,r19,r18
56
/* r25 = 1 when the remainder still holds at least one more divisor. */
57 cmpgt r25,r20,r25
58 add.l r18,r25,r0 /* result = accumulated quotient + correction */
59 blink tr0,r63 /* return */