diff options
| author | Paul Mundt <lethal@linux-sh.org> | 2008-12-12 02:53:14 -0500 |
|---|---|---|
| committer | Paul Mundt <lethal@linux-sh.org> | 2008-12-22 04:44:05 -0500 |
| commit | 180ae2037f5bc33b0597ddbb76d36b08a74a238a (patch) | |
| tree | 623c8e77a7593811124726e669e701443b83e47e | |
| parent | 209aa4fdc39eacc145a7f9c32a4b9ffcc68912c6 (diff) | |
sh: Provide sdivsi3/udivsi3/udivdi3 for sh64, kill off libgcc linking.
This moves in the necessary libgcc bits and kills off the libgcc linking
for sh64 kernels as well.
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
| -rw-r--r-- | arch/sh/Makefile | 4 | ||||
| -rw-r--r-- | arch/sh/kernel/sh_ksyms_64.c | 2 | ||||
| -rw-r--r-- | arch/sh/lib64/Makefile | 3 | ||||
| -rw-r--r-- | arch/sh/lib64/sdivsi3.S | 131 | ||||
| -rw-r--r-- | arch/sh/lib64/udivdi3.S | 120 | ||||
| -rw-r--r-- | arch/sh/lib64/udivsi3.S | 59 |
6 files changed, 314 insertions, 5 deletions
diff --git a/arch/sh/Makefile b/arch/sh/Makefile index d56889e62a90..c59098dcdfad 100644 --- a/arch/sh/Makefile +++ b/arch/sh/Makefile | |||
| @@ -177,10 +177,8 @@ KBUILD_CFLAGS += -pipe $(cflags-y) | |||
| 177 | KBUILD_CPPFLAGS += $(cflags-y) | 177 | KBUILD_CPPFLAGS += $(cflags-y) |
| 178 | KBUILD_AFLAGS += $(cflags-y) | 178 | KBUILD_AFLAGS += $(cflags-y) |
| 179 | 179 | ||
| 180 | LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) | ||
| 181 | |||
| 182 | libs-$(CONFIG_SUPERH32) := arch/sh/lib/ $(libs-y) | 180 | libs-$(CONFIG_SUPERH32) := arch/sh/lib/ $(libs-y) |
| 183 | libs-$(CONFIG_SUPERH64) := arch/sh/lib64/ $(libs-y) $(LIBGCC) | 181 | libs-$(CONFIG_SUPERH64) := arch/sh/lib64/ $(libs-y) |
| 184 | 182 | ||
| 185 | PHONY += maketools FORCE | 183 | PHONY += maketools FORCE |
| 186 | 184 | ||
diff --git a/arch/sh/kernel/sh_ksyms_64.c b/arch/sh/kernel/sh_ksyms_64.c index ab7adaa95f77..0d74d6b8774e 100644 --- a/arch/sh/kernel/sh_ksyms_64.c +++ b/arch/sh/kernel/sh_ksyms_64.c | |||
| @@ -76,7 +76,5 @@ EXPORT_SYMBOL(strcpy); | |||
| 76 | #define DECLARE_EXPORT(name) extern void name(void);EXPORT_SYMBOL(name) | 76 | #define DECLARE_EXPORT(name) extern void name(void);EXPORT_SYMBOL(name) |
| 77 | 77 | ||
| 78 | DECLARE_EXPORT(__sdivsi3); | 78 | DECLARE_EXPORT(__sdivsi3); |
| 79 | DECLARE_EXPORT(__sdivsi3_2); | ||
| 80 | DECLARE_EXPORT(__muldi3); | ||
| 81 | DECLARE_EXPORT(__udivsi3); | 79 | DECLARE_EXPORT(__udivsi3); |
| 82 | DECLARE_EXPORT(__div_table); | 80 | DECLARE_EXPORT(__div_table); |
diff --git a/arch/sh/lib64/Makefile b/arch/sh/lib64/Makefile index 1d932e7d0ca0..4bacb9e83478 100644 --- a/arch/sh/lib64/Makefile +++ b/arch/sh/lib64/Makefile | |||
| @@ -12,3 +12,6 @@ | |||
| 12 | # Panic should really be compiled as PIC | 12 | # Panic should really be compiled as PIC |
| 13 | lib-y := udelay.o c-checksum.o dbg.o panic.o memcpy.o memset.o \ | 13 | lib-y := udelay.o c-checksum.o dbg.o panic.o memcpy.o memset.o \ |
| 14 | copy_user_memcpy.o copy_page.o clear_page.o strcpy.o strlen.o | 14 | copy_user_memcpy.o copy_page.o clear_page.o strcpy.o strlen.o |
| 15 | |||
| 16 | # Extracted from libgcc | ||
| 17 | lib-y += udivsi3.o udivdi3.o sdivsi3.o | ||
diff --git a/arch/sh/lib64/sdivsi3.S b/arch/sh/lib64/sdivsi3.S new file mode 100644 index 000000000000..6a800c6a4904 --- /dev/null +++ b/arch/sh/lib64/sdivsi3.S | |||
| @@ -0,0 +1,131 @@ | |||
| 1 | .global __sdivsi3 | ||
| 2 | .section .text..SHmedia32,"ax" | ||
| 3 | .align 2 | ||
| 4 | |||
| 5 | /* __sdivsi3: signed 32-bit division (libgcc routine). inputs: r4 (dividend), r5 (divisor) */ | ||
| 6 | /* clobbered: r1,r18,r19,r20,r21,r25,tr0 (r18 is reused as scratch once ptabs has copied it to tr0) */ | ||
| 7 | /* result in r0 */ | ||
| 8 | __sdivsi3: | ||
| 9 | ptb __div_table,tr0 /* table address travels through tr0; gettr below copies it to r20 */ | ||
| 10 | | ||
| 11 | nsb r5, r1 /* r1 = shift count used to normalize the divisor */ | ||
| 12 | shlld r5, r1, r25 /* normalize; [-2..-1, 1..2) in s2.62 */ | ||
| 13 | shari r25, 58, r21 /* extract 5(6) bit index (s2.4 with hole -1..1) */ | ||
| 14 | /* bubble */ | ||
| 15 | gettr tr0,r20 /* r20 = base used for both table loads */ | ||
| 16 | ldx.ub r20, r21, r19 /* u0.8 */ | ||
| 17 | shari r25, 32, r25 /* normalize to s2.30 */ | ||
| 18 | shlli r21, 1, r21 /* byte index -> offset of the 16-bit constant */ | ||
| 19 | muls.l r25, r19, r19 /* s2.38 */ | ||
| 20 | ldx.w r20, r21, r21 /* s2.14 */ | ||
| 21 | ptabs r18, tr0 /* tr0 = return target from r18; tr0 is no longer needed for the table */ | ||
| 22 | shari r19, 24, r19 /* truncate to s2.14 */ | ||
| 23 | sub r21, r19, r19 /* some 11 bit inverse in s1.14 */ | ||
| 24 | muls.l r19, r19, r21 /* u0.28 */ | ||
| 25 | sub r63, r1, r1 /* with the addi below: r1 = 92 - r1 (r63 reads as zero) */ | ||
| 26 | addi r1, 92, r1 /* r1 is now the shift count for the final shard */ | ||
| 27 | muls.l r25, r21, r18 /* s2.58 */ | ||
| 28 | shlli r19, 45, r19 /* multiply by two and convert to s2.58 */ | ||
| 29 | /* bubble */ | ||
| 30 | sub r19, r18, r18 | ||
| 31 | shari r18, 28, r18 /* some 22 bit inverse in s1.30 */ | ||
| 32 | muls.l r18, r25, r0 /* s2.60 */ | ||
| 33 | muls.l r18, r4, r25 /* s32.30 */ | ||
| 34 | /* bubble */ | ||
| 35 | shari r0, 16, r19 /* s-16.44 */ | ||
| 36 | muls.l r19, r18, r19 /* s-16.74 */ | ||
| 37 | shari r25, 63, r0 /* r0 = 0 or -1: sign of the dividend * inverse product in r25 */ | ||
| 38 | shari r4, 14, r18 /* s19.-14 */ | ||
| 39 | shari r19, 30, r19 /* s-16.44 */ | ||
| 40 | muls.l r19, r18, r19 /* s15.30 */ | ||
| 41 | xor r21, r0, r21 /* You could also use the constant 1 << 27. */ | ||
| 42 | add r21, r25, r21 | ||
| 43 | sub r21, r19, r21 | ||
| 44 | shard r21, r1, r21 /* shift right by the count prepared in r1 */ | ||
| 45 | sub r21, r0, r0 /* subtract the 0/-1 sign mask; quotient is left in r0 */ | ||
| 46 | blink tr0, r63 /* return through tr0 */ | ||
| 47 | |||
| 48 | /* This table has been generated by divtab.c . | ||
| 49 | Defects for bias -330: | ||
| 50 | Max defect: 6.081536e-07 at -1.000000e+00 | ||
| 51 | Min defect: 2.849516e-08 at 1.030651e+00 | ||
| 52 | Max 2nd step defect: 9.606539e-12 at -1.000000e+00 | ||
| 53 | Min 2nd step defect: 0.000000e+00 at 0.000000e+00 | ||
| 54 | Defect at 1: 1.238659e-07 | ||
| 55 | Defect at -2: 1.061708e-07 */ | ||
| 56 | |||
| 57 | .balign 2 /* the .word entries below are 16-bit */ | ||
| 58 | .type __div_table,@object | ||
| 59 | .size __div_table,128 /* 128 = 64 bytes laid out before the label + 64 bytes after it */ | ||
| 60 | /* negative division constants: 16-bit entries, loaded by ldx.w in __sdivsi3 at __div_table + 2*index */ | ||
| 61 | .word -16638 | ||
| 62 | .word -17135 | ||
| 63 | .word -17737 | ||
| 64 | .word -18433 | ||
| 65 | .word -19103 | ||
| 66 | .word -19751 | ||
| 67 | .word -20583 | ||
| 68 | .word -21383 | ||
| 69 | .word -22343 | ||
| 70 | .word -23353 | ||
| 71 | .word -24407 | ||
| 72 | .word -25582 | ||
| 73 | .word -26863 | ||
| 74 | .word -28382 | ||
| 75 | .word -29965 | ||
| 76 | .word -31800 | ||
| 77 | /* negative division factors: 8-bit entries, loaded by ldx.ub in __sdivsi3 at __div_table + index */ | ||
| 78 | .byte 66 | ||
| 79 | .byte 70 | ||
| 80 | .byte 75 | ||
| 81 | .byte 81 | ||
| 82 | .byte 87 | ||
| 83 | .byte 93 | ||
| 84 | .byte 101 | ||
| 85 | .byte 109 | ||
| 86 | .byte 119 | ||
| 87 | .byte 130 | ||
| 88 | .byte 142 | ||
| 89 | .byte 156 | ||
| 90 | .byte 172 | ||
| 91 | .byte 192 | ||
| 92 | .byte 214 | ||
| 93 | .byte 241 | ||
| 94 | .skip 16 /* gap just below the label; presumably the index "hole" noted in __sdivsi3 */ | ||
| 95 | .global __div_table | ||
| 96 | __div_table: /* label sits mid-table so that the signed index from __sdivsi3 can address both halves */ | ||
| 97 | .skip 16 /* gap just above the label, mirroring the one below it */ | ||
| 98 | /* positive division factors */ | ||
| 99 | .byte 241 | ||
| 100 | .byte 214 | ||
| 101 | .byte 192 | ||
| 102 | .byte 172 | ||
| 103 | .byte 156 | ||
| 104 | .byte 142 | ||
| 105 | .byte 130 | ||
| 106 | .byte 119 | ||
| 107 | .byte 109 | ||
| 108 | .byte 101 | ||
| 109 | .byte 93 | ||
| 110 | .byte 87 | ||
| 111 | .byte 81 | ||
| 112 | .byte 75 | ||
| 113 | .byte 70 | ||
| 114 | .byte 66 | ||
| 115 | /* positive division constants */ | ||
| 116 | .word 31801 | ||
| 117 | .word 29966 | ||
| 118 | .word 28383 | ||
| 119 | .word 26864 | ||
| 120 | .word 25583 | ||
| 121 | .word 24408 | ||
| 122 | .word 23354 | ||
| 123 | .word 22344 | ||
| 124 | .word 21384 | ||
| 125 | .word 20584 | ||
| 126 | .word 19752 | ||
| 127 | .word 19104 | ||
| 128 | .word 18434 | ||
| 129 | .word 17738 | ||
| 130 | .word 17136 | ||
| 131 | .word 16639 | ||
diff --git a/arch/sh/lib64/udivdi3.S b/arch/sh/lib64/udivdi3.S new file mode 100644 index 000000000000..6895c0225b85 --- /dev/null +++ b/arch/sh/lib64/udivdi3.S | |||
| @@ -0,0 +1,120 @@ | |||
| 1 | .section .text..SHmedia32,"ax" | ||
| 2 | .align 2 | ||
| 3 | .global __udivdi3 | ||
| 4 | __udivdi3: /* 64-bit unsigned division (libgcc routine). Judging by register use: r2 = dividend, r3 = divisor, quotient returned in r2 -- TODO confirm against the ABI. Writes r0-r2, r4-r9, r19-r22, r25, tr0. */ | ||
| 5 | shlri r3,1,r4 | ||
| 6 | nsb r4,r22 /* r22 = normalization shift count, reused by both paths below */ | ||
| 7 | shlld r3,r22,r6 /* r6 = divisor shifted left by r22 */ | ||
| 8 | shlri r6,49,r5 | ||
| 9 | movi 0xffffffffffffbaf1,r21 /* .l shift count 17 (low 5 bits of 0xbaf1). */ | ||
| 10 | sub r21,r5,r1 | ||
| 11 | mmulfx.w r1,r1,r4 | ||
| 12 | mshflo.w r1,r63,r1 | ||
| 13 | sub r63,r22,r20 // r63 == 64 % 64 | ||
| 14 | mmulfx.w r5,r4,r4 | ||
| 15 | pta large_divisor,tr0 /* branch target for the bgt/u below */ | ||
| 16 | addi r20,32,r9 | ||
| 17 | msub.w r1,r4,r1 | ||
| 18 | madd.w r1,r1,r1 | ||
| 19 | mmulfx.w r1,r1,r4 | ||
| 20 | shlri r6,32,r7 /* r7 = upper 32 bits of the shifted divisor */ | ||
| 21 | bgt/u r9,r63,tr0 // large_divisor | ||
| 22 | mmulfx.w r5,r4,r4 | ||
| 23 | shlri r2,32+14,r19 | ||
| 24 | addi r22,-31,r0 | ||
| 25 | msub.w r1,r4,r1 | ||
| 26 | | ||
| 27 | mulu.l r1,r7,r4 | ||
| 28 | addi r1,-3,r5 | ||
| 29 | mulu.l r5,r19,r5 | ||
| 30 | sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 | ||
| 31 | shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as | ||
| 32 | the case may be, %0000000000000000 000.11111111111, still */ | ||
| 33 | muls.l r1,r4,r4 /* leaving at least one sign bit. */ | ||
| 34 | mulu.l r5,r3,r8 | ||
| 35 | mshalds.l r1,r21,r1 /* shift count comes from the constant loaded into r21 above */ | ||
| 36 | shari r4,26,r4 | ||
| 37 | shlld r8,r0,r8 | ||
| 38 | add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) | ||
| 39 | sub r2,r8,r2 | ||
| 40 | /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ | ||
| 41 | | ||
| 42 | shlri r2,22,r21 | ||
| 43 | mulu.l r21,r1,r21 | ||
| 44 | shlld r5,r0,r8 /* r8 starts accumulating the partial quotients */ | ||
| 45 | addi r20,30-22,r0 | ||
| 46 | shlrd r21,r0,r21 | ||
| 47 | mulu.l r21,r3,r5 | ||
| 48 | add r8,r21,r8 | ||
| 49 | mcmpgt.l r21,r63,r21 // See Note 1 | ||
| 50 | addi r20,30,r0 | ||
| 51 | mshfhi.l r63,r21,r21 | ||
| 52 | sub r2,r5,r2 | ||
| 53 | andc r2,r21,r2 | ||
| 54 | | ||
| 55 | /* small divisor: need a third divide step */ | ||
| 56 | mulu.l r2,r1,r7 | ||
| 57 | ptabs r18,tr0 /* tr0 = return target from r18 */ | ||
| 58 | addi r2,1,r2 | ||
| 59 | shlrd r7,r0,r7 | ||
| 60 | mulu.l r7,r3,r5 | ||
| 61 | add r8,r7,r8 | ||
| 62 | sub r2,r3,r2 | ||
| 63 | cmpgt r2,r5,r5 /* r5 = 0/1 final correction */ | ||
| 64 | add r8,r5,r2 /* quotient = partial quotients accumulated in r8 + correction */ | ||
| 65 | /* could test r3 here to check for divide by zero. */ | ||
| 66 | blink tr0,r63 /* return */ | ||
| 67 | | ||
| 68 | large_divisor: /* entered via the bgt/u r9,r63 test in __udivdi3 */ | ||
| 69 | mmulfx.w r5,r4,r4 | ||
| 70 | shlrd r2,r9,r25 | ||
| 71 | shlri r25,32,r8 | ||
| 72 | msub.w r1,r4,r1 | ||
| 73 | | ||
| 74 | mulu.l r1,r7,r4 | ||
| 75 | addi r1,-3,r5 | ||
| 76 | mulu.l r5,r8,r5 | ||
| 77 | sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 | ||
| 78 | shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as | ||
| 79 | the case may be, %0000000000000000 000.11111111111, still */ | ||
| 80 | muls.l r1,r4,r4 /* leaving at least one sign bit. */ | ||
| 81 | shlri r5,14-1,r8 | ||
| 82 | mulu.l r8,r7,r5 | ||
| 83 | mshalds.l r1,r21,r1 | ||
| 84 | shari r4,26,r4 | ||
| 85 | add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) | ||
| 86 | sub r25,r5,r25 | ||
| 87 | /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ | ||
| 88 | | ||
| 89 | shlri r25,22,r21 | ||
| 90 | mulu.l r21,r1,r21 | ||
| 91 | pta no_lo_adj,tr0 /* branch target for the bgtu/u below */ | ||
| 92 | addi r22,32,r0 | ||
| 93 | shlri r21,40,r21 | ||
| 94 | mulu.l r21,r7,r5 | ||
| 95 | add r8,r21,r8 | ||
| 96 | shlld r2,r0,r2 | ||
| 97 | sub r25,r5,r25 | ||
| 98 | bgtu/u r7,r25,tr0 // no_lo_adj | ||
| 99 | addi r8,1,r8 /* not skipped: bump the quotient estimate and take r7 off the rest */ | ||
| 100 | sub r25,r7,r25 | ||
| 101 | no_lo_adj: | ||
| 102 | mextr4 r2,r25,r2 | ||
| 103 | | ||
| 104 | /* large_divisor: only needs a few adjustments. */ | ||
| 105 | mulu.l r8,r6,r5 | ||
| 106 | ptabs r18,tr0 /* tr0 = return target from r18 */ | ||
| 107 | /* bubble */ | ||
| 108 | cmpgtu r5,r2,r5 /* r5 = 0/1 final correction */ | ||
| 109 | sub r8,r5,r2 /* quotient = estimate in r8 minus the correction */ | ||
| 110 | blink tr0,r63 /* return */ | ||
| 111 | |||
| 112 | /* Note 1: Shifting the result of the second divide stage so that it | ||
| 113 | always fits into 32 bits, while still reducing the rest sufficiently, | ||
| 114 | would require a lot of instructions to do the shifts just right. Using | ||
| 115 | the full 64 bit shift result to multiply with the divisor would require | ||
| 116 | four extra instructions for the upper 32 bits (shift / mulu / shift / sub). | ||
| 117 | Fortunately, if the upper 32 bits of the shift result are nonzero, we | ||
| 118 | know that the rest after taking this partial result into account will | ||
| 119 | fit into 32 bits. So we just clear the upper 32 bits of the rest if the | ||
| 120 | upper 32 bits of the partial result are nonzero. */ | ||
diff --git a/arch/sh/lib64/udivsi3.S b/arch/sh/lib64/udivsi3.S new file mode 100644 index 000000000000..e68120e4b847 --- /dev/null +++ b/arch/sh/lib64/udivsi3.S | |||
| @@ -0,0 +1,59 @@ | |||
| 1 | .global __udivsi3 | ||
| 2 | .section .text..SHmedia32,"ax" | ||
| 3 | .align 2 | ||
| 4 | |||
| 5 | /* | ||
| 6 | __udivsi3: unsigned 32-bit division (libgcc routine). inputs: r4 (dividend), r5 (divisor) | ||
| 7 | clobbered: r18,r19,r20,r21,r22,r25,tr0 (r18 is reused as scratch once ptabs has copied it to tr0) | ||
| 8 | result in r0. | ||
| 9 | */ | ||
| 10 | __udivsi3: | ||
| 11 | addz.l r5,r63,r22 /* r22 = divisor, zero-extended from 32 bits */ | ||
| 12 | nsb r22,r0 /* r0 = normalization shift count */ | ||
| 13 | shlld r22,r0,r25 | ||
| 14 | shlri r25,48,r25 | ||
| 15 | movi 0xffffffffffffbb0c,r20 /* shift count equiv 76 (low 6 bits are 12 = 76 mod 64) */ | ||
| 16 | sub r20,r25,r21 | ||
| 17 | mmulfx.w r21,r21,r19 | ||
| 18 | mshflo.w r21,r63,r21 | ||
| 19 | ptabs r18,tr0 /* tr0 = return target from r18; r18 is scratch from here on */ | ||
| 20 | mmulfx.w r25,r19,r19 | ||
| 21 | sub r20,r0,r0 /* r0 = constant - normalization count: shift count for the shlrd below */ | ||
| 22 | /* bubble */ | ||
| 23 | msub.w r21,r19,r19 | ||
| 24 | | ||
| 25 | /* | ||
| 26 | * It would be nice for scheduling to do this add to r21 before | ||
| 27 | * the msub.w, but we need a different value for r19 to keep | ||
| 28 | * errors under control. | ||
| 29 | */ | ||
| 30 | addi r19,-2,r21 | ||
| 31 | mulu.l r4,r21,r18 | ||
| 32 | mmulfx.w r19,r19,r19 | ||
| 33 | shlli r21,15,r21 | ||
| 34 | shlrd r18,r0,r18 /* r18 = first quotient estimate */ | ||
| 35 | mulu.l r18,r22,r20 | ||
| 36 | mmacnfx.wl r25,r19,r21 | ||
| 37 | /* bubble */ | ||
| 38 | sub r4,r20,r25 /* r25 = dividend - estimate * divisor (the rest) */ | ||
| 39 | | ||
| 40 | mulu.l r25,r21,r19 | ||
| 41 | addi r0,14,r0 /* later shlrd steps shift by 14 more */ | ||
| 42 | /* bubble */ | ||
| 43 | shlrd r19,r0,r19 | ||
| 44 | mulu.l r19,r22,r20 | ||
| 45 | add r18,r19,r18 /* accumulate second partial quotient */ | ||
| 46 | /* bubble */ | ||
| 47 | sub.l r25,r20,r25 | ||
| 48 | | ||
| 49 | mulu.l r25,r21,r19 | ||
| 50 | addz.l r25,r63,r25 /* zero-extend the rest from 32 bits */ | ||
| 51 | sub r25,r22,r25 | ||
| 52 | shlrd r19,r0,r19 | ||
| 53 | mulu.l r19,r22,r20 | ||
| 54 | addi r25,1,r25 | ||
| 55 | add r18,r19,r18 /* accumulate third partial quotient */ | ||
| 56 | | ||
| 57 | cmpgt r25,r20,r25 /* r25 = 0/1 final correction */ | ||
| 58 | add.l r18,r25,r0 /* quotient = accumulated estimate + correction (32-bit add) */ | ||
| 59 | blink tr0,r63 /* return through tr0 */ | ||
