aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sh/lib/udivsi3_i4i-Os.S
diff options
context:
space:
mode:
authorTakashi YOSHII <yoshii.takashi@renesas.com>2008-12-07 21:33:06 -0500
committerPaul Mundt <lethal@linux-sh.org>2008-12-22 04:43:53 -0500
commit3b041227f7ef7c7e97f205c68c6069c0c62e5204 (patch)
treea860f34b268a257611415f33ea8f9df4ad188cb6 /arch/sh/lib/udivsi3_i4i-Os.S
parent1fdae0e59a3fc9e391d2422ddcfbdbdec1e8f724 (diff)
sh: Add plain udivsi3 (not _i4*) for gcc-4.1 and lower.
We chan't share code for udivsi3 and udivsi3_i4, because they have a different clobber list. Copy udivsi3 from gcc-4.1.2. As shown in arch/sh/lib/udivsi3.S (and -Os.S), .global __udivsi3_i4i .global __udivsi3_i4 .global __udivsi3 __udivsi3_i4i: ... Three symbols are sharing one code, which is actually udivsi3_i4i. But, this results unwanted code with gcc 4.1. In gcc, these three are treated as pseudo instructions that have their own clobber list apart from the usual calling convention. According to sh's machine description. The clobber list is as follows: - udivsi3_i4i : t,r1,pr,mach,macl - udivsi3_i4 : t,r0,r1,r4,r5,pr,dr0,dr2,dr4 - udivsi3 : t,r4,pr The caller of udivsi3 will be left with a broken r1 and mac*. gcc-4.1.x and older(at least to 3.4) generate udivsi3. ST's gcc-4.1.1 seems to be OK because it has _i4i. Signed-off-by: Takashi YOSHII <yoshii.takashi@renesas.com> Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch/sh/lib/udivsi3_i4i-Os.S')
-rw-r--r--arch/sh/lib/udivsi3_i4i-Os.S149
1 files changed, 149 insertions, 0 deletions
diff --git a/arch/sh/lib/udivsi3_i4i-Os.S b/arch/sh/lib/udivsi3_i4i-Os.S
new file mode 100644
index 000000000000..4835553e1ea9
--- /dev/null
+++ b/arch/sh/lib/udivsi3_i4i-Os.S
@@ -0,0 +1,149 @@
1/* Copyright (C) 2006 Free Software Foundation, Inc.
2
3This file is free software; you can redistribute it and/or modify it
4under the terms of the GNU General Public License as published by the
5Free Software Foundation; either version 2, or (at your option) any
6later version.
7
8In addition to the permissions in the GNU General Public License, the
9Free Software Foundation gives you unlimited permission to link the
10compiled version of this file into combinations with other programs,
11and to distribute those combinations without any restriction coming
12from the use of this file. (The General Public License restrictions
13do apply in other respects; for example, they cover modification of
14the file, and distribution when not linked into a combine
15executable.)
16
17This file is distributed in the hope that it will be useful, but
18WITHOUT ANY WARRANTY; without even the implied warranty of
19MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20General Public License for more details.
21
22You should have received a copy of the GNU General Public License
23along with this program; see the file COPYING. If not, write to
24the Free Software Foundation, 51 Franklin Street, Fifth Floor,
25Boston, MA 02110-1301, USA. */
26
27/* Moderately Space-optimized libgcc routines for the Renesas SH /
28 STMicroelectronics ST40 CPUs.
29 Contributed by J"orn Rennecke joern.rennecke@st.com. */
30
31/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
32 sh4-200 run times:
33 udiv small divisor: 55 cycles
34 udiv large divisor: 52 cycles
35 sdiv small divisor, positive result: 59 cycles
36 sdiv large divisor, positive result: 56 cycles
37 sdiv small divisor, negative result: 65 cycles (*)
38 sdiv large divisor, negative result: 62 cycles (*)
39 (*): r2 is restored in the rts delay slot and has a lingering latency
40 of two more cycles. */
41 .balign 4
42 .global __udivsi3_i4i
43 .global __udivsi3_i4
44 .set __udivsi3_i4, __udivsi3_i4i
45 .type __udivsi3_i4i, @function
46 .type __sdivsi3_i4i, @function
47__udivsi3_i4i:
48 sts pr,r1
49 mov.l r4,@-r15
50 extu.w r5,r0
51 cmp/eq r5,r0
52 swap.w r4,r0
53 shlr16 r4
54 bf/s large_divisor
55 div0u
56 mov.l r5,@-r15
57 shll16 r5
58sdiv_small_divisor:
59 div1 r5,r4
60 bsr div6
61 div1 r5,r4
62 div1 r5,r4
63 bsr div6
64 div1 r5,r4
65 xtrct r4,r0
66 xtrct r0,r4
67 bsr div7
68 swap.w r4,r4
69 div1 r5,r4
70 bsr div7
71 div1 r5,r4
72 xtrct r4,r0
73 mov.l @r15+,r5
74 swap.w r0,r0
75 mov.l @r15+,r4
76 jmp @r1
77 rotcl r0
78div7:
79 div1 r5,r4
80div6:
81 div1 r5,r4; div1 r5,r4; div1 r5,r4
82 div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
83
84divx3:
85 rotcl r0
86 div1 r5,r4
87 rotcl r0
88 div1 r5,r4
89 rotcl r0
90 rts
91 div1 r5,r4
92
93large_divisor:
94 mov.l r5,@-r15
95sdiv_large_divisor:
96 xor r4,r0
97 .rept 4
98 rotcl r0
99 bsr divx3
100 div1 r5,r4
101 .endr
102 mov.l @r15+,r5
103 mov.l @r15+,r4
104 jmp @r1
105 rotcl r0
106
107 .global __sdivsi3_i4i
108 .global __sdivsi3_i4
109 .global __sdivsi3
110 .set __sdivsi3_i4, __sdivsi3_i4i
111 .set __sdivsi3, __sdivsi3_i4i
112__sdivsi3_i4i:
113 mov.l r4,@-r15
114 cmp/pz r5
115 mov.l r5,@-r15
116 bt/s pos_divisor
117 cmp/pz r4
118 neg r5,r5
119 extu.w r5,r0
120 bt/s neg_result
121 cmp/eq r5,r0
122 neg r4,r4
123pos_result:
124 swap.w r4,r0
125 bra sdiv_check_divisor
126 sts pr,r1
127pos_divisor:
128 extu.w r5,r0
129 bt/s pos_result
130 cmp/eq r5,r0
131 neg r4,r4
132neg_result:
133 mova negate_result,r0
134 ;
135 mov r0,r1
136 swap.w r4,r0
137 lds r2,macl
138 sts pr,r2
139sdiv_check_divisor:
140 shlr16 r4
141 bf/s sdiv_large_divisor
142 div0u
143 bra sdiv_small_divisor
144 shll16 r5
145 .balign 4
146negate_result:
147 neg r0,r0
148 jmp @r2
149 sts macl,r2