diff options
Diffstat (limited to 'arch/arm/lib/lib1funcs.S')
-rw-r--r-- | arch/arm/lib/lib1funcs.S | 314 |
1 files changed, 314 insertions, 0 deletions
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S new file mode 100644 index 000000000000..59026029d017 --- /dev/null +++ b/arch/arm/lib/lib1funcs.S | |||
@@ -0,0 +1,314 @@ | |||
1 | /* | ||
2 | * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines | ||
3 | * | ||
4 | * Author: Nicolas Pitre <nico@cam.org> | ||
5 | * - contributed to gcc-3.4 on Sep 30, 2003 | ||
6 | * - adapted for the Linux kernel on Oct 2, 2003 | ||
7 | */ | ||
8 | |||
9 | /* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. | ||
10 | |||
11 | This file is free software; you can redistribute it and/or modify it | ||
12 | under the terms of the GNU General Public License as published by the | ||
13 | Free Software Foundation; either version 2, or (at your option) any | ||
14 | later version. | ||
15 | |||
16 | In addition to the permissions in the GNU General Public License, the | ||
17 | Free Software Foundation gives you unlimited permission to link the | ||
18 | compiled version of this file into combinations with other programs, | ||
19 | and to distribute those combinations without any restriction coming | ||
20 | from the use of this file. (The General Public License restrictions | ||
21 | do apply in other respects; for example, they cover modification of | ||
22 | the file, and distribution when not linked into a combine | ||
23 | executable.) | ||
24 | |||
25 | This file is distributed in the hope that it will be useful, but | ||
26 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
27 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
28 | General Public License for more details. | ||
29 | |||
30 | You should have received a copy of the GNU General Public License | ||
31 | along with this program; see the file COPYING. If not, write to | ||
32 | the Free Software Foundation, 59 Temple Place - Suite 330, | ||
33 | Boston, MA 02111-1307, USA. */ | ||
34 | |||
35 | |||
36 | #include <linux/linkage.h> | ||
37 | #include <asm/assembler.h> | ||
38 | |||
39 | |||
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core.  On exit \result holds the
 * quotient of \dividend / \divisor.  \dividend, \divisor and \curbit are
 * all overwritten, as are the condition flags.  Do not rely on \dividend
 * holding the remainder afterwards (see the note at the division loop);
 * ARM_MOD_BODY is the macro that produces a remainder.
 *
 * The callers in this file only expand this macro once they have
 * established that dividend > divisor and that the divisor is neither
 * zero nor a power of two.
 */
40 | .macro ARM_DIV_BODY dividend, divisor, result, curbit | ||
41 | |||
42 | #if __LINUX_ARM_ARCH__ >= 5 | ||
43 | |||
/*
 * ARMv5 and later: the difference of the two clz counts is how far the
 * divisor must be shifted left for its top set bit to line up with the
 * dividend's.  \curbit starts at the quotient bit for that position.
 * \result is used as the temporary shift count before being zeroed.
 */
44 | clz \curbit, \divisor | ||
45 | clz \result, \dividend | ||
46 | sub \result, \curbit, \result | ||
47 | mov \curbit, #1 | ||
48 | mov \divisor, \divisor, lsl \result | ||
49 | mov \curbit, \curbit, lsl \result | ||
50 | mov \result, #0 | ||
51 | |||
52 | #else | ||
53 | |||
54 | @ Initially shift the divisor left 3 bits if possible, | ||
55 | @ set curbit accordingly. This allows for curbit to be located | ||
56 | @ at the left end of each 4 bit nibbles in the division loop | ||
57 | @ to save one loop in most cases. | ||
58 | tst \divisor, #0xe0000000 | ||
59 | moveq \divisor, \divisor, lsl #3 | ||
60 | moveq \curbit, #8 | ||
61 | movne \curbit, #1 | ||
62 | |||
63 | @ Unless the divisor is very big, shift it up in multiples of | ||
64 | @ four bits, since this is the amount of unwinding in the main | ||
65 | @ division loop. Continue shifting until the divisor is | ||
66 | @ larger than the dividend. | ||
67 | 1: cmp \divisor, #0x10000000 | ||
68 | cmplo \divisor, \dividend | ||
69 | movlo \divisor, \divisor, lsl #4 | ||
70 | movlo \curbit, \curbit, lsl #4 | ||
71 | blo 1b | ||
72 | |||
73 | @ For very big divisors, we must shift it a bit at a time, or | ||
74 | @ we will be in danger of overflowing. | ||
75 | 1: cmp \divisor, #0x80000000 | ||
76 | cmplo \divisor, \dividend | ||
77 | movlo \divisor, \divisor, lsl #1 | ||
78 | movlo \curbit, \curbit, lsl #1 | ||
79 | blo 1b | ||
80 | |||
81 | mov \result, #0 | ||
82 | |||
83 | #endif | ||
84 | |||
/*
 * Each pass tries the divisor at four successive bit positions
 * (\divisor, then \divisor shifted right by 1, 2 and 3).  When
 * "\curbit, lsr #n" has shifted out to zero the orr adds nothing to the
 * quotient, but the matching subtraction still happens; that is why
 * \dividend is not a valid remainder on exit.
 */
85 | @ Division loop | ||
86 | 1: cmp \dividend, \divisor | ||
87 | subhs \dividend, \dividend, \divisor | ||
88 | orrhs \result, \result, \curbit | ||
89 | cmp \dividend, \divisor, lsr #1 | ||
90 | subhs \dividend, \dividend, \divisor, lsr #1 | ||
91 | orrhs \result, \result, \curbit, lsr #1 | ||
92 | cmp \dividend, \divisor, lsr #2 | ||
93 | subhs \dividend, \dividend, \divisor, lsr #2 | ||
94 | orrhs \result, \result, \curbit, lsr #2 | ||
95 | cmp \dividend, \divisor, lsr #3 | ||
96 | subhs \dividend, \dividend, \divisor, lsr #3 | ||
97 | orrhs \result, \result, \curbit, lsr #3 | ||
/*
 * Stop when the dividend has reached zero, or when \curbit shifts out to
 * zero: movnes then sets Z, so the movne and bne that follow are skipped.
 */
98 | cmp \dividend, #0 @ Early termination? | ||
99 | movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? | ||
100 | movne \divisor, \divisor, lsr #4 | ||
101 | bne 1b | ||
102 | |||
103 | .endm | ||
104 | |||
105 | |||
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Sets \order to the index of the set bit in \divisor, i.e. log2(divisor).
 * The callers in this file only expand it after checking that the
 * divisor is a power of two, and then use \order as a right-shift count.
 * The pre-ARMv5 variant also overwrites \divisor and the condition flags.
 */
106 | .macro ARM_DIV2_ORDER divisor, order | ||
107 | |||
108 | #if __LINUX_ARM_ARCH__ >= 5 | ||
109 | |||
/* order = 31 - (number of leading zero bits in the divisor) */
110 | clz \order, \divisor | ||
111 | rsb \order, \order, #31 | ||
112 | |||
113 | #else | ||
114 | |||
/*
 * No clz available: narrow the divisor down in steps of 16, 8 and 4
 * bits, accumulating the amount shifted out in \order.
 */
115 | cmp \divisor, #(1 << 16) | ||
116 | movhs \divisor, \divisor, lsr #16 | ||
117 | movhs \order, #16 | ||
118 | movlo \order, #0 | ||
119 | |||
120 | cmp \divisor, #(1 << 8) | ||
121 | movhs \divisor, \divisor, lsr #8 | ||
122 | addhs \order, \order, #8 | ||
123 | |||
124 | cmp \divisor, #(1 << 4) | ||
125 | movhs \divisor, \divisor, lsr #4 | ||
126 | addhs \order, \order, #4 | ||
127 | |||
/*
 * For a power-of-two input the divisor is now 1, 2, 4 or 8.  A value
 * above 4 (i.e. 8) contributes 3; otherwise divisor >> 1 gives the
 * remaining 0, 1 or 2 directly.
 */
128 | cmp \divisor, #(1 << 2) | ||
129 | addhi \order, \order, #3 | ||
130 | addls \order, \order, \divisor, lsr #1 | ||
131 | |||
132 | #endif | ||
133 | |||
134 | .endm | ||
135 | |||
136 | |||
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned remainder core: subtracts shifted copies of \divisor from
 * \dividend so that \dividend ends up holding dividend % divisor.
 * \divisor, \order and the condition flags are overwritten; \spare is
 * scratch and is only written by the ARMv5+ variant.
 *
 * The callers in this file only expand this macro once they have
 * established that dividend > divisor and that the divisor is neither
 * zero nor a power of two.
 */
137 | .macro ARM_MOD_BODY dividend, divisor, order, spare | ||
138 | |||
139 | #if __LINUX_ARM_ARCH__ >= 5 | ||
140 | |||
/*
 * ARMv5 and later: \order is the number of bit positions the divisor is
 * shifted left so its top set bit lines up with the dividend's.
 */
141 | clz \order, \divisor | ||
142 | clz \spare, \dividend | ||
143 | sub \order, \order, \spare | ||
144 | mov \divisor, \divisor, lsl \order | ||
145 | |||
146 | #else | ||
147 | |||
/* \order counts how many bit positions the divisor gets shifted left. */
148 | mov \order, #0 | ||
149 | |||
150 | @ Unless the divisor is very big, shift it up in multiples of | ||
151 | @ four bits, since this is the amount of unwinding in the main | ||
152 | @ division loop. Continue shifting until the divisor is | ||
153 | @ larger than the dividend. | ||
154 | 1: cmp \divisor, #0x10000000 | ||
155 | cmplo \divisor, \dividend | ||
156 | movlo \divisor, \divisor, lsl #4 | ||
157 | addlo \order, \order, #4 | ||
158 | blo 1b | ||
159 | |||
160 | @ For very big divisors, we must shift it a bit at a time, or | ||
161 | @ we will be in danger of overflowing. | ||
162 | 1: cmp \divisor, #0x80000000 | ||
163 | cmplo \divisor, \dividend | ||
164 | movlo \divisor, \divisor, lsl #1 | ||
165 | addlo \order, \order, #1 | ||
166 | blo 1b | ||
167 | |||
168 | #endif | ||
169 | |||
/*
 * \divisor is now shifted left by \order bits, so \order + 1
 * compare/subtract steps remain.  Biasing \order by 3 makes it
 * non-negative exactly when at least four steps are left.
 */
170 | @ Perform all needed subtractions to keep only the remainder. | ||
171 | @ Do comparisons in batch of 4 first. | ||
172 | subs \order, \order, #3 @ yes, 3 is intended here | ||
173 | blt 2f | ||
174 | |||
175 | 1: cmp \dividend, \divisor | ||
176 | subhs \dividend, \dividend, \divisor | ||
177 | cmp \dividend, \divisor, lsr #1 | ||
178 | subhs \dividend, \dividend, \divisor, lsr #1 | ||
179 | cmp \dividend, \divisor, lsr #2 | ||
180 | subhs \dividend, \dividend, \divisor, lsr #2 | ||
181 | cmp \dividend, \divisor, lsr #3 | ||
182 | subhs \dividend, \dividend, \divisor, lsr #3 | ||
/*
 * Go round again only while the dividend is still >= 1 (signed compare)
 * and, after taking 4 off \order, at least four more steps remain.  The
 * plain mov in between does not touch the flags set by the cmp.
 */
183 | cmp \dividend, #1 | ||
184 | mov \divisor, \divisor, lsr #4 | ||
185 | subges \order, \order, #4 | ||
186 | bge 1b | ||
187 | |||
/*
 * Finished if no leftover steps remain (low two bits of \order clear)
 * or if the dividend is already zero.
 */
188 | tst \order, #3 | ||
189 | teqne \dividend, #0 | ||
190 | beq 5f | ||
191 | |||
/*
 * \order is expected to be -3, -2 or -1 here, meaning 1, 2 or 3 steps
 * are left: cmn compares \order against -2 and enters the ladder below
 * at label 4, label 3 or the top respectively.
 */
192 | @ Either 1, 2 or 3 comparison/subtractions are left. | ||
193 | 2: cmn \order, #2 | ||
194 | blt 4f | ||
195 | beq 3f | ||
196 | cmp \dividend, \divisor | ||
197 | subhs \dividend, \dividend, \divisor | ||
198 | mov \divisor, \divisor, lsr #1 | ||
199 | 3: cmp \dividend, \divisor | ||
200 | subhs \dividend, \dividend, \divisor | ||
201 | mov \divisor, \divisor, lsr #1 | ||
202 | 4: cmp \dividend, \divisor | ||
203 | subhs \dividend, \dividend, \divisor | ||
204 | 5: | ||
205 | .endm | ||
206 | |||
207 | |||
/*
 * __udivsi3: unsigned 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient
 * Overwrites r1, r2 and the flags, and r3 on the general path.
 * A zero divisor branches to Ldiv0.
 */
208 | ENTRY(__udivsi3) | ||
209 | |||
/*
 * r2 = divisor - 1.  Z set means divisor == 1, so r0 is already the
 * answer; carry clear (borrow) means the divisor was 0.
 */
210 | subs r2, r1, #1 | ||
211 | moveq pc, lr | ||
212 | bcc Ldiv0 | ||
/* dividend <= divisor: the quotient is 1 or 0, handled at label 11 */
213 | cmp r0, r1 | ||
214 | bls 11f | ||
/* divisor & (divisor - 1) == 0: power of two, handled by a shift at 12 */
215 | tst r1, r2 | ||
216 | beq 12f | ||
217 | |||
/* general case: quotient is produced in r2, r3 is scratch */
218 | ARM_DIV_BODY r0, r1, r2, r3 | ||
219 | |||
220 | mov r0, r2 | ||
221 | mov pc, lr | ||
222 | |||
/* flags still come from "cmp r0, r1": equal gives 1, lower gives 0 */
223 | 11: moveq r0, #1 | ||
224 | movne r0, #0 | ||
225 | mov pc, lr | ||
226 | |||
/* r2 = log2(divisor), then quotient = dividend >> r2 */
227 | 12: ARM_DIV2_ORDER r1, r2 | ||
228 | |||
229 | mov r0, r0, lsr r2 | ||
230 | mov pc, lr | ||
231 | |||
232 | |||
/*
 * __umodsi3: unsigned 32-bit remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = dividend % divisor
 * Overwrites r1, r2 and the flags, and r3 on the general path.
 * A zero divisor branches to Ldiv0.
 *
 * The special cases are resolved by one chain of conditional
 * instructions, each depending on the flags left by the previous ones:
 *   - divisor == 1, or dividend == divisor: r0 = 0
 *   - dividend <  divisor: r0 is returned unchanged
 *   - dividend >  divisor and divisor is a power of two:
 *     r0 = r0 & (divisor - 1)
 * In all of these the "ls" condition holds at the movls and the
 * function returns; only the remaining case reaches ARM_MOD_BODY.
 */
233 | ENTRY(__umodsi3) | ||
234 | |||
235 | subs r2, r1, #1 @ compare divisor with 1 | ||
236 | bcc Ldiv0 | ||
237 | cmpne r0, r1 @ compare dividend with divisor | ||
238 | moveq r0, #0 | ||
239 | tsthi r1, r2 @ see if divisor is power of 2 | ||
240 | andeq r0, r0, r2 | ||
241 | movls pc, lr | ||
242 | |||
/* general case: remainder is left in r0; r2 and r3 are scratch */
243 | ARM_MOD_BODY r0, r1, r2, r3 | ||
244 | |||
245 | mov pc, lr | ||
246 | |||
247 | |||
/*
 * __divsi3: signed 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient
 * Overwrites r1, r2, r3, ip and the flags.
 * A zero divisor branches to Ldiv0.
 *
 * Works on the absolute values of both operands.  ip holds
 * dividend ^ divisor, whose sign bit is the sign of the result, and is
 * used at the end of each path to negate the quotient when needed.
 */
248 | ENTRY(__divsi3) | ||
249 | |||
/* the eor does not alter the flags set by the cmp, so beq/rsbmi still test r1 */
250 | cmp r1, #0 | ||
251 | eor ip, r0, r1 @ save the sign of the result. | ||
252 | beq Ldiv0 | ||
253 | rsbmi r1, r1, #0 @ loops below use unsigned. | ||
254 | subs r2, r1, #1 @ division by 1 or -1 ? | ||
255 | beq 10f | ||
256 | movs r3, r0 | ||
257 | rsbmi r3, r0, #0 @ positive dividend value | ||
/* |dividend| <= |divisor|: the quotient is 0 or +/-1, handled at label 11 */
258 | cmp r3, r1 | ||
259 | bls 11f | ||
260 | tst r1, r2 @ divisor is power of 2 ? | ||
261 | beq 12f | ||
262 | |||
/* general case: unsigned quotient goes to r0, r2 is scratch */
263 | ARM_DIV_BODY r3, r1, r0, r2 | ||
264 | |||
265 | cmp ip, #0 | ||
266 | rsbmi r0, r0, #0 | ||
267 | mov pc, lr | ||
268 | |||
/*
 * |divisor| == 1.  ip ^ r0 has the sign bit of the original divisor, so
 * the dividend is negated exactly when the divisor was -1.
 */
269 | 10: teq ip, r0 @ same sign ? | ||
270 | rsbmi r0, r0, #0 | ||
271 | mov pc, lr | ||
272 | |||
/*
 * Flags still come from "cmp r3, r1".  Lower: quotient is 0.  Equal:
 * ip asr #31 is 0 or -1, and or-ing in bit 0 turns that into +1 or -1.
 */
273 | 11: movlo r0, #0 | ||
274 | moveq r0, ip, asr #31 | ||
275 | orreq r0, r0, #1 | ||
276 | mov pc, lr | ||
277 | |||
/* r2 = log2(|divisor|); shift |dividend| right by it, then apply the sign */
278 | 12: ARM_DIV2_ORDER r1, r2 | ||
279 | |||
280 | cmp ip, #0 | ||
281 | mov r0, r3, lsr r2 | ||
282 | rsbmi r0, r0, #0 | ||
283 | mov pc, lr | ||
284 | |||
285 | |||
/*
 * __modsi3: signed 32-bit remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = remainder, carrying the sign of the dividend
 * Overwrites r1, r2, ip and the flags, and r3 on the general path.
 * A zero divisor branches to Ldiv0.
 *
 * Both operands are made positive, the original dividend is kept in ip,
 * and the unsigned remainder is negated at label 10 if ip is negative.
 * The conditional chain after the subs resolves these cases and leaves
 * the "ls" condition true, so the bls skips ARM_MOD_BODY for them:
 *   - |divisor| == 1, or |dividend| == |divisor|: r0 = 0
 *   - |dividend| <  |divisor|: r0 is left as |dividend|
 *   - |dividend| >  |divisor| and |divisor| is a power of two:
 *     r0 = r0 & (|divisor| - 1)
 */
286 | ENTRY(__modsi3) | ||
287 | |||
288 | cmp r1, #0 | ||
289 | beq Ldiv0 | ||
290 | rsbmi r1, r1, #0 @ loops below use unsigned. | ||
291 | movs ip, r0 @ preserve sign of dividend | ||
292 | rsbmi r0, r0, #0 @ if negative make positive | ||
293 | subs r2, r1, #1 @ compare divisor with 1 | ||
294 | cmpne r0, r1 @ compare dividend with divisor | ||
295 | moveq r0, #0 | ||
296 | tsthi r1, r2 @ see if divisor is power of 2 | ||
297 | andeq r0, r0, r2 | ||
298 | bls 10f | ||
299 | |||
/* general case: unsigned remainder is left in r0; r2 and r3 are scratch */
300 | ARM_MOD_BODY r0, r1, r2, r3 | ||
301 | |||
302 | 10: cmp ip, #0 | ||
303 | rsbmi r0, r0, #0 | ||
304 | mov pc, lr | ||
305 | |||
306 | |||
/*
 * Ldiv0: shared division-by-zero exit for the entry points in this file.
 * Saves lr, calls __div0 (defined outside this file), then returns 0 in
 * r0 to the original caller.
 *
 * lr is pushed with an 8-byte stack adjustment rather than 4.  Assuming
 * sp was 8-byte aligned on entry, this keeps it 8-byte aligned across
 * the call, as the ARM EABI (AAPCS) requires at public interfaces.  The
 * extra word is padding and is never read.  The old ABI only needs
 * 4-byte alignment, so the larger adjustment is harmless there.
 */
307 | Ldiv0: | ||
308 | |||
309 | str lr, [sp, #-8]! @ push lr, keeping sp 8-byte aligned | ||
310 | bl __div0 | ||
311 | mov r0, #0 @ About as wrong as it could be. | ||
312 | ldr pc, [sp], #8 @ pop the return address and drop the padding | ||
313 | |||
314 | |||