aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorMike Frysinger <michael.frysinger@analog.com>2007-10-21 10:57:36 -0400
committerBryan Wu <bryan.wu@analog.com>2007-10-21 10:57:36 -0400
commitb0a68dc07ec395d44849ce98eb417713ca333410 (patch)
tree5e469225188d63fd296fc04d0c04e9580d8e47db /arch
parent1c668d82465cd5c17030c0f69561841374380ac8 (diff)
Blackfin arch: add assembly function for doing 64bit unsigned division
Signed-off-by: Mike Frysinger <michael.frysinger@analog.com> Signed-off-by: Bryan Wu <bryan.wu@analog.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/blackfin/lib/Makefile2
-rw-r--r--arch/blackfin/lib/udivdi3.S375
2 files changed, 376 insertions, 1 deletions
diff --git a/arch/blackfin/lib/Makefile b/arch/blackfin/lib/Makefile
index 635288fc5f54..bfdad52c570b 100644
--- a/arch/blackfin/lib/Makefile
+++ b/arch/blackfin/lib/Makefile
@@ -4,7 +4,7 @@
4 4
5lib-y := \ 5lib-y := \
6 ashldi3.o ashrdi3.o lshrdi3.o \ 6 ashldi3.o ashrdi3.o lshrdi3.o \
7 muldi3.o divsi3.o udivsi3.o modsi3.o umodsi3.o \ 7 muldi3.o divsi3.o udivsi3.o udivdi3.o modsi3.o umodsi3.o \
8 checksum.o memcpy.o memset.o memcmp.o memchr.o memmove.o \ 8 checksum.o memcpy.o memset.o memcmp.o memchr.o memmove.o \
9 strcmp.o strcpy.o strncmp.o strncpy.o \ 9 strcmp.o strcpy.o strncmp.o strncpy.o \
10 umulsi3_highpart.o smulsi3_highpart.o \ 10 umulsi3_highpart.o smulsi3_highpart.o \
diff --git a/arch/blackfin/lib/udivdi3.S b/arch/blackfin/lib/udivdi3.S
new file mode 100644
index 000000000000..ad1ebee675e1
--- /dev/null
+++ b/arch/blackfin/lib/udivdi3.S
@@ -0,0 +1,375 @@
1/*
2 * udivdi3.S - unsigned long long division
3 *
4 * Copyright 2003-2007 Analog Devices Inc.
5 * Enter bugs at http://blackfin.uclinux.org/
6 *
7 * Licensed under the GPLv2 or later.
8 */
9
10#include <linux/linkage.h>
11
12#define CARRY AC0
13
14#ifdef CONFIG_ARITHMETIC_OPS_L1
15.section .l1.text
16#else
17.text
18#endif
19
20
21ENTRY(___udivdi3)
22 R3 = [SP + 12];
23 [--SP] = (R7:4, P5:3);
24
25 /* Attempt to use divide primitive first; these will handle
26 ** most cases, and they're quick - avoids stalls incurred by
27 ** testing for identities.
28 */
29
30 R4 = R2 | R3;
31 CC = R4 == 0;
32 IF CC JUMP .LDIV_BY_ZERO;
33
34 R4.H = 0x8000;
35 R4 >>>= 16; // R4 now 0xFFFF8000
36 R5 = R0 | R2; // If either dividend or
37 R4 = R5 & R4; // divisor have bits in
38 CC = R4; // top half or low half's sign
39 IF CC JUMP .LIDENTS; // bit, skip builtins.
40 R4 = R1 | R3; // Also check top halves
41 CC = R4;
42 IF CC JUMP .LIDENTS;
43
44 /* Can use the builtins. */
45
46 AQ = CC; // Clear AQ (CC==0)
47 DIVQ(R0, R2);
48 DIVQ(R0, R2);
49 DIVQ(R0, R2);
50 DIVQ(R0, R2);
51 DIVQ(R0, R2);
52 DIVQ(R0, R2);
53 DIVQ(R0, R2);
54 DIVQ(R0, R2);
55 DIVQ(R0, R2);
56 DIVQ(R0, R2);
57 DIVQ(R0, R2);
58 DIVQ(R0, R2);
59 DIVQ(R0, R2);
60 DIVQ(R0, R2);
61 DIVQ(R0, R2);
62 DIVQ(R0, R2);
63 DIVQ(R0, R2);
64 R0 = R0.L (Z);
65 R1 = 0;
66 (R7:4, P5:3) = [SP++];
67 RTS;
68
69.LIDENTS:
70 /* Test for common identities. Value to be returned is
71 ** placed in R6,R7.
72 */
73 // Check for 0/y, return 0
74 R4 = R0 | R1;
75 CC = R4 == 0;
76 IF CC JUMP .LRETURN_R0;
77
78 // Check for x/x, return 1
79 R6 = R0 - R2; // If x == y, then both R6 and R7 will be zero
80 R7 = R1 - R3;
81 R4 = R6 | R7; // making R4 zero.
82 R6 += 1; // which would now make R6:R7==1.
83 CC = R4 == 0;
84 IF CC JUMP .LRETURN_IDENT;
85
86 // Check for x/1, return x
87 R6 = R0;
88 R7 = R1;
89 CC = R3 == 0;
90 IF !CC JUMP .Lnexttest;
91 CC = R2 == 1;
92 IF CC JUMP .LRETURN_IDENT;
93
94.Lnexttest:
95 R4.L = ONES R2; // check for div by power of two which
96 R5.L = ONES R3; // can be done using a shift
97 R6 = PACK (R5.L, R4.L);
98 CC = R6 == 1;
99 IF CC JUMP .Lpower_of_two_upper_zero;
100 R6 = PACK (R4.L, R5.L);
101 CC = R6 == 1;
102 IF CC JUMP .Lpower_of_two_lower_zero;
103
104 // Check for x < y, return 0
105 R6 = 0;
106 R7 = R6;
107 CC = R1 < R3 (IU);
108 IF CC JUMP .LRETURN_IDENT;
109 CC = R1 == R3;
110 IF !CC JUMP .Lno_idents;
111 CC = R0 < R2 (IU);
112 IF CC JUMP .LRETURN_IDENT;
113
114.Lno_idents: // Idents don't match. Go for the full operation
115
116
117 // If X, or X and Y have high bit set, it'll affect the
118 // results, so shift right one to stop this. Note: we've already
119 // checked that X >= Y, so Y's msb won't be set unless X's
120 // is.
121
122 R4 = 0;
123 CC = R1 < 0;
124 IF !CC JUMP .Lx_msb_clear;
125 CC = !CC; // 1 -> 0;
126 R1 = ROT R1 BY -1; // Shift X >> 1
127 R0 = ROT R0 BY -1; // lsb -> CC
128 BITSET(R4,31); // to record only x msb was set
129 CC = R3 < 0;
130 IF !CC JUMP .Ly_msb_clear;
131 CC = !CC;
132 R3 = ROT R3 BY -1; // Shift Y >> 1
133 R2 = ROT R2 BY -1;
134 BITCLR(R4,31); // clear bit to record only x msb was set
135
136.Ly_msb_clear:
137.Lx_msb_clear:
138 // Bit 31 in R4 indicates X msb set, but Y msb wasn't, and no bits
139 // were lost, so we should shift result left by one.
140
141 [--SP] = R4; // save for later
142
143 // In the loop that follows, each iteration we add
144 // either Y' or -Y' to the Remainder. We compute the
145 // negated Y', and store, for convenience. Y' goes
146 // into P0:P1, while -Y' goes into P2:P3.
147
148 P0 = R2;
149 P1 = R3;
150 R2 = -R2;
151 CC = CARRY;
152 CC = !CC;
153 R4 = CC;
154 R3 = -R3;
155 R3 = R3 - R4;
156
157 R6 = 0; // remainder = 0
158 R7 = R6;
159
160 [--SP] = R2; P2 = SP;
161 [--SP] = R3; P3 = SP;
162 [--SP] = R6; P5 = SP; // AQ = 0
163 [--SP] = P1;
164
165 /* In the loop that follows, we use the following
166 ** register assignments:
167 ** R0,R1 X, workspace
168 ** R2,R3 Y, workspace
169 ** R4,R5 partial Div
170 ** R6,R7 partial remainder
171 ** P5 AQ
172 ** The remainder and div form a 128-bit number, with
173 ** the remainder in the high 64-bits.
174 */
175 R4 = R0; // Div = X'
176 R5 = R1;
177 R3 = 0;
178
179 P4 = 64; // Iterate once per bit
180 LSETUP(.LULST,.LULEND) LC0 = P4;
181.LULST:
182 /* Shift Div and remainder up by one. The bit shifted
183 ** out of the top of the quotient is shifted into the bottom
184 ** of the remainder.
185 */
186 CC = R3;
187 R4 = ROT R4 BY 1;
188 R5 = ROT R5 BY 1 || // low q to high q
189 R2 = [P5]; // load saved AQ
190 R6 = ROT R6 BY 1 || // high q to low r
191 R0 = [P2]; // load -Y'
192 R7 = ROT R7 BY 1 || // low r to high r
193 R1 = [P3];
194
195 // Assume add -Y'
196 CC = R2 < 0; // But if AQ is set...
197 IF CC R0 = P0; // then add Y' instead
198 IF CC R1 = P1;
199
200 R6 = R6 + R0; // Rem += (Y' or -Y')
201 CC = CARRY;
202 R0 = CC;
203 R7 = R7 + R1;
204 R7 = R7 + R0 (NS) ||
205 R1 = [SP];
206 // Set the next AQ bit
207 R1 = R7 ^ R1; // from Remainder and Y'
208 R1 = R1 >> 31 || // Negate AQ's value, and
209 [P5] = R1; // save next AQ
210 BITTGL(R1, 0); // add neg AQ to the Div
211.LULEND: R4 = R4 + R1;
212
213 R6 = [SP + 16];
214
215 R0 = R4;
216 R1 = R5;
217 CC = BITTST(R6,30); // Just set CC=0
218 R4 = ROT R0 BY 1; // but if we had to shift X,
219 R5 = ROT R1 BY 1; // and didn't shift any bits out,
220 CC = BITTST(R6,31); // then the result will be half as
221 IF CC R0 = R4; // much as required, so shift left
222 IF CC R1 = R5; // one space.
223
224 SP += 20;
225 (R7:4, P5:3) = [SP++];
226 RTS;
227
228.Lpower_of_two:
229 /* Y has a single bit set, which means it's a power of two.
230 ** That means we can perform the division just by shifting
231 ** X to the right the appropriate number of bits
232 */
233
234 /* signbits returns the number of sign bits, minus one.
235 ** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
236 ** to shift right n-signbits spaces. It also means 0x80000000
237 ** is a special case, because that *also* gives a signbits of 0
238 */
239.Lpower_of_two_lower_zero:
240 R7 = 0;
241 R6 = R1 >> 31;
242 CC = R3 < 0;
243 IF CC JUMP .LRETURN_IDENT;
244
245 R2.L = SIGNBITS R3;
246 R2 = R2.L (Z);
247 R2 += -62;
248 (R7:4, P5:3) = [SP++];
249 JUMP ___lshftli;
250
251.Lpower_of_two_upper_zero:
252 CC = R2 < 0;
253 IF CC JUMP .Lmaxint_shift;
254
255 R2.L = SIGNBITS R2;
256 R2 = R2.L (Z);
257 R2 += -30;
258 (R7:4, P5:3) = [SP++];
259 JUMP ___lshftli;
260
261.Lmaxint_shift:
262 R2 = -31;
263 (R7:4, P5:3) = [SP++];
264 JUMP ___lshftli;
265
266.LRETURN_IDENT:
267 R0 = R6;
268 R1 = R7;
269.LRETURN_R0:
270 (R7:4, P5:3) = [SP++];
271 RTS;
272.LDIV_BY_ZERO:
273 R0 = ~R2;
274 R1 = R0;
275 (R7:4, P5:3) = [SP++];
276 RTS;
277
278ENDPROC(___udivdi3)
279
280
281ENTRY(___lshftli)
282 CC = R2 == 0;
283 IF CC JUMP .Lfinished; // nothing to do
284 CC = R2 < 0;
285 IF CC JUMP .Lrshift;
286 R3 = 64;
287 CC = R2 < R3;
288 IF !CC JUMP .Lretzero;
289
290 // We're shifting left, and it's less than 64 bits, so
291 // a valid result will be returned.
292
293 R3 >>= 1; // R3 now 32
294 CC = R2 < R3;
295
296 IF !CC JUMP .Lzerohalf;
297
298 // We're shifting left, between 1 and 31 bits, which means
299 // some of the low half will be shifted into the high half.
300 // Work out how much.
301
302 R3 = R3 - R2;
303
304 // Save that much data from the bottom half.
305
306 P1 = R7;
307 R7 = R0;
308 R7 >>= R3;
309
310 // Adjust both parts of the parameter.
311
312 R0 <<= R2;
313 R1 <<= R2;
314
315 // And include the bits moved across.
316
317 R1 = R1 | R7;
318 R7 = P1;
319 RTS;
320
321.Lzerohalf:
322 // We're shifting left, between 32 and 63 bits, so the
323 // bottom half will become zero, and the top half will
324 // lose some bits. How many?
325
326 R2 = R2 - R3; // N - 32
327 R1 = LSHIFT R0 BY R2.L;
328 R0 = R0 - R0;
329 RTS;
330
331.Lretzero:
332 R0 = R0 - R0;
333 R1 = R0;
334.Lfinished:
335 RTS;
336
337.Lrshift:
338 // We're shifting right, but by how much?
339 R2 = -R2;
340 R3 = 64;
341 CC = R2 < R3;
342 IF !CC JUMP .Lretzero;
343
344 // Shifting right less than 64 bits, so some result bits will
345 // be retained.
346
347 R3 >>= 1; // R3 now 32
348 CC = R2 < R3;
349 IF !CC JUMP .Lsignhalf;
350
351 // Shifting right between 1 and 31 bits, so need to copy
352 // data across words.
353
354 P1 = R7;
355 R3 = R3 - R2;
356 R7 = R1;
357 R7 <<= R3;
358 R1 >>= R2;
359 R0 >>= R2;
360 R0 = R7 | R0;
361 R7 = P1;
362 RTS;
363
364.Lsignhalf:
365 // Shifting right between 32 and 63 bits, so the top half
366 // will become all zero-bits, and the bottom half is some
367 // of the top half. But how much?
368
369 R2 = R2 - R3;
370 R0 = R1;
371 R0 >>= R2;
372 R1 = 0;
373 RTS;
374
375ENDPROC(___lshftli)