diff options
Diffstat (limited to 'arch/blackfin/lib/udivsi3.S')
-rw-r--r-- | arch/blackfin/lib/udivsi3.S | 298 |
1 files changed, 298 insertions, 0 deletions
diff --git a/arch/blackfin/lib/udivsi3.S b/arch/blackfin/lib/udivsi3.S new file mode 100644 index 00000000000..d39a1291625 --- /dev/null +++ b/arch/blackfin/lib/udivsi3.S | |||
@@ -0,0 +1,298 @@ | |||
1 | /* | ||
2 | * File: arch/blackfin/lib/udivsi3.S | ||
3 | * Based on: | ||
4 | * Author: | ||
5 | * | ||
6 | * Created: | ||
7 | * Description: | ||
8 | * | ||
9 | * Modified: | ||
10 | * Copyright 2004-2006 Analog Devices Inc. | ||
11 | * | ||
12 | * Bugs: Enter bugs at http://blackfin.uclinux.org/ | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or modify | ||
15 | * it under the terms of the GNU General Public License as published by | ||
16 | * the Free Software Foundation; either version 2 of the License, or | ||
17 | * (at your option) any later version. | ||
18 | * | ||
19 | * This program is distributed in the hope that it will be useful, | ||
20 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
21 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
22 | * GNU General Public License for more details. | ||
23 | * | ||
24 | * You should have received a copy of the GNU General Public License | ||
25 | * along with this program; if not, see the file COPYING, or write | ||
26 | * to the Free Software Foundation, Inc., | ||
27 | * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
28 | */ | ||
29 | |||
30 | #include <linux/linkage.h> | ||
31 | |||
32 | #define CARRY AC0 /* CARRY is an alias for the AC0 flag; it is tested by "CC &= CARRY" in .Ly_16bit */ | |
33 | |||
34 | #ifdef CONFIG_ARITHMETIC_OPS_L1 /* when this option is defined, place the routine in the .l1.text section */ | |
35 | .section .l1.text | |
36 | #else /* otherwise use the default .text section */ | |
37 | .text | |
38 | #endif | |
39 | |||
40 | |||
/*
 * ___udivsi3: unsigned 32-bit integer divide.
 *   In:  R0 = dividend (X), R1 = divisor (Y)
 *   Out: R0 = quotient
 * Depending on the path taken, R1-R3, P0-P2, CC, AQ and the LC0 hardware
 * loop set up by LSETUP are modified; R5-R7 are saved on the stack and
 * restored by the long-division path.
 */
41 | ENTRY(___udivsi3) | |
42 | |||
43 | CC = R0 < R1 (IU); /* If X < Y (unsigned compare), the quotient is always 0 */ | |
44 | IF CC JUMP .Lreturn_ident; | |
45 | |||
46 | R2 = R1 << 16; /* R2 = Y << 16 */ | |
47 | CC = R2 <= R0 (IU); /* (Y << 16) <= X: skip the DIVQ paths; presumably the quotient would not fit in the 16 bits they return - TODO confirm */ | |
48 | IF CC JUMP .Lidents; | |
49 | |||
50 | R2 = R0 >> 31; /* R2 = bit 31 of X: zero if X is a 31-bit number */ | |
51 | R3 = R1 >> 15; /* R3 = Y >> 15: zero if Y is a 15-bit number (R3 is reused at .Ly_16bit) */ | |
52 | R2 = R2 | R3; /* both zero: it's okay to use the DIVQ builtins (fall through to the fast path) */ | |
53 | CC = R2; /* CC is set when either limit is exceeded */ | |
54 | IF CC JUMP .Ly_16bit; | |
55 | |||
56 | /* METHOD 1: FAST DIVQ | |
57 | We know we have a 31-bit dividend and a 15-bit divisor, so we can use the | |
58 | simple DIVQ approach (first setting AQ to 0 - implying unsigned division, | |
59 | then 16 DIVQs). | |
60 | */ | |
61 | |||
62 | AQ = CC; /* Clear AQ (CC==0, because the jump above was not taken) */ | |
63 | |||
64 | /* ISR states: when dividing two integers (32.0/16.0) using the divide primitives, | |
65 | we need to shift the dividend one bit to the left. | |
66 | We have already checked that we have a 31-bit number, so it is safe to do | |
67 | that. | |
68 | */ | |
69 | R0 <<= 1; | |
70 | DIVQ(R0, R1); // 1 | |
71 | DIVQ(R0, R1); // 2 | |
72 | DIVQ(R0, R1); // 3 | |
73 | DIVQ(R0, R1); // 4 | |
74 | DIVQ(R0, R1); // 5 | |
75 | DIVQ(R0, R1); // 6 | |
76 | DIVQ(R0, R1); // 7 | |
77 | DIVQ(R0, R1); // 8 | |
78 | DIVQ(R0, R1); // 9 | |
79 | DIVQ(R0, R1); // 10 | |
80 | DIVQ(R0, R1); // 11 | |
81 | DIVQ(R0, R1); // 12 | |
82 | DIVQ(R0, R1); // 13 | |
83 | DIVQ(R0, R1); // 14 | |
84 | DIVQ(R0, R1); // 15 | |
85 | DIVQ(R0, R1); // 16 | |
86 | R0 = R0.L (Z); /* return the low 16 bits of R0, zero-extended, as the quotient */ | |
87 | RTS; | |
88 | |||
89 | .Ly_16bit: | |
90 | /* Reached when the fast-path test failed: Y has bits set at or above | |
91 | ** bit 15, and/or bit 31 of X is set. If Y is a | |
92 | ** 16-bit number, but not bigger, then we can use the builtins | |
93 | ** with a post-divide correction. | |
94 | ** R3 currently holds Y>>15, which means R3's LSB is the | |
95 | ** bit we're interested in. | |
96 | */ | |
97 | |||
98 | /* According to the ISR, to use the divide primitives for | |
99 | ** unsigned integer divide, the usable range is 31 bits | |
100 | */ | |
101 | CC = ! BITTST(R0, 31); /* condition 1: bit 31 of X is clear */ | |
102 | |||
103 | /* Condition 2: Y>>15 must be exactly 1. Subtracting 1 from R3 gives zero | |
104 | ** in that case, and the resulting AZ flag is ANDed into CC. | |
105 | */ | |
106 | R3 += -1; /* zero result means Y is 0x00008000..0x0000FFFF */ | |
107 | CC &= AZ; | |
108 | |||
109 | /* Condition 3: compare the pre-scaled divisor (Y>>1) with the upper | |
110 | ** 16 bits of X by subtracting them and ANDing the carry flag into CC. | |
111 | */ | |
112 | R3 = R1 >> 1; /* Pre-scaled divisor for primitive case (still in R3 at .Lshift_and_correct) */ | |
113 | R2 = R0 >> 16; /* upper 16 bits of the dividend */ | |
114 | |||
115 | R2 = R3 - R2; /* (Y >> 1) - (X >> 16); R2 is overwritten on both following paths, so only the flags matter */ | |
116 | CC &= CARRY; /* CARRY is AC0; presumably set when the subtraction did not borrow - TODO confirm against the ISR */ | |
117 | IF CC JUMP .Lshift_and_correct; /* all three conditions hold: prescale, DIVQ, then correct */ | |
118 | |||
119 | /* Fall through to the identities */ | |
120 | |||
121 | /* METHOD 2: identities and manual calculation | |
122 | We are not able to use the divide primitives, but may still catch some special | |
123 | cases before falling back to a 32-iteration shift/add/subtract loop. | |
124 | */ | |
125 | .Lidents: | |
126 | /* Test for common identities. .Lreturn_ident builds the value to return in R2. */ | |
127 | CC = R0 == 0; /* 0/Y => 0 (R0 is returned unchanged) */ | |
128 | IF CC JUMP .Lreturn_r0; | |
129 | CC = R0 == R1; /* X==Y => 1 */ | |
130 | IF CC JUMP .Lreturn_ident; | |
131 | CC = R1 == 1; /* X/1 => X */ | |
132 | IF CC JUMP .Lreturn_ident; | |
133 | |||
134 | R2.L = ONES R1; /* count the one bits in Y */ | |
135 | R2 = R2.L (Z); | |
136 | CC = R2 == 1; /* exactly one bit set: Y is a power of two */ | |
137 | IF CC JUMP .Lpower_of_two; | |
138 | |||
139 | [--SP] = (R7:5); /* Push registers R5-R7 (popped again at .Lno_mult) */ | |
140 | |||
141 | /* Idents don't match. Go for the full operation. */ | |
142 | |||
143 | |||
144 | R6 = 2; /* assume we'll shift two */ | |
145 | R3 = 1; | |
146 | |||
147 | P2 = R1; /* keep the unmodified divisor; copied back into R1 after the loop when P0 == 2 */ | |
148 | /* If either R0 or R1 has bit 31 set, */ | |
149 | /* halve it with a logical shift and */ | |
150 | /* note that it's been done. */ | |
151 | CC = R1 < 0; /* signed compare: true when bit 31 of Y is set */ | |
152 | R2 = R1 >> 1; | |
153 | IF CC R1 = R2; /* Possibly-shifted R1 */ | |
154 | IF !CC R6 = R3; /* R1 doesn't, so at most 1 shifted */ | |
155 | |||
156 | P0 = 0; | |
157 | R3 = -R1; /* negated (possibly halved) divisor */ | |
158 | [--SP] = R3; /* kept on the stack; the loop reloads it from [SP] every iteration */ | |
159 | R2 = R0 >> 1; | |
160 | R2 = R0 >> 1; /* NOTE(review): identical to the previous instruction; appears redundant */ | |
161 | CC = R0 < 0; /* signed compare: true when bit 31 of X is set */ | |
162 | IF CC P0 = R6; /* Number of values divided */ | |
163 | IF !CC R2 = R0; /* X needs no shift: use it as is */ | |
164 | |||
165 | /* P0 is 0 (nothing halved), 1 (dividend halved) or 2 (dividend and divisor halved) */ | |
166 | |||
167 | /* R2 holds the (possibly halved) copy of the dividend */ | |
168 | R3 = 0; /* Clear partial remainder */ | |
169 | R7 = 0; /* Initialise quotient bit */ | |
170 | |||
171 | P1 = 32; /* Set loop counter */ | |
172 | LSETUP(.Lulst, .Lulend) LC0 = P1; /* Hardware loop from .Lulst to .Lulend, 32 iterations */ | |
173 | .Lulst: R6 = R2 >> 31; /* R6 = top bit of R2, carried into the partial remainder */ | |
174 | R2 = R2 << 1; /* Shift 64 bit dividend (R3:R2) up by 1 bit */ | |
175 | R3 = R3 << 1 || R5 = [SP]; /* same line also loads R5 = [SP], the -divisor pushed before the loop */ | |
176 | R3 = R3 | R6; /* Include any carry */ | |
177 | CC = R7 < 0; /* Check quotient(AQ): sign of R7 left by the previous iteration */ | |
178 | /* If AQ==0, we'll sub divisor */ | |
179 | IF CC R5 = R1; /* and if AQ==1, we'll add it. */ | |
180 | R3 = R3 + R5; /* Add/sub divisor to partial remainder */ | |
181 | R7 = R3 ^ R1; /* Generate next quotient bit */ | |
182 | |||
183 | R5 = R7 >> 31; /* Get AQ */ | |
184 | BITTGL(R5, 0); /* Invert it, to get what we'll shift */ | |
185 | .Lulend: R2 = R2 + R5; /* and "shift" it in. */ | |
186 | |||
187 | CC = P0 == 0; /* Check how many inputs we shifted */ | |
188 | IF CC JUMP .Lno_mult; /* if none, R2 is returned as the quotient */ | |
189 | R6 = R2 << 1; | |
190 | CC = P0 == 1; | |
191 | IF CC R2 = R6; /* if 1, Q = Q*2 */ | |
192 | IF !CC R1 = P2; /* if 2, restore stored divisor */ | |
193 | |||
194 | R3 = R2; /* Copy of R2 */ | |
195 | R3 *= R1; /* Q * divisor */ | |
196 | R5 = R0 - R3; /* Z = (dividend - Q * divisor) */ | |
197 | CC = R1 <= R5 (IU); /* Check if divisor <= Z? */ | |
198 | R6 = CC; /* if yes, R6 = 1 */ | |
199 | R2 = R2 + R6; /* if yes, add one to quotient(Q) */ | |
200 | .Lno_mult: | |
201 | SP += 4; /* discard the -divisor pushed before the loop */ | |
202 | (R7:5) = [SP++]; /* Pop registers R5-R7 */ | |
203 | R0 = R2; /* Store quotient */ | |
204 | RTS; | |
205 | |||
206 | .Lreturn_ident: /* Re-test the identities in order; the value to return is built in R2 */ | |
207 | CC = R0 < R1 (IU); /* If X < Y, always return 0 */ | |
208 | R2 = 0; | |
209 | IF CC JUMP .Ltrue_return_ident; | |
210 | R2 = -1 (X); /* X/0 => 0xFFFFFFFF */ | |
211 | CC = R1 == 0; /* NOTE(review): the jumps to .Lreturn_ident in this file appear to arrive with X < Y, X == Y != 0 or Y == 1, so this test may never be true here - confirm */ | |
212 | IF CC JUMP .Ltrue_return_ident; | |
213 | R2 = -R2; /* R2 now 1 */ | |
214 | CC = R0 == R1; /* X==Y => 1 */ | |
215 | IF CC JUMP .Ltrue_return_ident; | |
216 | R2 = R0; /* X/1 => X */ | |
217 | /*FALLTHRU*/ | |
218 | |||
219 | .Ltrue_return_ident: | |
220 | R0 = R2; /* return the value prepared in R2 */ | |
221 | .Lreturn_r0: /* entered directly when R0 already holds the result */ | |
222 | RTS; | |
223 | |||
224 | .Lpower_of_two: | |
225 | /* Y has a single bit set, which means it's a power of two. | |
226 | ** That means we can perform the division just by shifting | |
227 | ** X to the right the appropriate number of bits | |
228 | */ | |
229 | |||
230 | /* signbits returns the number of sign bits, minus one. | |
231 | ** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need | |
232 | ** to shift right by (30 - signbits) places. It also means 0x80000000 | |
233 | ** is a special case, because that *also* gives a signbits of 0 | |
234 | */ | |
235 | |||
236 | R2 = R0 >> 31; /* quotient for Y == 0x80000000: bit 31 of X */ | |
237 | CC = R1 < 0; /* signed compare: true only for Y == 0x80000000 (Y has a single bit set here) */ | |
238 | IF CC JUMP .Ltrue_return_ident; /* returns R2 */ | |
239 | |||
240 | R1.l = SIGNBITS R1; | |
241 | R1 = R1.L (Z); | |
242 | R1 += -30; /* R1 = signbits - 30, i.e. zero or negative */ | |
243 | R0 = LSHIFT R0 by R1.L; /* per the comment above, the negative count shifts X right */ | |
244 | RTS; | |
245 | |||
246 | /* METHOD 3: PRESCALE AND USE THE DIVIDE PRIMITIVES WITH SOME POST-CORRECTION | |
247 | Two scaling operations are required to use the divide primitives with a | |
248 | divisor > 0x7FFF. | |
249 | Firstly (as in method 1) we need to shift the dividend 1 to the left for | |
250 | integer division. | |
251 | Secondly we need to shift both the divisor and dividend 1 to the right so | |
252 | both are in range for the primitives. | |
253 | The left/right shift of the dividend does nothing so we can skip it. | |
254 | */ | |
255 | .Lshift_and_correct: | |
256 | R2 = R0; /* working copy of the dividend; R0 keeps X for the error computation below */ | |
257 | // R3 is already R1 >> 1 | |
258 | CC=!CC; /* arrived here via IF CC JUMP, so CC was 1; it is now 0 */ | |
259 | AQ = CC; /* Clear AQ (CC is 0 after the inversion above) */ | |
260 | DIVQ(R2, R3); // 1 | |
261 | DIVQ(R2, R3); // 2 | |
262 | DIVQ(R2, R3); // 3 | |
263 | DIVQ(R2, R3); // 4 | |
264 | DIVQ(R2, R3); // 5 | |
265 | DIVQ(R2, R3); // 6 | |
266 | DIVQ(R2, R3); // 7 | |
267 | DIVQ(R2, R3); // 8 | |
268 | DIVQ(R2, R3); // 9 | |
269 | DIVQ(R2, R3); // 10 | |
270 | DIVQ(R2, R3); // 11 | |
271 | DIVQ(R2, R3); // 12 | |
272 | DIVQ(R2, R3); // 13 | |
273 | DIVQ(R2, R3); // 14 | |
274 | DIVQ(R2, R3); // 15 | |
275 | DIVQ(R2, R3); // 16 | |
276 | |||
277 | /* According to the Instruction Set Reference: | |
278 | To divide by a divisor > 0x7FFF, | |
279 | 1. prescale and perform divide to obtain quotient (Q) (done above), | |
280 | 2. multiply quotient by unscaled divisor (result M) | |
281 | 3. subtract the product from the dividend to get an error (E = X - M) | |
282 | 4. if E is negative subtract 1, if E > divisor (Y) add 1, else return quotient (Q) (NOTE(review): was "E < divisor"; reworded to match the code below - confirm against the ISR) | |
283 | */ | |
284 | R3 = R2.L (Z); /* Q = X' / Y' */ | |
285 | R2 = R3; /* Preserve Q */ | |
286 | R2 *= R1; /* M = Q * Y */ | |
287 | R2 = R0 - R2; /* E = X - M */ | |
288 | R0 = R3; /* Copy Q into result reg */ | |
289 | |||
290 | /* Correction: if the upper 16 bits of E are all ones (E = X - M went negative), | |
291 | subtract 1 from the result. NOTE(review): the "add 1" case of step 4 is not implemented here.*/ | |
292 | R3 = 0xFFFF (Z); | |
293 | R2 = R2 >> 16; /* E >> 16 */ | |
294 | CC = R2 == R3; /* true when the upper half of E is 0xFFFF */ | |
295 | R3 = 1 ; | |
296 | R1 = R0 - R3; /* R1 = Q - 1 */ | |
297 | IF CC R0 = R1; /* return Q - 1 when E was negative, otherwise Q */ | |
298 | RTS; | |