aboutsummaryrefslogtreecommitdiffstats
path: root/arch/blackfin/lib/udivsi3.S
diff options
context:
space:
mode:
authorBryan Wu <bryan.wu@analog.com>2007-05-06 17:50:22 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-05-07 15:12:58 -0400
commit1394f03221790a988afc3e4b3cb79f2e477246a9 (patch)
tree2c1963c9a4f2d84a5e021307fde240c5d567cf70 /arch/blackfin/lib/udivsi3.S
parent73243284463a761e04d69d22c7516b2be7de096c (diff)
blackfin architecture
This adds support for the Analog Devices Blackfin processor architecture, and currently supports the BF533, BF532, BF531, BF537, BF536, BF534, and BF561 (Dual Core) devices, with a variety of development platforms including those avaliable from Analog Devices (BF533-EZKit, BF533-STAMP, BF537-STAMP, BF561-EZKIT), and Bluetechnix! Tinyboards. The Blackfin architecture was jointly developed by Intel and Analog Devices Inc. (ADI) as the Micro Signal Architecture (MSA) core and introduced it in December of 2000. Since then ADI has put this core into its Blackfin processor family of devices. The Blackfin core has the advantages of a clean, orthogonal,RISC-like microprocessor instruction set. It combines a dual-MAC (Multiply/Accumulate), state-of-the-art signal processing engine and single-instruction, multiple-data (SIMD) multimedia capabilities into a single instruction-set architecture. The Blackfin architecture, including the instruction set, is described by the ADSP-BF53x/BF56x Blackfin Processor Programming Reference http://blackfin.uclinux.org/gf/download/frsrelease/29/2549/Blackfin_PRM.pdf The Blackfin processor is already supported by major releases of gcc, and there are binary and source rpms/tarballs for many architectures at: http://blackfin.uclinux.org/gf/project/toolchain/frs There is complete documentation, including "getting started" guides available at: http://docs.blackfin.uclinux.org/ which provides links to the sources and patches you will need in order to set up a cross-compiling environment for bfin-linux-uclibc This patch, as well as the other patches (toolchain, distribution, uClibc) are actively supported by Analog Devices Inc, at: http://blackfin.uclinux.org/ We have tested this on LTP, and our test plan (including pass/fails) can be found at: http://docs.blackfin.uclinux.org/doku.php?id=testing_the_linux_kernel [m.kozlowski@tuxland.pl: balance parenthesis in blackfin header files] Signed-off-by: Bryan Wu <bryan.wu@analog.com> Signed-off-by: Mariusz Kozlowski <m.kozlowski@tuxland.pl> Signed-off-by: Aubrey Li <aubrey.li@analog.com> Signed-off-by: Jie Zhang <jie.zhang@analog.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/blackfin/lib/udivsi3.S')
-rw-r--r--arch/blackfin/lib/udivsi3.S298
1 files changed, 298 insertions, 0 deletions
diff --git a/arch/blackfin/lib/udivsi3.S b/arch/blackfin/lib/udivsi3.S
new file mode 100644
index 000000000000..d39a12916259
--- /dev/null
+++ b/arch/blackfin/lib/udivsi3.S
@@ -0,0 +1,298 @@
1/*
2 * File: arch/blackfin/lib/udivsi3.S
3 * Based on:
4 * Author:
5 *
6 * Created:
7 * Description:
8 *
9 * Modified:
10 * Copyright 2004-2006 Analog Devices Inc.
11 *
12 * Bugs: Enter bugs at http://blackfin.uclinux.org/
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
18 *
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with this program; if not, see the file COPYING, or write
26 * to the Free Software Foundation, Inc.,
27 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
28 */
29
30#include <linux/linkage.h>
31
32#define CARRY AC0
33
34#ifdef CONFIG_ARITHMETIC_OPS_L1
35.section .l1.text
36#else
37.text
38#endif
39
40
41ENTRY(___udivsi3)
42
43 CC = R0 < R1 (IU); /* If X < Y, always return 0 */
44 IF CC JUMP .Lreturn_ident;
45
46 R2 = R1 << 16;
47 CC = R2 <= R0 (IU);
48 IF CC JUMP .Lidents;
49
50 R2 = R0 >> 31; /* if X is a 31-bit number */
51 R3 = R1 >> 15; /* and Y is a 15-bit number */
52 R2 = R2 | R3; /* then it's okay to use the DIVQ builtins (fallthrough to fast)*/
53 CC = R2;
54 IF CC JUMP .Ly_16bit;
55
56/* METHOD 1: FAST DIVQ
57 We know we have a 31-bit dividend, and 15-bit divisor so we can use the
58 simple divq approach (first setting AQ to 0 - implying unsigned division,
59 then 16 DIVQ's).
60*/
61
62 AQ = CC; /* Clear AQ (CC==0) */
63
64/* ISR States: When dividing two integers (32.0/16.0) using divide primitives,
65 we need to shift the dividend one bit to the left.
66 We have already checked that we have a 31-bit number so we are safe to do
67 that.
68*/
69 R0 <<= 1;
70 DIVQ(R0, R1); // 1
71 DIVQ(R0, R1); // 2
72 DIVQ(R0, R1); // 3
73 DIVQ(R0, R1); // 4
74 DIVQ(R0, R1); // 5
75 DIVQ(R0, R1); // 6
76 DIVQ(R0, R1); // 7
77 DIVQ(R0, R1); // 8
78 DIVQ(R0, R1); // 9
79 DIVQ(R0, R1); // 10
80 DIVQ(R0, R1); // 11
81 DIVQ(R0, R1); // 12
82 DIVQ(R0, R1); // 13
83 DIVQ(R0, R1); // 14
84 DIVQ(R0, R1); // 15
85 DIVQ(R0, R1); // 16
86 R0 = R0.L (Z);
87 RTS;
88
89.Ly_16bit:
90 /* We know that the upper 17 bits of Y might have bits set,
91 ** or that the sign bit of X might have a bit. If Y is a
92 ** 16-bit number, but not bigger, then we can use the builtins
93 ** with a post-divide correction.
94 ** R3 currently holds Y>>15, which means R3's LSB is the
95 ** bit we're interested in.
96 */
97
98 /* According to the ISR, to use the Divide primitives for
99 ** unsigned integer divide, the useable range is 31 bits
100 */
101 CC = ! BITTST(R0, 31);
102
103 /* IF condition is true we can scale our inputs and use the divide primitives,
104 ** with some post-adjustment
105 */
106 R3 += -1; /* if so, Y is 0x00008nnn */
107 CC &= AZ;
108
109 /* If condition is true we can scale our inputs and use the divide primitives,
110 ** with some post-adjustment
111 */
112 R3 = R1 >> 1; /* Pre-scaled divisor for primitive case */
113 R2 = R0 >> 16;
114
115 R2 = R3 - R2; /* shifted divisor < upper 16 bits of dividend */
116 CC &= CARRY;
117 IF CC JUMP .Lshift_and_correct;
118
119 /* Fall through to the identities */
120
121/* METHOD 2: identities and manual calculation
122 We are not able to use the divide primites, but may still catch some special
123 cases.
124*/
125.Lidents:
126 /* Test for common identities. Value to be returned is placed in R2. */
127 CC = R0 == 0; /* 0/Y => 0 */
128 IF CC JUMP .Lreturn_r0;
129 CC = R0 == R1; /* X==Y => 1 */
130 IF CC JUMP .Lreturn_ident;
131 CC = R1 == 1; /* X/1 => X */
132 IF CC JUMP .Lreturn_ident;
133
134 R2.L = ONES R1;
135 R2 = R2.L (Z);
136 CC = R2 == 1;
137 IF CC JUMP .Lpower_of_two;
138
139 [--SP] = (R7:5); /* Push registers R5-R7 */
140
141 /* Idents don't match. Go for the full operation. */
142
143
144 R6 = 2; /* assume we'll shift two */
145 R3 = 1;
146
147 P2 = R1;
148 /* If either R0 or R1 have sign set, */
149 /* divide them by two, and note it's */
150 /* been done. */
151 CC = R1 < 0;
152 R2 = R1 >> 1;
153 IF CC R1 = R2; /* Possibly-shifted R1 */
154 IF !CC R6 = R3; /* R1 doesn't, so at most 1 shifted */
155
156 P0 = 0;
157 R3 = -R1;
158 [--SP] = R3;
159 R2 = R0 >> 1;
160 R2 = R0 >> 1;
161 CC = R0 < 0;
162 IF CC P0 = R6; /* Number of values divided */
163 IF !CC R2 = R0; /* Shifted R0 */
164
165 /* P0 is 0, 1 (NR/=2) or 2 (NR/=2, DR/=2) */
166
167 /* r2 holds Copy dividend */
168 R3 = 0; /* Clear partial remainder */
169 R7 = 0; /* Initialise quotient bit */
170
171 P1 = 32; /* Set loop counter */
172 LSETUP(.Lulst, .Lulend) LC0 = P1; /* Set loop counter */
173.Lulst: R6 = R2 >> 31; /* R6 = sign bit of R2, for carry */
174 R2 = R2 << 1; /* Shift 64 bit dividend up by 1 bit */
175 R3 = R3 << 1 || R5 = [SP];
176 R3 = R3 | R6; /* Include any carry */
177 CC = R7 < 0; /* Check quotient(AQ) */
178 /* If AQ==0, we'll sub divisor */
179 IF CC R5 = R1; /* and if AQ==1, we'll add it. */
180 R3 = R3 + R5; /* Add/sub divsor to partial remainder */
181 R7 = R3 ^ R1; /* Generate next quotient bit */
182
183 R5 = R7 >> 31; /* Get AQ */
184 BITTGL(R5, 0); /* Invert it, to get what we'll shift */
185.Lulend: R2 = R2 + R5; /* and "shift" it in. */
186
187 CC = P0 == 0; /* Check how many inputs we shifted */
188 IF CC JUMP .Lno_mult; /* if none... */
189 R6 = R2 << 1;
190 CC = P0 == 1;
191 IF CC R2 = R6; /* if 1, Q = Q*2 */
192 IF !CC R1 = P2; /* if 2, restore stored divisor */
193
194 R3 = R2; /* Copy of R2 */
195 R3 *= R1; /* Q * divisor */
196 R5 = R0 - R3; /* Z = (dividend - Q * divisor) */
197 CC = R1 <= R5 (IU); /* Check if divisor <= Z? */
198 R6 = CC; /* if yes, R6 = 1 */
199 R2 = R2 + R6; /* if yes, add one to quotient(Q) */
200.Lno_mult:
201 SP += 4;
202 (R7:5) = [SP++]; /* Pop registers R5-R7 */
203 R0 = R2; /* Store quotient */
204 RTS;
205
206.Lreturn_ident:
207 CC = R0 < R1 (IU); /* If X < Y, always return 0 */
208 R2 = 0;
209 IF CC JUMP .Ltrue_return_ident;
210 R2 = -1 (X); /* X/0 => 0xFFFFFFFF */
211 CC = R1 == 0;
212 IF CC JUMP .Ltrue_return_ident;
213 R2 = -R2; /* R2 now 1 */
214 CC = R0 == R1; /* X==Y => 1 */
215 IF CC JUMP .Ltrue_return_ident;
216 R2 = R0; /* X/1 => X */
217 /*FALLTHRU*/
218
219.Ltrue_return_ident:
220 R0 = R2;
221.Lreturn_r0:
222 RTS;
223
224.Lpower_of_two:
225 /* Y has a single bit set, which means it's a power of two.
226 ** That means we can perform the division just by shifting
227 ** X to the right the appropriate number of bits
228 */
229
230 /* signbits returns the number of sign bits, minus one.
231 ** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
232 ** to shift right n-signbits spaces. It also means 0x80000000
233 ** is a special case, because that *also* gives a signbits of 0
234 */
235
236 R2 = R0 >> 31;
237 CC = R1 < 0;
238 IF CC JUMP .Ltrue_return_ident;
239
240 R1.l = SIGNBITS R1;
241 R1 = R1.L (Z);
242 R1 += -30;
243 R0 = LSHIFT R0 by R1.L;
244 RTS;
245
246/* METHOD 3: PRESCALE AND USE THE DIVIDE PRIMITIVES WITH SOME POST-CORRECTION
247 Two scaling operations are required to use the divide primitives with a
248 divisor > 0x7FFFF.
249 Firstly (as in method 1) we need to shift the dividend 1 to the left for
250 integer division.
251 Secondly we need to shift both the divisor and dividend 1 to the right so
252 both are in range for the primitives.
253 The left/right shift of the dividend does nothing so we can skip it.
254*/
255.Lshift_and_correct:
256 R2 = R0;
257 // R3 is already R1 >> 1
258 CC=!CC;
259 AQ = CC; /* Clear AQ, got here with CC = 0 */
260 DIVQ(R2, R3); // 1
261 DIVQ(R2, R3); // 2
262 DIVQ(R2, R3); // 3
263 DIVQ(R2, R3); // 4
264 DIVQ(R2, R3); // 5
265 DIVQ(R2, R3); // 6
266 DIVQ(R2, R3); // 7
267 DIVQ(R2, R3); // 8
268 DIVQ(R2, R3); // 9
269 DIVQ(R2, R3); // 10
270 DIVQ(R2, R3); // 11
271 DIVQ(R2, R3); // 12
272 DIVQ(R2, R3); // 13
273 DIVQ(R2, R3); // 14
274 DIVQ(R2, R3); // 15
275 DIVQ(R2, R3); // 16
276
277 /* According to the Instruction Set Reference:
278 To divide by a divisor > 0x7FFF,
279 1. prescale and perform divide to obtain quotient (Q) (done above),
280 2. multiply quotient by unscaled divisor (result M)
281 3. subtract the product from the divident to get an error (E = X - M)
282 4. if E < divisor (Y) subtract 1, if E > divisor (Y) add 1, else return quotient (Q)
283 */
284 R3 = R2.L (Z); /* Q = X' / Y' */
285 R2 = R3; /* Preserve Q */
286 R2 *= R1; /* M = Q * Y */
287 R2 = R0 - R2; /* E = X - M */
288 R0 = R3; /* Copy Q into result reg */
289
290/* Correction: If result of the multiply is negative, we overflowed
291 and need to correct the result by subtracting 1 from the result.*/
292 R3 = 0xFFFF (Z);
293 R2 = R2 >> 16; /* E >> 16 */
294 CC = R2 == R3;
295 R3 = 1 ;
296 R1 = R0 - R3;
297 IF CC R0 = R1;
298 RTS;