aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc/lib/urem.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sparc/lib/urem.S')
-rw-r--r--arch/sparc/lib/urem.S355
1 files changed, 355 insertions, 0 deletions
diff --git a/arch/sparc/lib/urem.S b/arch/sparc/lib/urem.S
new file mode 100644
index 000000000000..ec7f0c502c56
--- /dev/null
+++ b/arch/sparc/lib/urem.S
@@ -0,0 +1,355 @@
1/* $Id: urem.S,v 1.4 1996/09/30 02:22:42 davem Exp $
2 * urem.S: This routine was taken from glibc-1.09 and is covered
3 * by the GNU Library General Public License Version 2.
4 */
5
6/* This file is generated from divrem.m4; DO NOT EDIT! */
7/*
8 * Division and remainder, from Appendix E of the Sparc Version 8
9 * Architecture Manual, with fixes from Gordon Irlam.
10 */
11
12/*
13 * Input: dividend and divisor in %o0 and %o1 respectively.
14 *
15 * m4 parameters:
16 * NAME name of function to generate (here: .urem)
17 * OP OP=div => %o0 / %o1; OP=rem => %o0 % %o1 (here: rem)
18 * S S=true => signed; S=false => unsigned (here: false)
19 *
20 * Algorithm parameters:
21 * N how many bits per iteration we try to get (4)
22 * WORDSIZE total number of bits (32)
23 *
24 * Derived constants:
25 * TOPBITS number of bits in the top decade of a number
26 *
27 * Important variables:
28 * Q the partial quotient under development (initially 0)
29 * R the remainder so far, initially the dividend
30 * ITER number of main division loop iterations required;
31 * equal to ceil(log2(quotient) / N). Note that this
32 * is the log base (2^N) of the quotient.
33 * V the current comparand, initially divisor*2^(ITER*N-1)
34 *
35 * Cost:
36 * Current estimate for non-large dividend is
37 * ceil(log2(quotient) / N) * (10 + 7N/2) + C
38 * A large dividend is one greater than 2^(31-TOPBITS) and takes a
39 * different path, as the upper bits of the quotient must be developed
40 * one bit at a time.
41 */
42
/*
 * .urem -- unsigned 32-bit remainder built from shifts, adds and subtracts.
 *
 * In:   %o0 = dividend, %o1 = divisor.
 * Out:  %o0 = %o0 % %o1.  For a zero divisor the ST_DIV0 trap is taken and,
 *       if the trap returns, the routine returns 0.
 * Uses: %o2 (Q, partial quotient), %o3 (R, running remainder),
 *       %o4 (ITER, count of 4-bit rounds), %o5 (V, scaled comparand),
 *       %g1 (constant 1 << 27), %g7 (count of single-bit steps, written
 *       only on the large-dividend path) and the integer condition codes.
 *       %o1 is only read.
 *
 * The instruction that follows each branch is its delay slot and is
 * executed before the branch target unless the branch annuls it (",a").
 *
 * Labels of the form L.<d>.<k> are the negative-remainder targets taken
 * from depth <d> of the unrolled 4-bit step, with <k> - 16 being the
 * accumulated quotient bits at that point.  Each leaf adds 2*bits+1 or
 * 2*bits-1 to Q.
 */
43 .globl .urem
44.urem:
45
46 ! Ready to divide. Compute size of quotient; scale comparand.
47 orcc %o1, %g0, %o5 /* V = divisor, Z is set when the divisor is zero */
48 bne 1f
49 mov %o0, %o3 /* delay slot: R = dividend */
50
51 ! Divide by zero trap. If it returns, return 0 (about as
52 ! wrong as possible, but that is what SunOS does...).
53 ta ST_DIV0
54 retl
55 clr %o0 /* delay slot: return value 0 */
56
571:
58 cmp %o3, %o5 ! if %o1 exceeds %o0, done
59 blu Lgot_result ! (and algorithm fails otherwise)
60 clr %o2 /* delay slot: Q = 0 */
61
62 sethi %hi(1 << (32 - 4 - 1)), %g1 /* %g1 = 1 << 27, the large-dividend threshold */
63
64 cmp %o3, %g1
65 blu Lnot_really_big
66 clr %o4 /* delay slot: ITER = 0 */
67
68 ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
69 ! as our usual N-at-a-shot divide step will cause overflow and havoc.
70 ! The number of bits in the result here is N*ITER+SC, where SC <= N.
71 ! Compute ITER in an unorthodox manner: know we need to shift V into
72 ! the top decade: so do not even bother to compare to R.
73 1:
74 cmp %o5, %g1
75 bgeu 3f
76 mov 1, %g7 /* delay slot: single-bit step count starts at 1 */
77
78 sll %o5, 4, %o5 /* V <<= 4 */
79
80 b 1b
81 add %o4, 1, %o4 /* delay slot: ITER += 1 */
82
83 ! Now compute %g7.
84 2:
85 addcc %o5, %o5, %o5 /* V <<= 1, carry receives the bit shifted out */
86 bcc Lnot_too_big
87 add %g7, 1, %g7 /* delay slot: count one more single-bit step */
88
89 ! We get here if the %o1 overflowed while shifting.
90 ! This means that %o3 has the high-order bit set.
91 ! Restore %o5 and subtract from %o3.
92 sll %g1, 4, %g1 ! high order bit
93 srl %o5, 1, %o5 ! rest of %o5
94 add %o5, %g1, %o5
95
96 b Ldo_single_div
97 sub %g7, 1, %g7 /* delay slot: take back the count added for the undone shift */
98
99 Lnot_too_big:
100 3:
101 cmp %o5, %o3
102 blu 2b
103 nop
104
105 be Ldo_single_div /* the fall-through path reaches Ldo_single_div as well, since the back-up step below is commented out */
106 nop
107 /* NB: these are commented out in the V8-Sparc manual as well */
108 /* (I do not understand this) */
109 ! %o5 > %o3: went too far: back up 1 step
110 ! srl %o5, 1, %o5
111 ! dec %g7
112 ! do single-bit divide steps
113 !
114 ! We have to be careful here. We know that %o3 >= %o5, so we can do the
115 ! first divide step without thinking. BUT, the others are conditional,
116 ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
117 ! order bit set in the first step, just falling into the regular
118 ! division loop will mess up the first time around.
119 ! So we unroll slightly...
120 Ldo_single_div:
121 subcc %g7, 1, %g7
122 bl Lend_regular_divide
123 nop
124
125 sub %o3, %o5, %o3 /* first step is unconditional: R -= V */
126 mov 1, %o2 /* Q = 1 */
127
128 b Lend_single_divloop
129 nop
130 Lsingle_divloop:
131 sll %o2, 1, %o2 /* make room for one new quotient bit */
132 bl 1f /* tests the sign of R as set by the tst in the loop branch delay slot */
133 srl %o5, 1, %o5 /* delay slot: V >>= 1 */
134 ! %o3 >= 0
135 sub %o3, %o5, %o3
136 b 2f
137 add %o2, 1, %o2 /* delay slot: Q += 1 */
138 1: ! %o3 < 0
139 add %o3, %o5, %o3
140 sub %o2, 1, %o2
141 2:
142 Lend_single_divloop:
143 subcc %g7, 1, %g7
144 bge Lsingle_divloop
145 tst %o3 /* delay slot: condition codes now reflect the sign of R */
146
147 b,a Lend_regular_divide /* annulled unconditional branch: no delay slot instruction runs */
148
149Lnot_really_big:
1501:
151 sll %o5, 4, %o5 /* V <<= 4 */
152
153 cmp %o5, %o3
154 bleu 1b
155 addcc %o4, 1, %o4 /* delay slot: ITER += 1, also sets the codes tested by the be below */
156
157 be Lgot_result
158 sub %o4, 1, %o4 /* delay slot: ITER -= 1 */
159
160 tst %o3 ! set up for initial iteration
161Ldivloop:
162 sll %o2, 4, %o2 /* make room for four new quotient bits */
163 ! depth 1, accumulated bits 0
164 bl L.1.16
165 srl %o5,1,%o5
166 ! remainder is positive
167 subcc %o3,%o5,%o3
168 ! depth 2, accumulated bits 1
169 bl L.2.17
170 srl %o5,1,%o5
171 ! remainder is positive
172 subcc %o3,%o5,%o3
173 ! depth 3, accumulated bits 3
174 bl L.3.19
175 srl %o5,1,%o5
176 ! remainder is positive
177 subcc %o3,%o5,%o3
178 ! depth 4, accumulated bits 7
179 bl L.4.23
180 srl %o5,1,%o5
181 ! remainder is positive
182 subcc %o3,%o5,%o3
183 b 9f
184 add %o2, (7*2+1), %o2
185
186L.4.23:
187 ! remainder is negative
188 addcc %o3,%o5,%o3
189 b 9f
190 add %o2, (7*2-1), %o2
191
192L.3.19:
193 ! remainder is negative
194 addcc %o3,%o5,%o3
195 ! depth 4, accumulated bits 5
196 bl L.4.21
197 srl %o5,1,%o5
198 ! remainder is positive
199 subcc %o3,%o5,%o3
200 b 9f
201 add %o2, (5*2+1), %o2
202
203L.4.21:
204 ! remainder is negative
205 addcc %o3,%o5,%o3
206 b 9f
207 add %o2, (5*2-1), %o2
208
209L.2.17:
210 ! remainder is negative
211 addcc %o3,%o5,%o3
212 ! depth 3, accumulated bits 1
213 bl L.3.17
214 srl %o5,1,%o5
215 ! remainder is positive
216 subcc %o3,%o5,%o3
217 ! depth 4, accumulated bits 3
218 bl L.4.19
219 srl %o5,1,%o5
220 ! remainder is positive
221 subcc %o3,%o5,%o3
222 b 9f
223 add %o2, (3*2+1), %o2
224
225L.4.19:
226 ! remainder is negative
227 addcc %o3,%o5,%o3
228 b 9f
229 add %o2, (3*2-1), %o2
230
231L.3.17:
232 ! remainder is negative
233 addcc %o3,%o5,%o3
234 ! depth 4, accumulated bits 1
235 bl L.4.17
236 srl %o5,1,%o5
237 ! remainder is positive
238 subcc %o3,%o5,%o3
239 b 9f
240 add %o2, (1*2+1), %o2
241
242L.4.17:
243 ! remainder is negative
244 addcc %o3,%o5,%o3
245 b 9f
246 add %o2, (1*2-1), %o2
247
248L.1.16:
249 ! remainder is negative
250 addcc %o3,%o5,%o3
251 ! depth 2, accumulated bits -1
252 bl L.2.15
253 srl %o5,1,%o5
254 ! remainder is positive
255 subcc %o3,%o5,%o3
256 ! depth 3, accumulated bits -1
257 bl L.3.15
258 srl %o5,1,%o5
259 ! remainder is positive
260 subcc %o3,%o5,%o3
261 ! depth 4, accumulated bits -1
262 bl L.4.15
263 srl %o5,1,%o5
264 ! remainder is positive
265 subcc %o3,%o5,%o3
266 b 9f
267 add %o2, (-1*2+1), %o2
268
269L.4.15:
270 ! remainder is negative
271 addcc %o3,%o5,%o3
272 b 9f
273 add %o2, (-1*2-1), %o2
274
275L.3.15:
276 ! remainder is negative
277 addcc %o3,%o5,%o3
278 ! depth 4, accumulated bits -3
279 bl L.4.13
280 srl %o5,1,%o5
281 ! remainder is positive
282 subcc %o3,%o5,%o3
283 b 9f
284 add %o2, (-3*2+1), %o2
285
286L.4.13:
287 ! remainder is negative
288 addcc %o3,%o5,%o3
289 b 9f
290 add %o2, (-3*2-1), %o2
291
292L.2.15:
293 ! remainder is negative
294 addcc %o3,%o5,%o3
295 ! depth 3, accumulated bits -3
296 bl L.3.13
297 srl %o5,1,%o5
298 ! remainder is positive
299 subcc %o3,%o5,%o3
300 ! depth 4, accumulated bits -5
301 bl L.4.11
302 srl %o5,1,%o5
303 ! remainder is positive
304 subcc %o3,%o5,%o3
305 b 9f
306 add %o2, (-5*2+1), %o2
307
308L.4.11:
309 ! remainder is negative
310 addcc %o3,%o5,%o3
311 b 9f
312 add %o2, (-5*2-1), %o2
313
314L.3.13:
315 ! remainder is negative
316 addcc %o3,%o5,%o3
317 ! depth 4, accumulated bits -7
318 bl L.4.9
319 srl %o5,1,%o5
320 ! remainder is positive
321 subcc %o3,%o5,%o3
322 b 9f
323 add %o2, (-7*2+1), %o2
324
325L.4.9:
326 ! remainder is negative
327 addcc %o3,%o5,%o3
328 b 9f
329 add %o2, (-7*2-1), %o2
330
331 9:
332Lend_regular_divide:
333 subcc %o4, 1, %o4 /* one 4-bit round consumed */
334 bge Ldivloop
335 tst %o3 /* delay slot: condition codes now reflect the sign of R */
336
337 bl,a Lgot_result /* R < 0: branch and run the fixup in the delay slot. Otherwise the delay slot is annulled and control falls through */
338 ! non-restoring fixup here (one instruction only!)
339 add %o3, %o1, %o3 /* R += divisor */
340
341Lgot_result:
342
343 retl
344 mov %o3, %o0 /* delay slot: return R in %o0 */
345
/*
 * .urem_patch -- unsigned remainder using the udiv and umul instructions.
 *
 * In:   %o0 = dividend, %o1 = divisor.
 * Out:  %o0 = %o0 - (%o0 / %o1) * %o1.
 * Uses: %o2 and %y.
 *
 * NOTE(review): the name suggests this sequence is copied over .urem on
 * CPUs that implement udiv and umul in hardware.  The patching code is not
 * in this file, so confirm before changing the instruction count or layout.
 */
346 .globl .urem_patch
347.urem_patch:
348 wr %g0, 0x0, %y /* %y = 0: udiv takes %y as the upper 32 bits of its dividend */
349 nop /* three instructions separate the %y write from the udiv that reads it */
350 nop
351 nop
352 udiv %o0, %o1, %o2 /* %o2 = %o0 / %o1 */
353 umul %o2, %o1, %o2 /* %o2 = quotient * divisor (low 32 bits) */
354 retl
355 sub %o0, %o2, %o0 /* delay slot: %o0 = dividend - quotient * divisor */