aboutsummaryrefslogtreecommitdiffstats
path: root/arch/sparc/lib
diff options
context:
space:
mode:
authorJonathan Herman <hermanjl@cs.unc.edu>2013-01-22 10:38:37 -0500
committerJonathan Herman <hermanjl@cs.unc.edu>2013-01-22 10:38:37 -0500
commitfcc9d2e5a6c89d22b8b773a64fb4ad21ac318446 (patch)
treea57612d1888735a2ec7972891b68c1ac5ec8faea /arch/sparc/lib
parent8dea78da5cee153b8af9c07a2745f6c55057fe12 (diff)
Added missing tegra files.HEADmaster
Diffstat (limited to 'arch/sparc/lib')
-rw-r--r--arch/sparc/lib/atomic_32.S99
-rw-r--r--arch/sparc/lib/mul.S137
-rw-r--r--arch/sparc/lib/rem.S384
-rw-r--r--arch/sparc/lib/sdiv.S381
-rw-r--r--arch/sparc/lib/strlen_user_32.S109
-rw-r--r--arch/sparc/lib/strlen_user_64.S95
-rw-r--r--arch/sparc/lib/strncpy_from_user_32.S47
-rw-r--r--arch/sparc/lib/strncpy_from_user_64.S135
-rw-r--r--arch/sparc/lib/udiv.S357
-rw-r--r--arch/sparc/lib/umul.S171
-rw-r--r--arch/sparc/lib/urem.S357
11 files changed, 2272 insertions, 0 deletions
diff --git a/arch/sparc/lib/atomic_32.S b/arch/sparc/lib/atomic_32.S
new file mode 100644
index 00000000000..178cbb8ae1b
--- /dev/null
+++ b/arch/sparc/lib/atomic_32.S
@@ -0,0 +1,99 @@
1/* atomic.S: Move this stuff here for better ICACHE hit rates.
2 *
3 * Copyright (C) 1996 David S. Miller (davem@caipfs.rutgers.edu)
4 */
5
6#include <asm/ptrace.h>
7#include <asm/psr.h>
8
9 .text
10 .align 4
11
12 .globl __atomic_begin
13__atomic_begin:
14
15#ifndef CONFIG_SMP
/*
 * ___xchg32_sun4c: exchange %g2 with the word at [%g1] using a plain
 * load/store pair, with the PSR_PIL bits of %psr set for the duration.
 *
 * In:      %g1 = address of the word, %g2 = new value,
 *          %g4 = value moved back into %o7 on exit
 * Out:     %g2 = previous contents of [%g1]
 * Scratch: %g3 (saved %psr), %g7 (old value), integer condition codes
 * Return:  jmpl %o7 + 8
 */
16 .globl ___xchg32_sun4c
17___xchg32_sun4c:
18 rd %psr, %g3 /* %g3 = %psr as found on entry */
19 andcc %g3, PSR_PIL, %g0 /* any PIL bit already set? */
20 bne 1f /* yes: leave %psr alone */
21 nop
22 wr %g3, PSR_PIL, %psr /* wr XORs its operands; PIL was 0, so this sets every PIL bit */
23 nop; nop; nop
241:
25 andcc %g3, PSR_PIL, %g0 /* redo the test for the branch below */
26 ld [%g1], %g7 /* %g7 = old value */
27 bne 1f /* PIL was nonzero on entry: nothing to restore */
28 st %g2, [%g1] /* delay slot (always executed): store the new value */
29 wr %g3, 0x0, %psr /* put back the entry %psr */
30 nop; nop; nop
311:
32 mov %g7, %g2 /* hand the old value back in %g2 */
33 jmpl %o7 + 8, %g0
34 mov %g4, %o7 /* delay slot: %o7 = %g4 */
35
/*
 * ___xchg32_sun4md: same register contract as ___xchg32_sun4c (%g1 =
 * address, %g2 = new value in / old value out, %g4 moved into %o7), but
 * the exchange is a single swap instruction and %psr is not touched.
 */
36 .globl ___xchg32_sun4md
37___xchg32_sun4md:
38 swap [%g1], %g2 /* %g2 <-> word at [%g1] */
39 jmpl %o7 + 8, %g0
40 mov %g4, %o7 /* delay slot: %o7 = %g4 */
41#endif
42
43 /* Read asm-sparc/atomic.h carefully to understand how this works for SMP.
44 * Really, some things here for SMP are overly clever, go read the header.
45 */
/*
 * ___atomic24_add: add %g2 to the counter at [%g1] with the PSR_PIL bits
 * of %psr forced on around the update.
 *
 * In:      %g1 = address of the counter, %g2 = value to add,
 *          %g4 = value moved back into %o7 on exit
 * Out:     %g2 = the new counter value
 * Scratch: %g3 (saved %psr), %g7; integer condition codes in the
 *          CONFIG_SMP build only
 * Return:  jmpl %o7 (not %o7 + 8)
 *
 * CONFIG_SMP build: the byte at [%g1 + 3] is used as a ldstub spin lock
 * and the signed count lives in the upper 24 bits of the word.  Storing
 * the count shifted left by 8 leaves the low 8 bits zero, which is what
 * drops the lock.  The ld that follows "bne 1b" sits in its delay slot,
 * so it runs on every spin; only the value loaded on the final pass is
 * used.
 * Non-SMP build: the whole word is the counter (plain ld/add/st).
 */
46 .globl ___atomic24_add
47___atomic24_add:
48 rd %psr, %g3 ! Keep the code small, old way was stupid
49 nop; nop; nop; ! Let the bits set
50 or %g3, PSR_PIL, %g7 ! Disable interrupts
51 wr %g7, 0x0, %psr ! Set %psr
52 nop; nop; nop; ! Let the bits set
53#ifdef CONFIG_SMP
541: ldstub [%g1 + 3], %g7 ! Spin on the byte lock for SMP.
55 orcc %g7, 0x0, %g0 ! Did we get it?
56 bne 1b ! Nope...
57 ld [%g1], %g7 ! Load locked atomic24_t
58 sra %g7, 8, %g7 ! Get signed 24-bit integer
59 add %g7, %g2, %g2 ! Add in argument
60 sll %g2, 8, %g7 ! Transpose back to atomic24_t
61 st %g7, [%g1] ! Clever: This releases the lock as well.
62#else
63 ld [%g1], %g7 ! Load locked atomic24_t
64 add %g7, %g2, %g2 ! Add in argument
65 st %g2, [%g1] ! Store it back
66#endif
67 wr %g3, 0x0, %psr ! Restore original PSR_PIL
68 nop; nop; nop; ! Let the bits set
69 jmpl %o7, %g0 ! NOTE: not + 8, see callers in atomic.h
70 mov %g4, %o7 ! Restore %o7
71
/*
 * ___atomic24_sub: subtract %g2 from the counter at [%g1] with the
 * PSR_PIL bits of %psr forced on around the update.
 *
 * In:      %g1 = address of the counter, %g2 = value to subtract,
 *          %g4 = value moved back into %o7 on exit
 * Out:     %g2 = the new counter value (old count - %g2)
 * Scratch: %g3 (saved %psr), %g7; integer condition codes in the
 *          CONFIG_SMP build only
 * Return:  jmpl %o7 (not %o7 + 8)
 *
 * CONFIG_SMP build: the byte at [%g1 + 3] is used as a ldstub spin lock
 * and the signed count lives in the upper 24 bits of the word.  Storing
 * the count shifted left by 8 leaves the low 8 bits zero, which is what
 * drops the lock.  The ld that follows "bne 1b" sits in its delay slot,
 * so it runs on every spin; only the value loaded on the final pass is
 * used.
 * Non-SMP build: the whole word is the counter (plain ld/sub/st).
 */
72 .globl ___atomic24_sub
73___atomic24_sub:
74 rd %psr, %g3 ! Keep the code small, old way was stupid
75 nop; nop; nop; ! Let the bits set
76 or %g3, PSR_PIL, %g7 ! Disable interrupts
77 wr %g7, 0x0, %psr ! Set %psr
78 nop; nop; nop; ! Let the bits set
79#ifdef CONFIG_SMP
801: ldstub [%g1 + 3], %g7 ! Spin on the byte lock for SMP.
81 orcc %g7, 0x0, %g0 ! Did we get it?
82 bne 1b ! Nope...
83 ld [%g1], %g7 ! Load locked atomic24_t
84 sra %g7, 8, %g7 ! Get signed 24-bit integer
85 sub %g7, %g2, %g2 ! Subtract argument
86 sll %g2, 8, %g7 ! Transpose back to atomic24_t
87 st %g7, [%g1] ! Clever: This releases the lock as well
88#else
89 ld [%g1], %g7 ! Load locked atomic24_t
90 sub %g7, %g2, %g2 ! Subtract argument
91 st %g2, [%g1] ! Store it back
92#endif
93 wr %g3, 0x0, %psr ! Restore original PSR_PIL
94 nop; nop; nop; ! Let the bits set
95 jmpl %o7, %g0 ! NOTE: not + 8, see callers in atomic.h
96 mov %g4, %o7 ! Restore %o7
97
98 .globl __atomic_end
99__atomic_end:
diff --git a/arch/sparc/lib/mul.S b/arch/sparc/lib/mul.S
new file mode 100644
index 00000000000..c45470d0b0c
--- /dev/null
+++ b/arch/sparc/lib/mul.S
@@ -0,0 +1,137 @@
1/*
2 * mul.S: This routine was taken from glibc-1.09 and is covered
3 * by the GNU Library General Public License Version 2.
4 */
5
6/*
7 * Signed multiply, from Appendix E of the Sparc Version 8
8 * Architecture Manual.
9 */
10
11/*
12 * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of
13 * the 64-bit product).
14 *
15 * This code optimizes short (less than 13-bit) multiplies.
16 */
17
/*
 * .mul / _Mul: signed 32 x 32 -> 64 multiply built from mulscc steps.
 *
 * In:      %o0 = multiplier, %o1 = multiplicand
 * Out:     %o0 = low 32 bits of the product, %o1 = high 32 bits
 * Scratch: %o2, %o4, %o5, %y, integer condition codes
 *
 * When bits 12..31 of %o0 are all zero the 12-step Lmul_shortway path is
 * taken; otherwise the full 32-step sequence runs and the high word is
 * corrected for a negative %o0.  The "#if 0" arm below is compiled out;
 * only the "#else" arm is assembled.
 */
18 .globl .mul
19 .globl _Mul
20.mul:
21_Mul: /* needed for export */
22 mov %o0, %y ! multiplier -> Y
23 andncc %o0, 0xfff, %g0 ! test bits 12..31
24 be Lmul_shortway ! if zero, can do it the short way
25 andcc %g0, %g0, %o4 ! zero the partial product and clear N and V
26
27 /*
28 * Long multiply. 32 steps, followed by a final shift step.
29 */
30 mulscc %o4, %o1, %o4 ! 1
31 mulscc %o4, %o1, %o4 ! 2
32 mulscc %o4, %o1, %o4 ! 3
33 mulscc %o4, %o1, %o4 ! 4
34 mulscc %o4, %o1, %o4 ! 5
35 mulscc %o4, %o1, %o4 ! 6
36 mulscc %o4, %o1, %o4 ! 7
37 mulscc %o4, %o1, %o4 ! 8
38 mulscc %o4, %o1, %o4 ! 9
39 mulscc %o4, %o1, %o4 ! 10
40 mulscc %o4, %o1, %o4 ! 11
41 mulscc %o4, %o1, %o4 ! 12
42 mulscc %o4, %o1, %o4 ! 13
43 mulscc %o4, %o1, %o4 ! 14
44 mulscc %o4, %o1, %o4 ! 15
45 mulscc %o4, %o1, %o4 ! 16
46 mulscc %o4, %o1, %o4 ! 17
47 mulscc %o4, %o1, %o4 ! 18
48 mulscc %o4, %o1, %o4 ! 19
49 mulscc %o4, %o1, %o4 ! 20
50 mulscc %o4, %o1, %o4 ! 21
51 mulscc %o4, %o1, %o4 ! 22
52 mulscc %o4, %o1, %o4 ! 23
53 mulscc %o4, %o1, %o4 ! 24
54 mulscc %o4, %o1, %o4 ! 25
55 mulscc %o4, %o1, %o4 ! 26
56 mulscc %o4, %o1, %o4 ! 27
57 mulscc %o4, %o1, %o4 ! 28
58 mulscc %o4, %o1, %o4 ! 29
59 mulscc %o4, %o1, %o4 ! 30
60 mulscc %o4, %o1, %o4 ! 31
61 mulscc %o4, %o1, %o4 ! 32
62 mulscc %o4, %g0, %o4 ! final shift
63
64 ! If %o0 was negative, the result is
65 ! (%o0 * %o1) + (%o1 << 32))
66 ! We fix that here.
67
68#if 0
69 tst %o0
70 bge 1f
71 rd %y, %o0
72
73 ! %o0 was indeed negative; fix upper 32 bits of result by subtracting
74 ! %o1 (i.e., return %o4 - %o1 in %o1).
75 retl
76 sub %o4, %o1, %o1
77
781:
79 retl
80 mov %o4, %o1
81#else
82 /* Faster code adapted from tege@sics.se's code for umul.S. */
83 sra %o0, 31, %o2 ! make mask from sign bit
84 and %o1, %o2, %o2 ! %o2 = 0 or %o1, depending on sign of %o0
85 rd %y, %o0 ! get lower half of product
86 retl
87 sub %o4, %o2, %o1 ! subtract compensation
88 ! and put upper half in place
89#endif
90
91Lmul_shortway:
92 /*
93 * Short multiply. 12 steps, followed by a final shift step.
94 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
95 * but there is no problem with %o0 being negative (unlike above).
96 */
97 mulscc %o4, %o1, %o4 ! 1
98 mulscc %o4, %o1, %o4 ! 2
99 mulscc %o4, %o1, %o4 ! 3
100 mulscc %o4, %o1, %o4 ! 4
101 mulscc %o4, %o1, %o4 ! 5
102 mulscc %o4, %o1, %o4 ! 6
103 mulscc %o4, %o1, %o4 ! 7
104 mulscc %o4, %o1, %o4 ! 8
105 mulscc %o4, %o1, %o4 ! 9
106 mulscc %o4, %o1, %o4 ! 10
107 mulscc %o4, %o1, %o4 ! 11
108 mulscc %o4, %o1, %o4 ! 12
109 mulscc %o4, %g0, %o4 ! final shift
110
111 /*
112 * %o4 has 20 of the bits that should be in the low part of the
113 * result; %y has the bottom 12 (as %y's top 12). That is:
114 *
115 * %o4 %y
116 * +----------------+----------------+
117 * | -12- | -20- | -12- | -20- |
118 * +------(---------+------)---------+
119 * --hi-- ----low-part----
120 *
121 * The upper 12 bits of %o4 should be sign-extended to form the
122 * high part of the product (i.e., highpart = %o4 >> 20).
123 */
124
125 rd %y, %o5 /* %o5 = %y; its top 12 bits are the lowest 12 product bits */
126 sll %o4, 12, %o0 ! shift middle bits left 12
127 srl %o5, 20, %o5 ! shift low bits right 20, zero fill at left
128 or %o5, %o0, %o0 ! construct low part of result
129 retl
130 sra %o4, 20, %o1 ! ... and extract high part of result
131
/*
 * .mul_patch: same result registers as .mul (%o0 = low word, %o1 = high
 * word of %o0 * %o1), computed with one smul instruction.
 * NOTE(review): the name suggests these instructions are copied over the
 * start of .mul at run time on CPUs that have hardware multiply --
 * confirm against whatever references .mul_patch.
 */
132 .globl .mul_patch
133.mul_patch:
134 smul %o0, %o1, %o0 /* low 32 bits -> %o0, high 32 bits -> %y */
135 retl
136 rd %y, %o1 /* delay slot: high word of the product */
137 nop
diff --git a/arch/sparc/lib/rem.S b/arch/sparc/lib/rem.S
new file mode 100644
index 00000000000..42fb8625281
--- /dev/null
+++ b/arch/sparc/lib/rem.S
@@ -0,0 +1,384 @@
1/*
2 * rem.S: This routine was taken from glibc-1.09 and is covered
3 * by the GNU Library General Public License Version 2.
4 */
5
6
7/* This file is generated from divrem.m4; DO NOT EDIT! */
8/*
9 * Division and remainder, from Appendix E of the Sparc Version 8
10 * Architecture Manual, with fixes from Gordon Irlam.
11 */
12
13/*
14 * Input: dividend and divisor in %o0 and %o1 respectively.
15 *
16 * m4 parameters:
17 * .rem name of function to generate
18 * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1
19 * true true=true => signed; true=false => unsigned
20 *
21 * Algorithm parameters:
22 * N how many bits per iteration we try to get (4)
23 * WORDSIZE total number of bits (32)
24 *
25 * Derived constants:
26 * TOPBITS number of bits in the top decade of a number
27 *
28 * Important variables:
29 * Q the partial quotient under development (initially 0)
30 * R the remainder so far, initially the dividend
31 * ITER number of main division loop iterations required;
32 * equal to ceil(log2(quotient) / N). Note that this
33 * is the log base (2^N) of the quotient.
34 * V the current comparand, initially divisor*2^(ITER*N-1)
35 *
36 * Cost:
37 * Current estimate for non-large dividend is
38 * ceil(log2(quotient) / N) * (10 + 7N/2) + C
39 * A large dividend is one greater than 2^(31-TOPBITS) and takes a
40 * different path, as the upper bits of the quotient must be developed
41 * one bit at a time.
42 */
43
44
/*
 * .rem / _Rem: signed 32-bit remainder, %o0 % %o1, by non-restoring
 * division that develops four quotient bits per Ldivloop pass.
 *
 * In:      %o0 = dividend, %o1 = divisor
 * Out:     %o0 = remainder; it is negated at Lgot_result when the
 *          original dividend (kept in %g2) was negative
 * Scratch: %o1..%o5, %g1, %g2, %g7, integer condition codes
 *
 * Register use, in terms of the variables named in the file header:
 *   %o2 = Q, %o3 = R, %o5 = V, %o4 = ITER,
 *   %g7 = count of single-bit steps on the large-dividend path,
 *   %g1 = 1 << 27, the large-dividend threshold.
 *
 * A zero divisor executes "ta ST_DIV0"; if that returns, 0 is returned.
 */
45 .globl .rem
46 .globl _Rem
47.rem:
48_Rem: /* needed for export */
49 ! compute sign of result; if neither is negative, no problem
50 orcc %o1, %o0, %g0 ! either negative?
51 bge 2f ! no, go do the divide
52 mov %o0, %g2 ! compute sign in any case
53
54 tst %o1
55 bge 1f
56 tst %o0
57 ! %o1 is definitely negative; %o0 might also be negative
58 bge 2f ! if %o0 not negative...
59 sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
601: ! %o0 is negative, %o1 is nonnegative
61 sub %g0, %o0, %o0 ! make %o0 nonnegative
622:
63
64 ! Ready to divide. Compute size of quotient; scale comparand.
65 orcc %o1, %g0, %o5 /* V = divisor; also sets Z when the divisor is 0 */
66 bne 1f
67 mov %o0, %o3 /* delay slot: R = dividend */
68
69 ! Divide by zero trap. If it returns, return 0 (about as
70 ! wrong as possible, but that is what SunOS does...).
71 ta ST_DIV0
72 retl
73 clr %o0
74
751:
76 cmp %o3, %o5 ! if %o1 exceeds %o0, done
77 blu Lgot_result ! (and algorithm fails otherwise)
78 clr %o2 /* delay slot: Q = 0 */
79
80 sethi %hi(1 << (32 - 4 - 1)), %g1 /* %g1 = 1 << 27 */
81
82 cmp %o3, %g1
83 blu Lnot_really_big
84 clr %o4 /* delay slot: ITER = 0 */
85
86 ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
87 ! as our usual N-at-a-shot divide step will cause overflow and havoc.
88 ! The number of bits in the result here is N*ITER+SC, where SC <= N.
89 ! Compute ITER in an unorthodox manner: know we need to shift V into
90 ! the top decade: so do not even bother to compare to R.
91 1:
92 cmp %o5, %g1
93 bgeu 3f
94 mov 1, %g7
95
96 sll %o5, 4, %o5
97
98 b 1b
99 add %o4, 1, %o4
100
101 ! Now compute %g7.
102 2:
103 addcc %o5, %o5, %o5
104
105 bcc Lnot_too_big
106 add %g7, 1, %g7
107
108 ! We get here if the %o1 overflowed while shifting.
109 ! This means that %o3 has the high-order bit set.
110 ! Restore %o5 and subtract from %o3.
111 sll %g1, 4, %g1 ! high order bit
112 srl %o5, 1, %o5 ! rest of %o5
113 add %o5, %g1, %o5
114
115 b Ldo_single_div
116 sub %g7, 1, %g7
117
118 Lnot_too_big:
119 3:
120 cmp %o5, %o3
121 blu 2b
122 nop
123
124 be Ldo_single_div
125 nop
126 /* NB: these are commented out in the V8-Sparc manual as well */
127 /* (I do not understand this) */
128 ! %o5 > %o3: went too far: back up 1 step
129 ! srl %o5, 1, %o5
130 ! dec %g7
131 ! do single-bit divide steps
132 !
133 ! We have to be careful here. We know that %o3 >= %o5, so we can do the
134 ! first divide step without thinking. BUT, the others are conditional,
135 ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
136 ! order bit set in the first step, just falling into the regular
137 ! division loop will mess up the first time around.
138 ! So we unroll slightly...
139 Ldo_single_div:
140 subcc %g7, 1, %g7
141 bl Lend_regular_divide
142 nop
143
144 sub %o3, %o5, %o3
145 mov 1, %o2
146
147 b Lend_single_divloop
148 nop
149 Lsingle_divloop:
150 sll %o2, 1, %o2
151
152 bl 1f
153 srl %o5, 1, %o5
154 ! %o3 >= 0
155 sub %o3, %o5, %o3
156
157 b 2f
158 add %o2, 1, %o2
159 1: ! %o3 < 0
160 add %o3, %o5, %o3
161 sub %o2, 1, %o2
162 2:
163 Lend_single_divloop:
164 subcc %g7, 1, %g7
165 bge Lsingle_divloop
166 tst %o3
167
168 b,a Lend_regular_divide
169
170Lnot_really_big:
1711:
172 sll %o5, 4, %o5
173 cmp %o5, %o3
174 bleu 1b
175 addcc %o4, 1, %o4
176 be Lgot_result
177 sub %o4, 1, %o4
178
179 tst %o3 ! set up for initial iteration
180Ldivloop:
181 sll %o2, 4, %o2
182 ! depth 1, accumulated bits 0
183 bl L.1.16
184 srl %o5,1,%o5
185 ! remainder is positive
186 subcc %o3,%o5,%o3
187 ! depth 2, accumulated bits 1
188 bl L.2.17
189 srl %o5,1,%o5
190 ! remainder is positive
191 subcc %o3,%o5,%o3
192 ! depth 3, accumulated bits 3
193 bl L.3.19
194 srl %o5,1,%o5
195 ! remainder is positive
196 subcc %o3,%o5,%o3
197 ! depth 4, accumulated bits 7
198 bl L.4.23
199 srl %o5,1,%o5
200 ! remainder is positive
201 subcc %o3,%o5,%o3
202
203 b 9f
204 add %o2, (7*2+1), %o2
205
206L.4.23:
207 ! remainder is negative
208 addcc %o3,%o5,%o3
209 b 9f
210 add %o2, (7*2-1), %o2
211
212L.3.19:
213 ! remainder is negative
214 addcc %o3,%o5,%o3
215 ! depth 4, accumulated bits 5
216 bl L.4.21
217 srl %o5,1,%o5
218 ! remainder is positive
219 subcc %o3,%o5,%o3
220 b 9f
221 add %o2, (5*2+1), %o2
222
223L.4.21:
224 ! remainder is negative
225 addcc %o3,%o5,%o3
226 b 9f
227 add %o2, (5*2-1), %o2
228
229L.2.17:
230 ! remainder is negative
231 addcc %o3,%o5,%o3
232 ! depth 3, accumulated bits 1
233 bl L.3.17
234 srl %o5,1,%o5
235 ! remainder is positive
236 subcc %o3,%o5,%o3
237 ! depth 4, accumulated bits 3
238 bl L.4.19
239 srl %o5,1,%o5
240 ! remainder is positive
241 subcc %o3,%o5,%o3
242 b 9f
243 add %o2, (3*2+1), %o2
244
245L.4.19:
246 ! remainder is negative
247 addcc %o3,%o5,%o3
248 b 9f
249 add %o2, (3*2-1), %o2
250
251L.3.17:
252 ! remainder is negative
253 addcc %o3,%o5,%o3
254 ! depth 4, accumulated bits 1
255 bl L.4.17
256 srl %o5,1,%o5
257 ! remainder is positive
258 subcc %o3,%o5,%o3
259 b 9f
260 add %o2, (1*2+1), %o2
261
262L.4.17:
263 ! remainder is negative
264 addcc %o3,%o5,%o3
265 b 9f
266 add %o2, (1*2-1), %o2
267
268L.1.16:
269 ! remainder is negative
270 addcc %o3,%o5,%o3
271 ! depth 2, accumulated bits -1
272 bl L.2.15
273 srl %o5,1,%o5
274 ! remainder is positive
275 subcc %o3,%o5,%o3
276 ! depth 3, accumulated bits -1
277 bl L.3.15
278 srl %o5,1,%o5
279 ! remainder is positive
280 subcc %o3,%o5,%o3
281 ! depth 4, accumulated bits -1
282 bl L.4.15
283 srl %o5,1,%o5
284 ! remainder is positive
285 subcc %o3,%o5,%o3
286 b 9f
287 add %o2, (-1*2+1), %o2
288
289L.4.15:
290 ! remainder is negative
291 addcc %o3,%o5,%o3
292 b 9f
293 add %o2, (-1*2-1), %o2
294
295L.3.15:
296 ! remainder is negative
297 addcc %o3,%o5,%o3
298 ! depth 4, accumulated bits -3
299 bl L.4.13
300 srl %o5,1,%o5
301 ! remainder is positive
302 subcc %o3,%o5,%o3
303 b 9f
304 add %o2, (-3*2+1), %o2
305
306L.4.13:
307 ! remainder is negative
308 addcc %o3,%o5,%o3
309 b 9f
310 add %o2, (-3*2-1), %o2
311
312L.2.15:
313 ! remainder is negative
314 addcc %o3,%o5,%o3
315 ! depth 3, accumulated bits -3
316 bl L.3.13
317 srl %o5,1,%o5
318 ! remainder is positive
319 subcc %o3,%o5,%o3
320 ! depth 4, accumulated bits -5
321 bl L.4.11
322 srl %o5,1,%o5
323 ! remainder is positive
324 subcc %o3,%o5,%o3
325 b 9f
326 add %o2, (-5*2+1), %o2
327
328L.4.11:
329 ! remainder is negative
330 addcc %o3,%o5,%o3
331 b 9f
332 add %o2, (-5*2-1), %o2
333
334
335L.3.13:
336 ! remainder is negative
337 addcc %o3,%o5,%o3
338 ! depth 4, accumulated bits -7
339 bl L.4.9
340 srl %o5,1,%o5
341 ! remainder is positive
342 subcc %o3,%o5,%o3
343 b 9f
344 add %o2, (-7*2+1), %o2
345
346L.4.9:
347 ! remainder is negative
348 addcc %o3,%o5,%o3
349 b 9f
350 add %o2, (-7*2-1), %o2
351
352 9:
353Lend_regular_divide:
354 subcc %o4, 1, %o4
355 bge Ldivloop
356 tst %o3
357
358 bl,a Lgot_result
359 ! non-restoring fixup here (one instruction only!)
360 add %o3, %o1, %o3
361
362Lgot_result:
363 ! check to see if answer should be < 0
364 tst %g2
365 bl,a 1f
366 sub %g0, %o3, %o3 /* annulled delay slot: runs only when %g2 < 0 */
3671:
368 retl
369 mov %o3, %o0 /* delay slot: return R */
370
/*
 * .rem_patch: %o0 = %o0 % %o1 computed as %o0 - (%o0 / %o1) * %o1 with
 * the sdivcc and smul instructions.  Scratch: %o2, %o4, %y, condition
 * codes.
 * NOTE(review): the name suggests these instructions are copied over the
 * start of .rem at run time on CPUs that have hardware divide -- confirm
 * against whatever references .rem_patch.
 */
371 .globl .rem_patch
372.rem_patch:
373 sra %o0, 0x1f, %o4 /* %o4 = sign bit of the dividend replicated into all 32 bits */
374 wr %o4, 0x0, %y /* %y = that sign extension (upper half of the 64-bit dividend) */
375 nop
376 nop
377 nop
378 sdivcc %o0, %o1, %o2 /* %o2 = quotient; V is set on overflow */
379 bvs,a 1f
380 xnor %o2, %g0, %o2 /* annulled delay slot: on overflow only, %o2 = ~%o2 */
3811: smul %o2, %o1, %o2 /* %o2 = quotient * divisor (low 32 bits) */
382 retl
383 sub %o0, %o2, %o0 /* delay slot: remainder = dividend - quotient * divisor */
384 nop
diff --git a/arch/sparc/lib/sdiv.S b/arch/sparc/lib/sdiv.S
new file mode 100644
index 00000000000..f0a0d4e4db7
--- /dev/null
+++ b/arch/sparc/lib/sdiv.S
@@ -0,0 +1,381 @@
1/*
2 * sdiv.S: This routine was taken from glibc-1.09 and is covered
3 * by the GNU Library General Public License Version 2.
4 */
5
6
7/* This file is generated from divrem.m4; DO NOT EDIT! */
8/*
9 * Division and remainder, from Appendix E of the Sparc Version 8
10 * Architecture Manual, with fixes from Gordon Irlam.
11 */
12
13/*
14 * Input: dividend and divisor in %o0 and %o1 respectively.
15 *
16 * m4 parameters:
17 * .div name of function to generate
18 * div div=div => %o0 / %o1; div=rem => %o0 % %o1
19 * true true=true => signed; true=false => unsigned
20 *
21 * Algorithm parameters:
22 * N how many bits per iteration we try to get (4)
23 * WORDSIZE total number of bits (32)
24 *
25 * Derived constants:
26 * TOPBITS number of bits in the top decade of a number
27 *
28 * Important variables:
29 * Q the partial quotient under development (initially 0)
30 * R the remainder so far, initially the dividend
31 * ITER number of main division loop iterations required;
32 * equal to ceil(log2(quotient) / N). Note that this
33 * is the log base (2^N) of the quotient.
34 * V the current comparand, initially divisor*2^(ITER*N-1)
35 *
36 * Cost:
37 * Current estimate for non-large dividend is
38 * ceil(log2(quotient) / N) * (10 + 7N/2) + C
39 * A large dividend is one greater than 2^(31-TOPBITS) and takes a
40 * different path, as the upper bits of the quotient must be developed
41 * one bit at a time.
42 */
43
44
/*
 * .div / _Div: signed 32-bit quotient, %o0 / %o1, by non-restoring
 * division that develops four quotient bits per Ldivloop pass.
 *
 * In:      %o0 = dividend, %o1 = divisor
 * Out:     %o0 = quotient; it is negated at Lgot_result when
 *          %g2 = dividend XOR divisor is negative, i.e. when the operand
 *          signs differ
 * Scratch: %o1..%o5, %g1, %g2, %g7, integer condition codes
 *
 * Register use, in terms of the variables named in the file header:
 *   %o2 = Q, %o3 = R, %o5 = V, %o4 = ITER,
 *   %g7 = count of single-bit steps on the large-dividend path,
 *   %g1 = 1 << 27, the large-dividend threshold.
 *
 * A zero divisor executes "ta ST_DIV0"; if that returns, 0 is returned.
 */
45 .globl .div
46 .globl _Div
47.div:
48_Div: /* needed for export */
49 ! compute sign of result; if neither is negative, no problem
50 orcc %o1, %o0, %g0 ! either negative?
51 bge 2f ! no, go do the divide
52 xor %o1, %o0, %g2 ! compute sign in any case
53
54 tst %o1
55 bge 1f
56 tst %o0
57 ! %o1 is definitely negative; %o0 might also be negative
58 bge 2f ! if %o0 not negative...
59 sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
601: ! %o0 is negative, %o1 is nonnegative
61 sub %g0, %o0, %o0 ! make %o0 nonnegative
622:
63
64 ! Ready to divide. Compute size of quotient; scale comparand.
65 orcc %o1, %g0, %o5 /* V = divisor; also sets Z when the divisor is 0 */
66 bne 1f
67 mov %o0, %o3 /* delay slot: R = dividend */
68
69 ! Divide by zero trap. If it returns, return 0 (about as
70 ! wrong as possible, but that is what SunOS does...).
71 ta ST_DIV0
72 retl
73 clr %o0
74
751:
76 cmp %o3, %o5 ! if %o1 exceeds %o0, done
77 blu Lgot_result ! (and algorithm fails otherwise)
78 clr %o2 /* delay slot: Q = 0 */
79
80 sethi %hi(1 << (32 - 4 - 1)), %g1 /* %g1 = 1 << 27 */
81
82 cmp %o3, %g1
83 blu Lnot_really_big
84 clr %o4 /* delay slot: ITER = 0 */
85
86 ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
87 ! as our usual N-at-a-shot divide step will cause overflow and havoc.
88 ! The number of bits in the result here is N*ITER+SC, where SC <= N.
89 ! Compute ITER in an unorthodox manner: know we need to shift V into
90 ! the top decade: so do not even bother to compare to R.
91 1:
92 cmp %o5, %g1
93 bgeu 3f
94 mov 1, %g7
95
96 sll %o5, 4, %o5
97
98 b 1b
99 add %o4, 1, %o4
100
101 ! Now compute %g7.
102 2:
103 addcc %o5, %o5, %o5
104 bcc Lnot_too_big
105 add %g7, 1, %g7
106
107 ! We get here if the %o1 overflowed while shifting.
108 ! This means that %o3 has the high-order bit set.
109 ! Restore %o5 and subtract from %o3.
110 sll %g1, 4, %g1 ! high order bit
111 srl %o5, 1, %o5 ! rest of %o5
112 add %o5, %g1, %o5
113
114 b Ldo_single_div
115 sub %g7, 1, %g7
116
117 Lnot_too_big:
118 3:
119 cmp %o5, %o3
120 blu 2b
121 nop
122
123 be Ldo_single_div
124 nop
125 /* NB: these are commented out in the V8-Sparc manual as well */
126 /* (I do not understand this) */
127 ! %o5 > %o3: went too far: back up 1 step
128 ! srl %o5, 1, %o5
129 ! dec %g7
130 ! do single-bit divide steps
131 !
132 ! We have to be careful here. We know that %o3 >= %o5, so we can do the
133 ! first divide step without thinking. BUT, the others are conditional,
134 ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
135 ! order bit set in the first step, just falling into the regular
136 ! division loop will mess up the first time around.
137 ! So we unroll slightly...
138 Ldo_single_div:
139 subcc %g7, 1, %g7
140 bl Lend_regular_divide
141 nop
142
143 sub %o3, %o5, %o3
144 mov 1, %o2
145
146 b Lend_single_divloop
147 nop
148 Lsingle_divloop:
149 sll %o2, 1, %o2
150
151 bl 1f
152 srl %o5, 1, %o5
153 ! %o3 >= 0
154 sub %o3, %o5, %o3
155
156 b 2f
157 add %o2, 1, %o2
158 1: ! %o3 < 0
159 add %o3, %o5, %o3
160 sub %o2, 1, %o2
161 2:
162 Lend_single_divloop:
163 subcc %g7, 1, %g7
164 bge Lsingle_divloop
165 tst %o3
166
167 b,a Lend_regular_divide
168
169Lnot_really_big:
1701:
171 sll %o5, 4, %o5
172 cmp %o5, %o3
173 bleu 1b
174 addcc %o4, 1, %o4
175
176 be Lgot_result
177 sub %o4, 1, %o4
178
179 tst %o3 ! set up for initial iteration
180Ldivloop:
181 sll %o2, 4, %o2
182 ! depth 1, accumulated bits 0
183 bl L.1.16
184 srl %o5,1,%o5
185 ! remainder is positive
186 subcc %o3,%o5,%o3
187 ! depth 2, accumulated bits 1
188 bl L.2.17
189 srl %o5,1,%o5
190 ! remainder is positive
191 subcc %o3,%o5,%o3
192 ! depth 3, accumulated bits 3
193 bl L.3.19
194 srl %o5,1,%o5
195 ! remainder is positive
196 subcc %o3,%o5,%o3
197 ! depth 4, accumulated bits 7
198 bl L.4.23
199 srl %o5,1,%o5
200 ! remainder is positive
201 subcc %o3,%o5,%o3
202 b 9f
203 add %o2, (7*2+1), %o2
204
205L.4.23:
206 ! remainder is negative
207 addcc %o3,%o5,%o3
208 b 9f
209 add %o2, (7*2-1), %o2
210
211L.3.19:
212 ! remainder is negative
213 addcc %o3,%o5,%o3
214 ! depth 4, accumulated bits 5
215 bl L.4.21
216 srl %o5,1,%o5
217 ! remainder is positive
218 subcc %o3,%o5,%o3
219 b 9f
220 add %o2, (5*2+1), %o2
221
222L.4.21:
223 ! remainder is negative
224 addcc %o3,%o5,%o3
225 b 9f
226 add %o2, (5*2-1), %o2
227
228L.2.17:
229 ! remainder is negative
230 addcc %o3,%o5,%o3
231 ! depth 3, accumulated bits 1
232 bl L.3.17
233 srl %o5,1,%o5
234 ! remainder is positive
235 subcc %o3,%o5,%o3
236 ! depth 4, accumulated bits 3
237 bl L.4.19
238 srl %o5,1,%o5
239 ! remainder is positive
240 subcc %o3,%o5,%o3
241 b 9f
242 add %o2, (3*2+1), %o2
243
244L.4.19:
245 ! remainder is negative
246 addcc %o3,%o5,%o3
247 b 9f
248 add %o2, (3*2-1), %o2
249
250
251L.3.17:
252 ! remainder is negative
253 addcc %o3,%o5,%o3
254 ! depth 4, accumulated bits 1
255 bl L.4.17
256 srl %o5,1,%o5
257 ! remainder is positive
258 subcc %o3,%o5,%o3
259 b 9f
260 add %o2, (1*2+1), %o2
261
262L.4.17:
263 ! remainder is negative
264 addcc %o3,%o5,%o3
265 b 9f
266 add %o2, (1*2-1), %o2
267
268L.1.16:
269 ! remainder is negative
270 addcc %o3,%o5,%o3
271 ! depth 2, accumulated bits -1
272 bl L.2.15
273 srl %o5,1,%o5
274 ! remainder is positive
275 subcc %o3,%o5,%o3
276 ! depth 3, accumulated bits -1
277 bl L.3.15
278 srl %o5,1,%o5
279 ! remainder is positive
280 subcc %o3,%o5,%o3
281 ! depth 4, accumulated bits -1
282 bl L.4.15
283 srl %o5,1,%o5
284 ! remainder is positive
285 subcc %o3,%o5,%o3
286 b 9f
287 add %o2, (-1*2+1), %o2
288
289L.4.15:
290 ! remainder is negative
291 addcc %o3,%o5,%o3
292 b 9f
293 add %o2, (-1*2-1), %o2
294
295L.3.15:
296 ! remainder is negative
297 addcc %o3,%o5,%o3
298 ! depth 4, accumulated bits -3
299 bl L.4.13
300 srl %o5,1,%o5
301 ! remainder is positive
302 subcc %o3,%o5,%o3
303 b 9f
304 add %o2, (-3*2+1), %o2
305
306L.4.13:
307 ! remainder is negative
308 addcc %o3,%o5,%o3
309 b 9f
310 add %o2, (-3*2-1), %o2
311
312L.2.15:
313 ! remainder is negative
314 addcc %o3,%o5,%o3
315 ! depth 3, accumulated bits -3
316 bl L.3.13
317 srl %o5,1,%o5
318 ! remainder is positive
319 subcc %o3,%o5,%o3
320 ! depth 4, accumulated bits -5
321 bl L.4.11
322 srl %o5,1,%o5
323 ! remainder is positive
324 subcc %o3,%o5,%o3
325 b 9f
326 add %o2, (-5*2+1), %o2
327
328L.4.11:
329 ! remainder is negative
330 addcc %o3,%o5,%o3
331 b 9f
332 add %o2, (-5*2-1), %o2
333
334L.3.13:
335 ! remainder is negative
336 addcc %o3,%o5,%o3
337 ! depth 4, accumulated bits -7
338 bl L.4.9
339 srl %o5,1,%o5
340 ! remainder is positive
341 subcc %o3,%o5,%o3
342 b 9f
343 add %o2, (-7*2+1), %o2
344
345L.4.9:
346 ! remainder is negative
347 addcc %o3,%o5,%o3
348 b 9f
349 add %o2, (-7*2-1), %o2
350
351 9:
352Lend_regular_divide:
353 subcc %o4, 1, %o4
354 bge Ldivloop
355 tst %o3
356
357 bl,a Lgot_result
358 ! non-restoring fixup here (one instruction only!)
359 sub %o2, 1, %o2
360
361Lgot_result:
362 ! check to see if answer should be < 0
363 tst %g2
364 bl,a 1f
365 sub %g0, %o2, %o2 /* annulled delay slot: runs only when %g2 < 0 */
3661:
367 retl
368 mov %o2, %o0 /* delay slot: return Q */
369
/*
 * .div_patch: %o0 = %o0 / %o1 computed with the sdivcc instruction.
 * Scratch: %o2, %y, condition codes.
 * NOTE(review): the name suggests these instructions are copied over the
 * start of .div at run time on CPUs that have hardware divide -- confirm
 * against whatever references .div_patch.
 */
370 .globl .div_patch
371.div_patch:
372 sra %o0, 0x1f, %o2 /* %o2 = sign bit of the dividend replicated into all 32 bits */
373 wr %o2, 0x0, %y /* %y = that sign extension (upper half of the 64-bit dividend) */
374 nop
375 nop
376 nop
377 sdivcc %o0, %o1, %o0 /* %o0 = quotient; V is set on overflow */
378 bvs,a 1f
379 xnor %o0, %g0, %o0 /* annulled delay slot: on overflow only, %o0 = ~%o0 */
3801: retl
381 nop
diff --git a/arch/sparc/lib/strlen_user_32.S b/arch/sparc/lib/strlen_user_32.S
new file mode 100644
index 00000000000..8c8a371df3c
--- /dev/null
+++ b/arch/sparc/lib/strlen_user_32.S
@@ -0,0 +1,109 @@
1/* strlen_user.S: Sparc optimized strlen_user code
2 *
3 * Return length of string in userspace including terminating 0
4 * or 0 for error
5 *
6 * Copyright (C) 1991,1996 Free Software Foundation
7 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
8 * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
9 */
10
11#define LO_MAGIC 0x01010101
12#define HI_MAGIC 0x80808080
13
/*
 * __strlen_user / __strnlen_user (32-bit).
 *
 * In:      %o0 = string pointer
 *          %o1 = length limit (__strnlen_user only; __strlen_user loads
 *                32768 into it and falls through)
 * Out:     %o0 = length including the terminating 0, or 0 when one of
 *          the loads listed in __ex_table below faults
 * Scratch: %o1 (start pointer), %o2 (LO_MAGIC), %o3 (HI_MAGIC), %o4,
 *          %o5, %g1 (limit), %g2, %g5, integer condition codes
 *
 * Layout: the code at labels 10/11/12 placed ahead of the entry points
 * handles up to three leading unaligned bytes and finishes building the
 * two magic constants in the branch delay slots.  The aligned loop at 13
 * then tests one word at a time: (word - LO_MAGIC) & HI_MAGIC is nonzero
 * whenever the word may contain a zero byte, and the byte-by-byte check
 * at 82 confirms it or resumes the loop.  The limit in %g1 is compared
 * only once per word (label 81), never in the byte prologue.
 */
1410:
15 ldub [%o0], %o5
16 cmp %o5, 0
17 be 1f
18 add %o0, 1, %o0
19 andcc %o0, 3, %g0
20 be 4f
21 or %o4, %lo(HI_MAGIC), %o3 /* delay slot: %o3 = HI_MAGIC (the entry code did the sethi) */
2211:
23 ldub [%o0], %o5
24 cmp %o5, 0
25 be 2f
26 add %o0, 1, %o0
27 andcc %o0, 3, %g0
28 be 5f
29 sethi %hi(LO_MAGIC), %o4 /* delay slot: upper bits of LO_MAGIC */
3012:
31 ldub [%o0], %o5
32 cmp %o5, 0
33 be 3f
34 add %o0, 1, %o0
35 b 13f
36 or %o4, %lo(LO_MAGIC), %o2 /* delay slot: %o2 = LO_MAGIC */
371:
38 retl
39 mov 1, %o0 /* first byte was the 0 */
402:
41 retl
42 mov 2, %o0 /* second byte was the 0 */
433:
44 retl
45 mov 3, %o0 /* third byte was the 0 */
46
47 .align 4
48 .global __strlen_user, __strnlen_user
49__strlen_user:
50 sethi %hi(32768), %o1 /* limit = 32768, then fall into __strnlen_user */
51__strnlen_user:
52 mov %o1, %g1 /* %g1 = limit */
53 mov %o0, %o1 /* %o1 = start of string, kept for the final subtraction */
54 andcc %o0, 3, %g0
55 bne 10b /* not word aligned: take the byte prologue */
56 sethi %hi(HI_MAGIC), %o4 /* delay slot, executed on both paths */
57 or %o4, %lo(HI_MAGIC), %o3
584:
59 sethi %hi(LO_MAGIC), %o4
605:
61 or %o4, %lo(LO_MAGIC), %o2
6213:
63 ld [%o0], %o5
642:
65 sub %o5, %o2, %o4
66 andcc %o4, %o3, %g0 /* nonzero: the word may hold a zero byte */
67 bne 82f
68 add %o0, 4, %o0 /* delay slot: advance to the next word either way */
69 sub %o0, %o1, %g2 /* %g2 = bytes examined so far */
7081: cmp %g2, %g1
71 blu 13b /* still under the limit: next word */
72 mov %o0, %o4 /* delay slot: %o4 = current position */
73 ba,a 1f /* limit reached: return %o4 - start */
74
75 /* Check every byte. */
7682: srl %o5, 24, %g5
77 andcc %g5, 0xff, %g0
78 be 1f
79 add %o0, -3, %o4 /* delay slot: %o4 = one past the first byte of this word */
80 srl %o5, 16, %g5
81 andcc %g5, 0xff, %g0
82 be 1f
83 add %o4, 1, %o4
84 srl %o5, 8, %g5
85 andcc %g5, 0xff, %g0
86 be 1f
87 add %o4, 1, %o4
88 andcc %o5, 0xff, %g0
89 bne 81b /* no zero byte after all: back to the limit check */
90 sub %o0, %o1, %g2 /* delay slot: recompute the byte count for label 81 */
91
92 add %o4, 1, %o4
931:
94 retl
95 sub %o4, %o1, %o0 /* delay slot: length = %o4 - start */
96
97 .section .fixup,#alloc,#execinstr
98 .align 4
999:
100 retl
101 clr %o0 /* fault path: return 0 */
102
103 .section __ex_table,#alloc
104 .align 4
105
 /* Each entry pairs a load from the string (10, 11, 12, 13) with fixup 9. */
106 .word 10b, 9b
107 .word 11b, 9b
108 .word 12b, 9b
109 .word 13b, 9b
diff --git a/arch/sparc/lib/strlen_user_64.S b/arch/sparc/lib/strlen_user_64.S
new file mode 100644
index 00000000000..114ed111e25
--- /dev/null
+++ b/arch/sparc/lib/strlen_user_64.S
@@ -0,0 +1,95 @@
1/* strlen_user.S: Sparc64 optimized strlen_user code
2 *
3 * Return length of string in userspace including terminating 0
4 * or 0 for error
5 *
6 * Copyright (C) 1991,1996 Free Software Foundation
7 * Copyright (C) 1996,1999 David S. Miller (davem@redhat.com)
8 * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
9 */
10
11#include <asm/asi.h>
12
13#define LO_MAGIC 0x01010101
14#define HI_MAGIC 0x80808080
15
/*
 * __strlen_user / __strnlen_user (sparc64).
 *
 * In:      %o0 = string pointer, read with lduba/lda through %asi
 *          %o1 = length limit (__strnlen_user only; __strlen_user loads
 *                32768 into it and falls through)
 * Out:     %o0 = length including the terminating 0, or 0 when one of
 *          the loads listed in __ex_table below faults
 * Scratch: %o1 (start pointer), %o2 (LO_MAGIC), %o3 (HI_MAGIC), %o4,
 *          %o5, %g1 (limit), %g2, %g7, integer condition codes
 *
 * Up to three leading bytes are tested one at a time (labels 10/11/12)
 * until %o0 is word aligned.  The loop at 13 then tests a word per pass:
 * (word - LO_MAGIC) & HI_MAGIC is nonzero whenever the word may contain a
 * zero byte, and the byte-by-byte check at 82 confirms it or resumes the
 * loop.  The limit in %g1 is compared only once per word (label 81).
 */
16 .align 4
17 .global __strlen_user, __strnlen_user
18__strlen_user:
19 sethi %hi(32768), %o1 /* limit = 32768, then fall into __strnlen_user */
20__strnlen_user:
21 mov %o1, %g1 /* %g1 = limit */
22 mov %o0, %o1 /* %o1 = start of string, kept for the final subtraction */
23 andcc %o0, 3, %g0
24 be,pt %icc, 9f /* already word aligned */
25 sethi %hi(HI_MAGIC), %o4 /* delay slot, executed on both paths */
2610: lduba [%o0] %asi, %o5
27 brz,pn %o5, 21f
28 add %o0, 1, %o0
29 andcc %o0, 3, %g0
30 be,pn %icc, 4f
31 or %o4, %lo(HI_MAGIC), %o3 /* delay slot: %o3 = HI_MAGIC */
3211: lduba [%o0] %asi, %o5
33 brz,pn %o5, 22f
34 add %o0, 1, %o0
35 andcc %o0, 3, %g0
36 be,pt %icc, 13f
37 srl %o3, 7, %o2 /* delay slot: %o2 = HI_MAGIC >> 7 = LO_MAGIC */
3812: lduba [%o0] %asi, %o5
39 brz,pn %o5, 23f
40 add %o0, 1, %o0
41 ba,pt %icc, 2f
4215: lda [%o0] %asi, %o5 /* delay slot: first aligned word, so label 13 is skipped */
439: or %o4, %lo(HI_MAGIC), %o3
444: srl %o3, 7, %o2 /* %o2 = HI_MAGIC >> 7 = LO_MAGIC */
4513: lda [%o0] %asi, %o5
462: sub %o5, %o2, %o4
47 andcc %o4, %o3, %g0 /* nonzero: the word may hold a zero byte */
48 bne,pn %icc, 82f
49 add %o0, 4, %o0 /* delay slot: advance to the next word either way */
50 sub %o0, %o1, %g2 /* %g2 = bytes examined so far */
5181: cmp %g2, %g1
52 blu,pt %icc, 13b /* still under the limit: next word */
53 mov %o0, %o4 /* delay slot: %o4 = current position */
54 ba,a,pt %xcc, 1f /* limit reached: return %o4 - start */
55
56 /* Check every byte. */
5782: srl %o5, 24, %g7
58 andcc %g7, 0xff, %g0
59 be,pn %icc, 1f
60 add %o0, -3, %o4 /* delay slot: %o4 = one past the first byte of this word */
61 srl %o5, 16, %g7
62 andcc %g7, 0xff, %g0
63 be,pn %icc, 1f
64 add %o4, 1, %o4
65 srl %o5, 8, %g7
66 andcc %g7, 0xff, %g0
67 be,pn %icc, 1f
68 add %o4, 1, %o4
69 andcc %o5, 0xff, %g0
70 bne,pt %icc, 81b /* no zero byte after all: back to the limit check */
71 sub %o0, %o1, %g2 /* delay slot: recompute the byte count for label 81 */
72 add %o4, 1, %o4
731: retl
74 sub %o4, %o1, %o0 /* delay slot: length = %o4 - start */
7521: retl
76 mov 1, %o0 /* first byte was the 0 */
7722: retl
78 mov 2, %o0 /* second byte was the 0 */
7923: retl
80 mov 3, %o0 /* third byte was the 0 */
81
82 .section .fixup,#alloc,#execinstr
83 .align 4
8430:
85 retl
86 clr %o0 /* fault path: return 0 */
87
88 .section __ex_table,"a"
89 .align 4
90
 /* Each entry pairs a load from the string (10, 11, 12, 15, 13) with fixup 30. */
91 .word 10b, 30b
92 .word 11b, 30b
93 .word 12b, 30b
94 .word 15b, 30b
95 .word 13b, 30b
diff --git a/arch/sparc/lib/strncpy_from_user_32.S b/arch/sparc/lib/strncpy_from_user_32.S
new file mode 100644
index 00000000000..d77198976a6
--- /dev/null
+++ b/arch/sparc/lib/strncpy_from_user_32.S
@@ -0,0 +1,47 @@
1/* strncpy_from_user.S: Sparc strncpy from userspace.
2 *
3 * Copyright(C) 1996 David S. Miller
4 */
5
6#include <asm/ptrace.h>
7#include <asm/errno.h>
8
9 .text
10 .align 4
11
12 /* Must return:
13 *
14 * -EFAULT for an exception
15 * count if we hit the buffer limit
16 * bytes copied if we hit a null byte
17 */
18
/*
 * __strncpy_from_user (32-bit): byte-at-a-time copy from %o1 to %o0.
 *
 * In:      %o0 = dest, %o1 = src, %o2 = count
 * Out:     %o0 = number of bytes copied before the 0 byte when one is
 *          found (the 0 itself is stored but not counted), count when
 *          the limit is hit first, or -EFAULT when the load at label 10
 *          faults
 * Scratch: %o1, %o2, %o3 (original count), %o4 (current byte),
 *          integer condition codes
 */
19 .globl __strncpy_from_user
20__strncpy_from_user:
21 /* %o0=dest, %o1=src, %o2=count */
22 mov %o2, %o3 /* keep the original count for the return value */
231:
24 subcc %o2, 1, %o2 /* one fewer byte allowed */
25 bneg 2f /* budget exhausted */
26 nop
2710:
28 ldub [%o1], %o4
29 add %o0, 1, %o0
30 cmp %o4, 0
31 add %o1, 1, %o1 /* plain add: leaves the cmp result intact for bne */
32 bne 1b
33 stb %o4, [%o0 - 1] /* delay slot: store the byte, including the final 0 */
342:
35 add %o2, 1, %o0 /* %o0 = remaining budget + 1 */
36 retl
37 sub %o3, %o0, %o0 /* delay slot: result = count - (remaining + 1) */
38
39 .section .fixup,#alloc,#execinstr
40 .align 4
414:
42 retl
43 mov -EFAULT, %o0 /* fault path: return -EFAULT */
44
45 .section __ex_table,#alloc
46 .align 4
47 .word 10b, 4b
diff --git a/arch/sparc/lib/strncpy_from_user_64.S b/arch/sparc/lib/strncpy_from_user_64.S
new file mode 100644
index 00000000000..511c8f136f9
--- /dev/null
+++ b/arch/sparc/lib/strncpy_from_user_64.S
@@ -0,0 +1,135 @@
1/*
2 * strncpy_from_user.S: Sparc64 strncpy from userspace.
3 *
4 * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
5 */
6
7#include <asm/asi.h>
8#include <asm/errno.h>
9
10 .data
11 .align 8
120: .xword 0x0101010101010101
13
14 .text
15 .align 32
16
17 /* Must return:
18 *
19 * -EFAULT for an exception
20 * count if we hit the buffer limit
21 * bytes copied if we hit a null byte
22 * (without the null byte)
23 *
24 * This implementation assumes:
25 * %o1 is 8 aligned => !(%o2 & 7)
26 * %o0 is 8 aligned (if not, it will be slooooow, but will work)
27 *
28 * This is optimized for the common case:
29 * in my stats, 90% of src are 8 aligned (even on sparc32)
30 * and average length is 18 or so.
31 */
32
33 .globl __strncpy_from_user
34 .type __strncpy_from_user,#function
35__strncpy_from_user:
36 /* %o0=dest, %o1=src, %o2=count */
/*
 * Two paths are selected by the low three bits of src (%o1):
 *   - src 8-byte aligned: copy 8 bytes per iteration (labels 50/1/5/2 below)
 *   - otherwise: copy one byte per iteration (label 30 below)
 * The add in the delay slot of the first branch runs on both paths and
 * leaves %g3 = dest + count, the end-of-buffer mark used by the word loop.
 */
37 andcc %o1, 7, %g0 ! IEU1 Group
38 bne,pn %icc, 30f ! CTI
39 add %o0, %o2, %g3 ! IEU0
/* First source word is fetched before count is tested; count <= 0 returns count at 10. */
4060: ldxa [%o1] %asi, %g1 ! Load Group
41 brlez,pn %o2, 10f ! CTI
42 mov %o0, %o3 ! IEU0
/*
 * %o3 = starting dest.  %o4 = 0x0101010101010101 (the .xword at label 0),
 * %o5 = %o4 << 7.  They are reloaded on every re-entry at 50 because the
 * per-byte checks at 5 overwrite %o4.
 */
4350: sethi %hi(0b), %o4 ! IEU0 Group
44 ldx [%o4 + %lo(0b)], %o4 ! Load
45 sllx %o4, 7, %o5 ! IEU1 Group
/*
 * Word loop: the word is stored unconditionally, then (word - %o4) & %o5
 * is tested.  A non-zero result only flags a possible zero byte; the
 * byte-by-byte checks at 5 decide whether one is really present.
 */
461: sub %g1, %o4, %g2 ! IEU0 Group
47 stx %g1, [%o0] ! Store
48 add %o0, 8, %o0 ! IEU1
49 andcc %g2, %o5, %g0 ! IEU1 Group
50 bne,pn %xcc, 5f ! CTI
51 add %o1, 8, %o1 ! IEU0
52 cmp %o0, %g3 ! IEU1 Group
53 bl,a,pt %xcc, 1b ! CTI
5461: ldxa [%o1] %asi, %g1 ! Load
/* dest reached %g3 (or count <= 0 on entry): return count. */
5510: retl ! CTI Group
56 mov %o2, %o0 ! IEU0
/*
 * Possible zero byte in the word just stored.  First test the upper 32
 * bits of %g2; if nothing is flagged there, skip to the lower-half test
 * at the first 2 below.  %o4 becomes 0xff00 so that each 16-bit slice of
 * %g1 can be tested one byte at a time, most significant byte first.
 * Each andcc sitting in a delay slot sets the flags for the next branch.
 */
575: srlx %g2, 32, %g7 ! IEU0 Group
58 sethi %hi(0xff00), %o4 ! IEU1
59 andcc %g7, %o5, %g0 ! IEU1 Group
60 be,pn %icc, 2f ! CTI
61 or %o4, %lo(0xff00), %o4 ! IEU0
62 srlx %g1, 48, %g7 ! IEU0 Group
63 andcc %g7, %o4, %g0 ! IEU1 Group
64 be,pn %icc, 50f ! CTI
65 andcc %g7, 0xff, %g0 ! IEU1 Group
66 be,pn %icc, 51f ! CTI
67 srlx %g1, 32, %g7 ! IEU0
68 andcc %g7, %o4, %g0 ! IEU1 Group
69 be,pn %icc, 52f ! CTI
70 andcc %g7, 0xff, %g0 ! IEU1 Group
71 be,pn %icc, 53f ! CTI
/* Label 2 sits on the delay slot of the branch above and is also the target of the branch at 5. */
722: andcc %g2, %o5, %g0 ! IEU1 Group
73 be,pn %icc, 2f ! CTI
74 srl %g1, 16, %g7 ! IEU0
75 andcc %g7, %o4, %g0 ! IEU1 Group
76 be,pn %icc, 54f ! CTI
77 andcc %g7, 0xff, %g0 ! IEU1 Group
78 be,pn %icc, 55f ! CTI
79 andcc %g1, %o4, %g0 ! IEU1 Group
80 be,pn %icc, 56f ! CTI
81 andcc %g1, 0xff, %g0 ! IEU1 Group
82 be,a,pn %icc, 57f ! CTI
83 sub %o0, %o3, %o0 ! IEU0
/* No byte of the word was zero after all: continue at 50 with the next word, or return count at the end of the buffer. */
842: cmp %o0, %g3 ! IEU1 Group
85 bl,a,pt %xcc, 50b ! CTI
8662: ldxa [%o1] %asi, %g1 ! Load
87 retl ! CTI Group
88 mov %o2, %o0 ! IEU0
/*
 * Exits 50..57: a zero byte was found at position 0..7 of the last word.
 * %o0 has already been advanced past that word, so the result is
 * (%o0 - %o3) - 8 .. (%o0 - %o3) - 1, the number of bytes before the zero.
 */
8950: sub %o0, %o3, %o0
90 retl
91 sub %o0, 8, %o0
9251: sub %o0, %o3, %o0
93 retl
94 sub %o0, 7, %o0
9552: sub %o0, %o3, %o0
96 retl
97 sub %o0, 6, %o0
9853: sub %o0, %o3, %o0
99 retl
100 sub %o0, 5, %o0
10154: sub %o0, %o3, %o0
102 retl
103 sub %o0, 4, %o0
10455: sub %o0, %o3, %o0
105 retl
106 sub %o0, 3, %o0
10756: sub %o0, %o3, %o0
108 retl
109 sub %o0, 2, %o0
11057: retl
111 sub %o0, 1, %o0
/*
 * Byte path for a src that is not 8-byte aligned.  %o3 = -count and
 * %o0 = dest + count, so [%o0 + %o3] walks the destination while %o3
 * counts up towards zero.  The stb is in the delay slot of brz, so the
 * zero byte is stored before the branch to the second 2 is taken.
 */
11230: brlez,pn %o2, 3f
113 sub %g0, %o2, %o3
114 add %o0, %o2, %o0
11563: lduba [%o1] %asi, %o4
1161: add %o1, 1, %o1
117 brz,pn %o4, 2f
118 stb %o4, [%o0 + %o3]
119 addcc %o3, 1, %o3
120 bne,pt %xcc, 1b
12164: lduba [%o1] %asi, %o4
/* Buffer limit reached (or count <= 0): return count. */
1223: retl
123 mov %o2, %o0
/* Zero byte found: return count + %o3, the number of bytes before it. */
1242: retl
125 add %o2, %o3, %o0
126 .size __strncpy_from_user, .-__strncpy_from_user
127
128 .section __ex_table,"a"
129 .align 4
/* Every user-space load above (labels 60-64) is paired with __retl_efault, which is defined outside this file. */
130 .word 60b, __retl_efault
131 .word 61b, __retl_efault
132 .word 62b, __retl_efault
133 .word 63b, __retl_efault
134 .word 64b, __retl_efault
135 .previous
diff --git a/arch/sparc/lib/udiv.S b/arch/sparc/lib/udiv.S
new file mode 100644
index 00000000000..2101405bdfc
--- /dev/null
+++ b/arch/sparc/lib/udiv.S
@@ -0,0 +1,357 @@
1/*
2 * udiv.S: This routine was taken from glibc-1.09 and is covered
3 * by the GNU Library General Public License Version 2.
4 */
5
6
7/* This file is generated from divrem.m4; DO NOT EDIT! */
8/*
9 * Division and remainder, from Appendix E of the Sparc Version 8
10 * Architecture Manual, with fixes from Gordon Irlam.
11 */
12
13/*
14 * Input: dividend and divisor in %o0 and %o1 respectively.
15 *
16 * m4 parameters:
17 * .udiv name of function to generate
18 * div operation selected: div => %o0 / %o1; rem => %o0 % %o1
19 * false signedness selected: true => signed; false => unsigned
20 *
21 * Algorithm parameters:
22 * N how many bits per iteration we try to get (4)
23 * WORDSIZE total number of bits (32)
24 *
25 * Derived constants:
26 * TOPBITS number of bits in the top decade of a number
27 *
28 * Important variables:
29 * Q the partial quotient under development (initially 0)
30 * R the remainder so far, initially the dividend
31 * ITER number of main division loop iterations required;
32 * equal to ceil(log2(quotient) / N). Note that this
33 * is the log base (2^N) of the quotient.
34 * V the current comparand, initially divisor*2^(ITER*N-1)
35 *
36 * Cost:
37 * Current estimate for non-large dividend is
38 * ceil(log2(quotient) / N) * (10 + 7N/2) + C
39 * A large dividend is one greater than 2^(31-TOPBITS) and takes a
40 * different path, as the upper bits of the quotient must be developed
41 * one bit at a time.
42 */
43
44
45 .globl .udiv
46 .globl _Udiv
47.udiv:
48_Udiv: /* needed for export */
/*
 * Unsigned 32-bit divide: %o0 / %o1, quotient returned in %o0.
 * Registers initialised in this entry sequence:
 *   %o3 = running remainder (copy of the dividend %o0)
 *   %o5 = comparand (copy of the divisor %o1)
 *   %o2 = quotient under construction, cleared
 *   %o4 = iteration counter for the 4-bit divide loop, cleared
 *   %g1 = 1 << (32 - 4 - 1), the threshold for the large-dividend path
 */
49
50 ! Ready to divide. Compute size of quotient; scale comparand.
51 orcc %o1, %g0, %o5 /* copy divisor and set flags so zero can be detected */
52 bne 1f
53 mov %o0, %o3 /* delay slot: runs whether or not the divisor is zero */
54
55 ! Divide by zero trap. If it returns, return 0 (about as
56 ! wrong as possible, but that is what SunOS does...).
57 ta ST_DIV0
58 retl
59 clr %o0
60
611:
62 cmp %o3, %o5 ! if %o1 exceeds %o0, done
63 blu Lgot_result ! (and algorithm fails otherwise)
64 clr %o2 /* delay slot: quotient = 0, which is the answer if the branch is taken */
65
66 sethi %hi(1 << (32 - 4 - 1)), %g1
67
68 cmp %o3, %g1
69 blu Lnot_really_big /* unsigned dividend below the threshold: use the 4-bits-per-step loop */
70 clr %o4 /* delay slot: iteration counter = 0 on both paths */
71
72 ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
73 ! as our usual N-at-a-shot divide step will cause overflow and havoc.
74 ! The number of bits in the result here is N*ITER+SC, where SC <= N.
75 ! Compute ITER in an unorthodox manner: know we need to shift V into
76 ! the top decade: so do not even bother to compare to R.
77 1:
/*
 * Large-dividend path.  Scale loop: while the comparand %o5 is below the
 * threshold in %g1, shift it left by 4 and count one more 4-bit iteration
 * in %o4.  %g7 (the single-bit step counter) is set to 1 in the delay
 * slot on every pass.
 */
78 cmp %o5, %g1
79 bgeu 3f
80 mov 1, %g7
81
82 sll %o5, 4, %o5
83
84 b 1b
85 add %o4, 1, %o4
86
87 ! Now compute %g7.
88 2:
/* Double the comparand; a carry out means the doubling overflowed 32 bits. */
89 addcc %o5, %o5, %o5
90 bcc Lnot_too_big
91 add %g7, 1, %g7
92
93 ! We get here if the %o1 overflowed while shifting.
94 ! This means that %o3 has the high-order bit set.
95 ! Restore %o5 and subtract from %o3.
96 sll %g1, 4, %g1 ! high order bit
97 srl %o5, 1, %o5 ! rest of %o5
98 add %o5, %g1, %o5
99
100 b Ldo_single_div
101 sub %g7, 1, %g7 /* delay slot: undo the count for the doubling that overflowed */
102
103 Lnot_too_big:
104 3:
/* Keep doubling (at 2) while the comparand is still below the remainder. */
105 cmp %o5, %o3
106 blu 2b
107 nop
108
109 be Ldo_single_div
110 nop
111 /* NB: these are commented out in the V8-Sparc manual as well */
112 /* (I do not understand this) */
113 ! %o5 > %o3: went too far: back up 1 step
114 ! srl %o5, 1, %o5
115 ! dec %g7
116 ! do single-bit divide steps
117 !
118 ! We have to be careful here. We know that %o3 >= %o5, so we can do the
119 ! first divide step without thinking. BUT, the others are conditional,
120 ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
121 ! order bit set in the first step, just falling into the regular
122 ! division loop will mess up the first time around.
123 ! So we unroll slightly...
124 Ldo_single_div:
125 subcc %g7, 1, %g7
126 bl Lend_regular_divide /* no single-bit steps to do */
127 nop
128
/* First step, done unconditionally: subtract once and start the quotient at 1. */
129 sub %o3, %o5, %o3
130 mov 1, %o2
131
132 b Lend_single_divloop
133 nop
134 Lsingle_divloop:
/*
 * One quotient bit per pass.  The flags tested by the bl below come from
 * the tst %o3 in the delay slot of the bge at the bottom of the loop;
 * the sll does not change them.
 */
135 sll %o2, 1, %o2
136 bl 1f
137 srl %o5, 1, %o5 /* delay slot: halve the comparand on both outcomes */
138 ! %o3 >= 0
139 sub %o3, %o5, %o3
140 b 2f
141 add %o2, 1, %o2
142 1: ! %o3 < 0
143 add %o3, %o5, %o3
144 sub %o2, 1, %o2
145 2:
146 Lend_single_divloop:
147 subcc %g7, 1, %g7
148 bge Lsingle_divloop
149 tst %o3 /* delay slot: sets the sign flag consumed at the top of the loop */
150
151 b,a Lend_regular_divide
152
153Lnot_really_big:
1541:
/*
 * Scale loop: shift the comparand %o5 left by 4 while it is still <= the
 * remainder %o3, counting iterations in %o4 (the addcc in the delay slot
 * runs on every pass).
 */
155 sll %o5, 4, %o5
156
157 cmp %o5, %o3
158 bleu 1b
159 addcc %o4, 1, %o4
160
/* be tests the flags left by the addcc above; the delay slot takes back the final increment. */
161 be Lgot_result
162 sub %o4, 1, %o4
163
164 tst %o3 ! set up for initial iteration
165Ldivloop:
/*
 * One pass develops 4 quotient bits with an unrolled binary decision
 * tree.  At each depth the comparand is halved (srl in the delay slot)
 * and then subtracted from, or added back to, the remainder depending on
 * the sign of the remainder.  Labels are L.<depth>.<16 + accumulated
 * bits>; each leaf adds (accumulated * 2 + 1) or (accumulated * 2 - 1)
 * to the quotient, which was shifted left by 4 on entry, and then
 * branches to 9f.
 */
166 sll %o2, 4, %o2
167 ! depth 1, accumulated bits 0
168 bl L.1.16
169 srl %o5,1,%o5
170 ! remainder is positive
171 subcc %o3,%o5,%o3
172 ! depth 2, accumulated bits 1
173 bl L.2.17
174 srl %o5,1,%o5
175 ! remainder is positive
176 subcc %o3,%o5,%o3
177 ! depth 3, accumulated bits 3
178 bl L.3.19
179 srl %o5,1,%o5
180 ! remainder is positive
181 subcc %o3,%o5,%o3
182 ! depth 4, accumulated bits 7
183 bl L.4.23
184 srl %o5,1,%o5
185 ! remainder is positive
186 subcc %o3,%o5,%o3
187 b 9f
188 add %o2, (7*2+1), %o2
189
190L.4.23:
191 ! remainder is negative
192 addcc %o3,%o5,%o3
193 b 9f
194 add %o2, (7*2-1), %o2
195
196L.3.19:
197 ! remainder is negative
198 addcc %o3,%o5,%o3
199 ! depth 4, accumulated bits 5
200 bl L.4.21
201 srl %o5,1,%o5
202 ! remainder is positive
203 subcc %o3,%o5,%o3
204 b 9f
205 add %o2, (5*2+1), %o2
206
207L.4.21:
208 ! remainder is negative
209 addcc %o3,%o5,%o3
210 b 9f
211 add %o2, (5*2-1), %o2
212
213L.2.17:
214 ! remainder is negative
215 addcc %o3,%o5,%o3
216 ! depth 3, accumulated bits 1
217 bl L.3.17
218 srl %o5,1,%o5
219 ! remainder is positive
220 subcc %o3,%o5,%o3
221 ! depth 4, accumulated bits 3
222 bl L.4.19
223 srl %o5,1,%o5
224 ! remainder is positive
225 subcc %o3,%o5,%o3
226 b 9f
227 add %o2, (3*2+1), %o2
228
229L.4.19:
230 ! remainder is negative
231 addcc %o3,%o5,%o3
232 b 9f
233 add %o2, (3*2-1), %o2
234
235L.3.17:
236 ! remainder is negative
237 addcc %o3,%o5,%o3
238 ! depth 4, accumulated bits 1
239 bl L.4.17
240 srl %o5,1,%o5
241 ! remainder is positive
242 subcc %o3,%o5,%o3
243 b 9f
244 add %o2, (1*2+1), %o2
245
246L.4.17:
247 ! remainder is negative
248 addcc %o3,%o5,%o3
249 b 9f
250 add %o2, (1*2-1), %o2
251
252L.1.16:
253 ! remainder is negative
254 addcc %o3,%o5,%o3
255 ! depth 2, accumulated bits -1
256 bl L.2.15
257 srl %o5,1,%o5
258 ! remainder is positive
259 subcc %o3,%o5,%o3
260 ! depth 3, accumulated bits -1
261 bl L.3.15
262 srl %o5,1,%o5
263 ! remainder is positive
264 subcc %o3,%o5,%o3
265 ! depth 4, accumulated bits -1
266 bl L.4.15
267 srl %o5,1,%o5
268 ! remainder is positive
269 subcc %o3,%o5,%o3
270 b 9f
271 add %o2, (-1*2+1), %o2
272
273L.4.15:
274 ! remainder is negative
275 addcc %o3,%o5,%o3
276 b 9f
277 add %o2, (-1*2-1), %o2
278
279L.3.15:
280 ! remainder is negative
281 addcc %o3,%o5,%o3
282 ! depth 4, accumulated bits -3
283 bl L.4.13
284 srl %o5,1,%o5
285 ! remainder is positive
286 subcc %o3,%o5,%o3
287 b 9f
288 add %o2, (-3*2+1), %o2
289
290L.4.13:
291 ! remainder is negative
292 addcc %o3,%o5,%o3
293 b 9f
294 add %o2, (-3*2-1), %o2
295
296L.2.15:
297 ! remainder is negative
298 addcc %o3,%o5,%o3
299 ! depth 3, accumulated bits -3
300 bl L.3.13
301 srl %o5,1,%o5
302 ! remainder is positive
303 subcc %o3,%o5,%o3
304 ! depth 4, accumulated bits -5
305 bl L.4.11
306 srl %o5,1,%o5
307 ! remainder is positive
308 subcc %o3,%o5,%o3
309 b 9f
310 add %o2, (-5*2+1), %o2
311
312L.4.11:
313 ! remainder is negative
314 addcc %o3,%o5,%o3
315 b 9f
316 add %o2, (-5*2-1), %o2
317
318L.3.13:
319 ! remainder is negative
320 addcc %o3,%o5,%o3
321 ! depth 4, accumulated bits -7
322 bl L.4.9
323 srl %o5,1,%o5
324 ! remainder is positive
325 subcc %o3,%o5,%o3
326 b 9f
327 add %o2, (-7*2+1), %o2
328
329L.4.9:
330 ! remainder is negative
331 addcc %o3,%o5,%o3
332 b 9f
333 add %o2, (-7*2-1), %o2
335 9:
336Lend_regular_divide:
/* Loop control: run another 4-bit pass while the iteration counter %o4 stays >= 0. */
337 subcc %o4, 1, %o4
338 bge Ldivloop
339 tst %o3 /* delay slot: leaves the sign of the remainder in the flags */
340
/* Annulled branch: the sub runs only when the remainder is negative, lowering the quotient by one. */
341 bl,a Lgot_result
342 ! non-restoring fixup here (one instruction only!)
343 sub %o2, 1, %o2
344
345Lgot_result:
346
/* Return the quotient accumulated in %o2. */
347 retl
348 mov %o2, %o0
349
350 .globl .udiv_patch
351.udiv_patch:
/*
 * Short alternative body that uses the hardware udiv instruction:
 * clears %y, then returns %o0 / %o1 in %o0 from the delay slot of retl.
 * NOTE(review): presumably copied over the start of .udiv on CPUs that
 * have hardware divide, and the nops presumably cover the delay before
 * a write to %y takes effect -- confirm against the patching code and
 * the architecture manual.
 */
352 wr %g0, 0x0, %y
353 nop
354 nop
355 retl
356 udiv %o0, %o1, %o0
357 nop
diff --git a/arch/sparc/lib/umul.S b/arch/sparc/lib/umul.S
new file mode 100644
index 00000000000..1f36ae68252
--- /dev/null
+++ b/arch/sparc/lib/umul.S
@@ -0,0 +1,171 @@
1/*
2 * umul.S: This routine was taken from glibc-1.09 and is covered
3 * by the GNU Library General Public License Version 2.
4 */
5
6
7/*
8 * Unsigned multiply. Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the
9 * upper 32 bits of the 64-bit product).
10 *
11 * This code optimizes short (less than 13-bit) multiplies. Short
12 * multiplies require 25 instruction cycles, and long ones require
13 * 45 instruction cycles.
14 *
15 * On return, overflow has occurred (%o1 is not zero) if and only if
16 * the Z condition code is clear, allowing, e.g., the following:
17 *
18 * call .umul
19 * nop
20 * bnz overflow (or tnz)
21 */
22
23 .globl .umul
24 .globl _Umul
25.umul:
26_Umul: /* needed for export */
/*
 * Entry: %o0 and %o1 hold the two 32-bit operands.  %o0 is moved to %y
 * as the multiplier, %o4 holds the partial product, and each mulscc
 * below performs one multiply step against %o1.  If neither operand has
 * any of bits 12..31 set, the 12-step version at Lmul_shortway is used.
 */
27 or %o0, %o1, %o4
28 mov %o0, %y ! multiplier -> Y
29
30 andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args
31 be Lmul_shortway ! if zero, can do it the short way
32 andcc %g0, %g0, %o4 ! zero the partial product and clear N and V
33
34 /*
35 * Long multiply. 32 steps, followed by a final shift step.
36 */
37 mulscc %o4, %o1, %o4 ! 1
38 mulscc %o4, %o1, %o4 ! 2
39 mulscc %o4, %o1, %o4 ! 3
40 mulscc %o4, %o1, %o4 ! 4
41 mulscc %o4, %o1, %o4 ! 5
42 mulscc %o4, %o1, %o4 ! 6
43 mulscc %o4, %o1, %o4 ! 7
44 mulscc %o4, %o1, %o4 ! 8
45 mulscc %o4, %o1, %o4 ! 9
46 mulscc %o4, %o1, %o4 ! 10
47 mulscc %o4, %o1, %o4 ! 11
48 mulscc %o4, %o1, %o4 ! 12
49 mulscc %o4, %o1, %o4 ! 13
50 mulscc %o4, %o1, %o4 ! 14
51 mulscc %o4, %o1, %o4 ! 15
52 mulscc %o4, %o1, %o4 ! 16
53 mulscc %o4, %o1, %o4 ! 17
54 mulscc %o4, %o1, %o4 ! 18
55 mulscc %o4, %o1, %o4 ! 19
56 mulscc %o4, %o1, %o4 ! 20
57 mulscc %o4, %o1, %o4 ! 21
58 mulscc %o4, %o1, %o4 ! 22
59 mulscc %o4, %o1, %o4 ! 23
60 mulscc %o4, %o1, %o4 ! 24
61 mulscc %o4, %o1, %o4 ! 25
62 mulscc %o4, %o1, %o4 ! 26
63 mulscc %o4, %o1, %o4 ! 27
64 mulscc %o4, %o1, %o4 ! 28
65 mulscc %o4, %o1, %o4 ! 29
66 mulscc %o4, %o1, %o4 ! 30
67 mulscc %o4, %o1, %o4 ! 31
68 mulscc %o4, %o1, %o4 ! 32
69 mulscc %o4, %g0, %o4 ! final shift
70
71
72 /*
73 * Normally, with the shift-and-add approach, if both numbers are
74 * positive you get the correct result. With 32-bit two's-complement
75 * numbers, -x is represented as
76 *
77 *
78 * ( 2 - x / 2^32 ) mod 2 * 2^32
79 *
80 * (written on one line; 2^32 means 2 to the power 32)
81 *
82 * (the `mod 2' subtracts 1 from 1.bbbb). To avoid lots of 2^32s,
83 * we can treat this as if the radix point were just to the left
84 * of the sign bit (multiply by 2^32), and get
85 *
86 * -x = (2 - x) mod 2
87 *
88 * Then, ignoring the `mod 2's for convenience:
89 *
90 * x * y = xy
91 * -x * y = 2y - xy
92 * x * -y = 2x - xy
93 * -x * -y = 4 - 2x - 2y + xy
94 *
95 * For signed multiplies, we subtract (x << 32) from the partial
96 * product to fix this problem for negative multipliers (see mul.s).
97 * Because of the way the shift into the partial product is calculated
98 * (N xor V), this term is automatically removed for the multiplicand,
99 * so we don't have to adjust.
100 *
101 * But for unsigned multiplies, the high order bit wasn't a sign bit,
102 * and the correction is wrong. So for unsigned multiplies where the
103 * high order bit is one, we end up with xy - (y << 32). To fix it
104 * we add y << 32.
105 */
106#if 0
107 tst %o1
108 bl,a 1f ! if %o1 < 0 (high order bit = 1),
109 add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half)
110
1111:
112 rd %y, %o0 ! get lower half of product
113 retl
114 addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0
115#else
116 /* Faster code from tege@sics.se. */
/* Branch-free form of the disabled code above: the mask in %o2 is all ones when bit 31 of %o1 is set, else zero. */
117 sra %o1, 31, %o2 ! make mask from sign bit
118 and %o0, %o2, %o2 ! %o2 = 0 or %o0, depending on sign of %o1
119 rd %y, %o0 ! get lower half of product
120 retl
121 addcc %o4, %o2, %o1 ! add compensation and put upper half in place
122#endif
123
124Lmul_shortway:
/* Reached from .umul when neither operand has any of bits 12..31 set; %o4 was cleared in the delay slot of that branch. */
125 /*
126 * Short multiply. 12 steps, followed by a final shift step.
127 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
128 * but there is no problem with %o0 being negative (unlike above),
129 * and overflow is impossible (the answer is at most 24 bits long).
130 */
131 mulscc %o4, %o1, %o4 ! 1
132 mulscc %o4, %o1, %o4 ! 2
133 mulscc %o4, %o1, %o4 ! 3
134 mulscc %o4, %o1, %o4 ! 4
135 mulscc %o4, %o1, %o4 ! 5
136 mulscc %o4, %o1, %o4 ! 6
137 mulscc %o4, %o1, %o4 ! 7
138 mulscc %o4, %o1, %o4 ! 8
139 mulscc %o4, %o1, %o4 ! 9
140 mulscc %o4, %o1, %o4 ! 10
141 mulscc %o4, %o1, %o4 ! 11
142 mulscc %o4, %o1, %o4 ! 12
143 mulscc %o4, %g0, %o4 ! final shift
144
145 /*
146 * %o4 has 20 of the bits that should be in the result; %y has
147 * the bottom 12 (as %y's top 12). That is:
148 *
149 * %o4 %y
150 * +----------------+----------------+
151 * | -12- | -20- | -12- | -20- |
152 * +------(---------+------)---------+
153 * -----result-----
154 *
155 * The 12 bits of %o4 left of the `result' area are all zero;
156 * in fact, all top 20 bits of %o4 are zero.
157 */
158
/* Reassemble the product in %o0 as (%o4 << 12) | (%y >> 20); the upper word returned in %o1 is always zero here. */
159 rd %y, %o5
160 sll %o4, 12, %o0 ! shift middle bits left 12
161 srl %o5, 20, %o5 ! shift low bits right 20
162 or %o5, %o0, %o0
163 retl
164 addcc %g0, %g0, %o1 ! %o1 = zero, and set Z
165
166 .globl .umul_patch
167.umul_patch:
/*
 * Short alternative body that uses the hardware umul instruction: the
 * low word of the product goes to %o0 and %y is read into %o1 in the
 * delay slot of retl.
 * NOTE(review): presumably copied over the start of .umul on CPUs that
 * have hardware multiply -- confirm against the patching code.
 * NOTE(review): umul (not umulcc) is used and nothing else here sets
 * the condition codes, so the Z-flag convention described for .umul may
 * not hold for this version -- confirm.
 */
168 umul %o0, %o1, %o0
169 retl
170 rd %y, %o1
171 nop
diff --git a/arch/sparc/lib/urem.S b/arch/sparc/lib/urem.S
new file mode 100644
index 00000000000..77123eb83c4
--- /dev/null
+++ b/arch/sparc/lib/urem.S
@@ -0,0 +1,357 @@
1/*
2 * urem.S: This routine was taken from glibc-1.09 and is covered
3 * by the GNU Library General Public License Version 2.
4 */
5
6/* This file is generated from divrem.m4; DO NOT EDIT! */
7/*
8 * Division and remainder, from Appendix E of the Sparc Version 8
9 * Architecture Manual, with fixes from Gordon Irlam.
10 */
11
12/*
13 * Input: dividend and divisor in %o0 and %o1 respectively.
14 *
15 * m4 parameters:
16 * .urem name of function to generate
17 * rem operation selected: div => %o0 / %o1; rem => %o0 % %o1
18 * false signedness selected: true => signed; false => unsigned
19 *
20 * Algorithm parameters:
21 * N how many bits per iteration we try to get (4)
22 * WORDSIZE total number of bits (32)
23 *
24 * Derived constants:
25 * TOPBITS number of bits in the top decade of a number
26 *
27 * Important variables:
28 * Q the partial quotient under development (initially 0)
29 * R the remainder so far, initially the dividend
30 * ITER number of main division loop iterations required;
31 * equal to ceil(log2(quotient) / N). Note that this
32 * is the log base (2^N) of the quotient.
33 * V the current comparand, initially divisor*2^(ITER*N-1)
34 *
35 * Cost:
36 * Current estimate for non-large dividend is
37 * ceil(log2(quotient) / N) * (10 + 7N/2) + C
38 * A large dividend is one greater than 2^(31-TOPBITS) and takes a
39 * different path, as the upper bits of the quotient must be developed
40 * one bit at a time.
41 */
42
43 .globl .urem
44 .globl _Urem
45.urem:
46_Urem: /* needed for export */
/*
 * Unsigned 32-bit remainder: %o0 % %o1, result returned in %o0.
 * Registers initialised in this entry sequence:
 *   %o3 = running remainder (copy of the dividend %o0)
 *   %o5 = comparand (copy of the divisor %o1)
 *   %o2 = quotient under construction, cleared
 *   %o4 = iteration counter for the 4-bit divide loop, cleared
 *   %g1 = 1 << (32 - 4 - 1), the threshold for the large-dividend path
 */
47
48 ! Ready to divide. Compute size of quotient; scale comparand.
49 orcc %o1, %g0, %o5 /* copy divisor and set flags so zero can be detected */
50 bne 1f
51 mov %o0, %o3 /* delay slot: runs whether or not the divisor is zero */
52
53 ! Divide by zero trap. If it returns, return 0 (about as
54 ! wrong as possible, but that is what SunOS does...).
55 ta ST_DIV0
56 retl
57 clr %o0
58
591:
60 cmp %o3, %o5 ! if %o1 exceeds %o0, done
61 blu Lgot_result ! (and algorithm fails otherwise)
62 clr %o2 /* delay slot; when the branch is taken the result is %o3, i.e. the dividend itself */
63
64 sethi %hi(1 << (32 - 4 - 1)), %g1
65
66 cmp %o3, %g1
67 blu Lnot_really_big /* unsigned dividend below the threshold: use the 4-bits-per-step loop */
68 clr %o4 /* delay slot: iteration counter = 0 on both paths */
69
70 ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
71 ! as our usual N-at-a-shot divide step will cause overflow and havoc.
72 ! The number of bits in the result here is N*ITER+SC, where SC <= N.
73 ! Compute ITER in an unorthodox manner: know we need to shift V into
74 ! the top decade: so do not even bother to compare to R.
75 1:
/*
 * Large-dividend path.  Scale loop: while the comparand %o5 is below the
 * threshold in %g1, shift it left by 4 and count one more 4-bit iteration
 * in %o4.  %g7 (the single-bit step counter) is set to 1 in the delay
 * slot on every pass.
 */
76 cmp %o5, %g1
77 bgeu 3f
78 mov 1, %g7
79
80 sll %o5, 4, %o5
81
82 b 1b
83 add %o4, 1, %o4
84
85 ! Now compute %g7.
86 2:
/* Double the comparand; a carry out means the doubling overflowed 32 bits. */
87 addcc %o5, %o5, %o5
88 bcc Lnot_too_big
89 add %g7, 1, %g7
90
91 ! We get here if the %o1 overflowed while shifting.
92 ! This means that %o3 has the high-order bit set.
93 ! Restore %o5 and subtract from %o3.
94 sll %g1, 4, %g1 ! high order bit
95 srl %o5, 1, %o5 ! rest of %o5
96 add %o5, %g1, %o5
97
98 b Ldo_single_div
99 sub %g7, 1, %g7 /* delay slot: undo the count for the doubling that overflowed */
100
101 Lnot_too_big:
102 3:
/* Keep doubling (at 2) while the comparand is still below the remainder. */
103 cmp %o5, %o3
104 blu 2b
105 nop
106
107 be Ldo_single_div
108 nop
109 /* NB: these are commented out in the V8-Sparc manual as well */
110 /* (I do not understand this) */
111 ! %o5 > %o3: went too far: back up 1 step
112 ! srl %o5, 1, %o5
113 ! dec %g7
114 ! do single-bit divide steps
115 !
116 ! We have to be careful here. We know that %o3 >= %o5, so we can do the
117 ! first divide step without thinking. BUT, the others are conditional,
118 ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
119 ! order bit set in the first step, just falling into the regular
120 ! division loop will mess up the first time around.
121 ! So we unroll slightly...
122 Ldo_single_div:
123 subcc %g7, 1, %g7
124 bl Lend_regular_divide /* no single-bit steps to do */
125 nop
126
/* First step, done unconditionally: subtract once and start the quotient at 1. */
127 sub %o3, %o5, %o3
128 mov 1, %o2
129
130 b Lend_single_divloop
131 nop
132 Lsingle_divloop:
/*
 * One quotient bit per pass.  The flags tested by the bl below come from
 * the tst %o3 in the delay slot of the bge at the bottom of the loop;
 * the sll does not change them.
 */
133 sll %o2, 1, %o2
134 bl 1f
135 srl %o5, 1, %o5 /* delay slot: halve the comparand on both outcomes */
136 ! %o3 >= 0
137 sub %o3, %o5, %o3
138 b 2f
139 add %o2, 1, %o2
140 1: ! %o3 < 0
141 add %o3, %o5, %o3
142 sub %o2, 1, %o2
143 2:
144 Lend_single_divloop:
145 subcc %g7, 1, %g7
146 bge Lsingle_divloop
147 tst %o3 /* delay slot: sets the sign flag consumed at the top of the loop */
148
149 b,a Lend_regular_divide
150
151Lnot_really_big:
1521:
/*
 * Scale loop: shift the comparand %o5 left by 4 while it is still <= the
 * remainder %o3, counting iterations in %o4 (the addcc in the delay slot
 * runs on every pass).
 */
153 sll %o5, 4, %o5
154
155 cmp %o5, %o3
156 bleu 1b
157 addcc %o4, 1, %o4
158
/* be tests the flags left by the addcc above; the delay slot takes back the final increment. */
159 be Lgot_result
160 sub %o4, 1, %o4
161
162 tst %o3 ! set up for initial iteration
163Ldivloop:
/*
 * One pass develops 4 quotient bits with an unrolled binary decision
 * tree.  At each depth the comparand is halved (srl in the delay slot)
 * and then subtracted from, or added back to, the remainder depending on
 * the sign of the remainder.  Labels are L.<depth>.<16 + accumulated
 * bits>; each leaf adds (accumulated * 2 + 1) or (accumulated * 2 - 1)
 * to the quotient, which was shifted left by 4 on entry, and then
 * branches to 9f.
 */
164 sll %o2, 4, %o2
165 ! depth 1, accumulated bits 0
166 bl L.1.16
167 srl %o5,1,%o5
168 ! remainder is positive
169 subcc %o3,%o5,%o3
170 ! depth 2, accumulated bits 1
171 bl L.2.17
172 srl %o5,1,%o5
173 ! remainder is positive
174 subcc %o3,%o5,%o3
175 ! depth 3, accumulated bits 3
176 bl L.3.19
177 srl %o5,1,%o5
178 ! remainder is positive
179 subcc %o3,%o5,%o3
180 ! depth 4, accumulated bits 7
181 bl L.4.23
182 srl %o5,1,%o5
183 ! remainder is positive
184 subcc %o3,%o5,%o3
185 b 9f
186 add %o2, (7*2+1), %o2
187
188L.4.23:
189 ! remainder is negative
190 addcc %o3,%o5,%o3
191 b 9f
192 add %o2, (7*2-1), %o2
193
194L.3.19:
195 ! remainder is negative
196 addcc %o3,%o5,%o3
197 ! depth 4, accumulated bits 5
198 bl L.4.21
199 srl %o5,1,%o5
200 ! remainder is positive
201 subcc %o3,%o5,%o3
202 b 9f
203 add %o2, (5*2+1), %o2
204
205L.4.21:
206 ! remainder is negative
207 addcc %o3,%o5,%o3
208 b 9f
209 add %o2, (5*2-1), %o2
210
211L.2.17:
212 ! remainder is negative
213 addcc %o3,%o5,%o3
214 ! depth 3, accumulated bits 1
215 bl L.3.17
216 srl %o5,1,%o5
217 ! remainder is positive
218 subcc %o3,%o5,%o3
219 ! depth 4, accumulated bits 3
220 bl L.4.19
221 srl %o5,1,%o5
222 ! remainder is positive
223 subcc %o3,%o5,%o3
224 b 9f
225 add %o2, (3*2+1), %o2
226
227L.4.19:
228 ! remainder is negative
229 addcc %o3,%o5,%o3
230 b 9f
231 add %o2, (3*2-1), %o2
232
233L.3.17:
234 ! remainder is negative
235 addcc %o3,%o5,%o3
236 ! depth 4, accumulated bits 1
237 bl L.4.17
238 srl %o5,1,%o5
239 ! remainder is positive
240 subcc %o3,%o5,%o3
241 b 9f
242 add %o2, (1*2+1), %o2
243
244L.4.17:
245 ! remainder is negative
246 addcc %o3,%o5,%o3
247 b 9f
248 add %o2, (1*2-1), %o2
249
250L.1.16:
251 ! remainder is negative
252 addcc %o3,%o5,%o3
253 ! depth 2, accumulated bits -1
254 bl L.2.15
255 srl %o5,1,%o5
256 ! remainder is positive
257 subcc %o3,%o5,%o3
258 ! depth 3, accumulated bits -1
259 bl L.3.15
260 srl %o5,1,%o5
261 ! remainder is positive
262 subcc %o3,%o5,%o3
263 ! depth 4, accumulated bits -1
264 bl L.4.15
265 srl %o5,1,%o5
266 ! remainder is positive
267 subcc %o3,%o5,%o3
268 b 9f
269 add %o2, (-1*2+1), %o2
270
271L.4.15:
272 ! remainder is negative
273 addcc %o3,%o5,%o3
274 b 9f
275 add %o2, (-1*2-1), %o2
276
277L.3.15:
278 ! remainder is negative
279 addcc %o3,%o5,%o3
280 ! depth 4, accumulated bits -3
281 bl L.4.13
282 srl %o5,1,%o5
283 ! remainder is positive
284 subcc %o3,%o5,%o3
285 b 9f
286 add %o2, (-3*2+1), %o2
287
288L.4.13:
289 ! remainder is negative
290 addcc %o3,%o5,%o3
291 b 9f
292 add %o2, (-3*2-1), %o2
293
294L.2.15:
295 ! remainder is negative
296 addcc %o3,%o5,%o3
297 ! depth 3, accumulated bits -3
298 bl L.3.13
299 srl %o5,1,%o5
300 ! remainder is positive
301 subcc %o3,%o5,%o3
302 ! depth 4, accumulated bits -5
303 bl L.4.11
304 srl %o5,1,%o5
305 ! remainder is positive
306 subcc %o3,%o5,%o3
307 b 9f
308 add %o2, (-5*2+1), %o2
309
310L.4.11:
311 ! remainder is negative
312 addcc %o3,%o5,%o3
313 b 9f
314 add %o2, (-5*2-1), %o2
315
316L.3.13:
317 ! remainder is negative
318 addcc %o3,%o5,%o3
319 ! depth 4, accumulated bits -7
320 bl L.4.9
321 srl %o5,1,%o5
322 ! remainder is positive
323 subcc %o3,%o5,%o3
324 b 9f
325 add %o2, (-7*2+1), %o2
326
327L.4.9:
328 ! remainder is negative
329 addcc %o3,%o5,%o3
330 b 9f
331 add %o2, (-7*2-1), %o2
333 9:
334Lend_regular_divide:
/* Loop control: run another 4-bit pass while the iteration counter %o4 stays >= 0. */
335 subcc %o4, 1, %o4
336 bge Ldivloop
337 tst %o3 /* delay slot: leaves the sign of the remainder in the flags */
338
/* Annulled branch: the add runs only when the remainder is negative, adding the original divisor %o1 back once. */
339 bl,a Lgot_result
340 ! non-restoring fixup here (one instruction only!)
341 add %o3, %o1, %o3
342
343Lgot_result:
344
/* Return the remainder held in %o3. */
345 retl
346 mov %o3, %o0
347
348 .globl .urem_patch
349.urem_patch:
/*
 * Short alternative body built on the hardware divide and multiply
 * instructions: clears %y, computes q = %o0 / %o1 into %o2, multiplies
 * q by %o1, and returns %o0 - q * %o1 in %o0.
 * NOTE(review): presumably copied over the start of .urem on CPUs that
 * have hardware divide, and the nops presumably cover the delay before
 * a write to %y takes effect -- confirm against the patching code and
 * the architecture manual.
 */
350 wr %g0, 0x0, %y
351 nop
352 nop
353 nop
354 udiv %o0, %o1, %o2
355 umul %o2, %o1, %o2
356 retl
357 sub %o0, %o2, %o0