diff options
Diffstat (limited to 'arch/arm/lib/div64.S')
-rw-r--r-- | arch/arm/lib/div64.S | 200 |
1 files changed, 200 insertions, 0 deletions
diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S new file mode 100644 index 000000000000..ec9a1cd6176f --- /dev/null +++ b/arch/arm/lib/div64.S | |||
@@ -0,0 +1,200 @@ | |||
1 | /* | ||
2 | * linux/arch/arm/lib/div64.S | ||
3 | * | ||
4 | * Optimized computation of 64-bit dividend / 32-bit divisor | ||
5 | * | ||
6 | * Author: Nicolas Pitre | ||
7 | * Created: Oct 5, 2003 | ||
8 | * Copyright: Monta Vista Software, Inc. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License version 2 as | ||
12 | * published by the Free Software Foundation. | ||
13 | */ | ||
14 | |||
15 | #include <linux/linkage.h> | ||
16 | |||
17 | #ifdef __ARMEB__ | ||
18 | #define xh r0 | ||
19 | #define xl r1 | ||
20 | #define yh r2 | ||
21 | #define yl r3 | ||
22 | #else | ||
23 | #define xl r0 | ||
24 | #define xh r1 | ||
25 | #define yl r2 | ||
26 | #define yh r3 | ||
27 | #endif | ||
28 | |||
29 | /* | ||
30 | * __do_div64: perform a division with 64-bit dividend and 32-bit divisor. | ||
31 | * | ||
32 | * Note: Calling convention is totally non standard for optimal code. | ||
33 | * This is meant to be used by do_div() from include/asm/div64.h only. | ||
34 | * | ||
35 | * Input parameters: | ||
36 | * xh-xl = dividend (clobbered) | ||
37 | * r4 = divisor (preserved) | ||
38 | * | ||
39 | * Output values: | ||
40 | * yh-yl = result | ||
41 | * xh = remainder | ||
42 | * | ||
43 | * Clobbered regs: xl, ip | ||
44 | */ | ||
45 | |||
46 | ENTRY(__do_div64) | |
47 | |||
@ Divide the 64-bit value in xh-xl by the 32-bit value in r4, leaving
@ the quotient in yh-yl and the remainder in xh.
48 | @ Test for easy paths first. ip = divisor - 1; the flags set by this subtraction are still live at label 9. | |
49 | subs ip, r4, #1 | |
50 | bls 9f @ divisor is 0 or 1 | |
51 | tst ip, r4 | |
52 | beq 8f @ (divisor & (divisor - 1)) == 0: divisor is power of 2 | |
53 | |||
54 | @ See if we need to handle upper 32-bit result. yh is zeroed; if xh < divisor the upper quotient word stays 0 and we skip to 3. | |
55 | cmp xh, r4 | |
56 | mov yh, #0 | |
57 | blo 3f | |
58 | |||
59 | @ Align divisor with upper part of dividend. | |
60 | @ The aligned divisor is stored in yl preserving the original. | |
61 | @ The bit position is stored in ip. | |
62 | |||
63 | #if __LINUX_ARM_ARCH__ >= 5 | |
64 | |||
@ Shift count = clz(divisor) - clz(xh), which cannot be negative since
@ xh >= divisor here. Then ip = 1 << count and yl = divisor << count.
65 | clz yl, r4 | |
66 | clz ip, xh | |
67 | sub yl, yl, ip | |
68 | mov ip, #1 | |
69 | mov ip, ip, lsl yl | |
70 | mov yl, r4, lsl yl | |
71 | |||
72 | #else | |
73 | |||
@ Variant without clz: shift yl and ip left together for as long as yl
@ is below 0x80000000 and below xh.
74 | mov yl, r4 | |
75 | mov ip, #1 | |
76 | 1: cmp yl, #0x80000000 | |
77 | cmpcc yl, xh | |
78 | movcc yl, yl, lsl #1 | |
79 | movcc ip, ip, lsl #1 | |
80 | bcc 1b | |
81 | |||
82 | #endif | |
83 | |||
84 | @ The division loop for needed upper bit positions. | |
85 | @ Break out early if dividend reaches 0. | |
@ Each pass: if xh >= yl, set the quotient bit ip in yh and subtract yl
@ from xh. The loop ends when that subtraction leaves xh == 0 (Z from
@ subcss survives to bne because movnes is skipped and the plain mov
@ does not touch flags) or when ip has been shifted out to 0.
86 | 2: cmp xh, yl | |
87 | orrcs yh, yh, ip | |
88 | subcss xh, xh, yl | |
89 | movnes ip, ip, lsr #1 | |
90 | mov yl, yl, lsr #1 | |
91 | bne 2b | |
92 | |||
93 | @ See if we need to handle lower 32-bit result. If xh is 0 and xl < divisor, the low quotient word is 0 and xl is the remainder. | |
94 | 3: cmp xh, #0 | |
95 | mov yl, #0 | |
96 | cmpeq xl, r4 | |
97 | movlo xh, xl | |
98 | movlo pc, lr | |
99 | |||
100 | @ The division loop for lower bit positions. | |
101 | @ Here we shift remainder bits leftwards rather than moving the | |
102 | @ divisor for comparisons, considering the carry-out bit as well. | |
@ ip is the current quotient bit, walking from bit 31 downwards; the
@ loop ends once it has been shifted out. A carry out of the adcs means
@ the shifted remainder exceeds 32 bits, so the compare is skipped and
@ the subtraction is done unconditionally (C is still set at label 5).
103 | mov ip, #0x80000000 | |
104 | 4: movs xl, xl, lsl #1 | |
105 | adcs xh, xh, xh | |
106 | beq 6f | |
107 | cmpcc xh, r4 | |
108 | 5: orrcs yl, yl, ip | |
109 | subcs xh, xh, r4 | |
110 | movs ip, ip, lsr #1 | |
111 | bne 4b | |
112 | mov pc, lr | |
113 | |||
114 | @ The top part of remainder became zero. If carry is set | |
115 | @ (the 33rd bit) this is a false positive so resume the loop. | |
116 | @ Otherwise, if lower part is also null then we are done. | |
117 | 6: bcs 5b | |
118 | cmp xl, #0 | |
119 | moveq pc, lr | |
120 | |||
121 | @ We still have remainder bits in the low part. Bring them up. | |
122 | |||
123 | #if __LINUX_ARM_ARCH__ >= 5 | |
124 | |||
@ Skip clz(xl) + 1 bit positions in one step: the highest set bit of xl
@ is shifted out and ip is moved right by the same amount.
125 | clz xh, xl @ xh is known to be zero here, so it is free to hold the shift count | |
126 | add xh, xh, #1 | |
127 | mov xl, xl, lsl xh | |
128 | mov ip, ip, lsr xh | |
129 | |||
130 | #else | |
131 | |||
@ Variant without clz: shift xl left one bit at a time, moving ip right
@ in step, until a set bit drops out into the carry flag.
132 | 7: movs xl, xl, lsl #1 | |
133 | mov ip, ip, lsr #1 | |
134 | bcc 7b | |
135 | |||
136 | #endif | |
137 | |||
138 | @ Current remainder is now 1. It is worthless to compare with | |
139 | @ divisor at this point since divisor can not be smaller than 3 here. | |
140 | @ If possible, branch for another shift in the division loop. | |
141 | @ If no bit position left then we are done. | |
142 | movs ip, ip, lsr #1 | |
143 | mov xh, #1 | |
144 | bne 4b | |
145 | mov pc, lr | |
146 | |||
147 | 8: @ Division by a power of 2: determine what that divisor order is | |
148 | @ then simply shift values around | |
149 | |||
150 | #if __LINUX_ARM_ARCH__ >= 5 | |
151 | |||
@ ip = 31 - clz(divisor), i.e. the index of the single set bit.
152 | clz ip, r4 | |
153 | rsb ip, ip, #31 | |
154 | |||
155 | #else | |
156 | |||
@ Variant without clz: locate the set bit by narrowing in steps of 16,
@ 8 and 4 bits, accumulating the shift in ip. What is left in yl is then
@ 1, 2, 4 or 8, which adds 0, 1, 2 or 3 respectively.
157 | mov yl, r4 | |
158 | cmp r4, #(1 << 16) | |
159 | mov ip, #0 | |
160 | movhs yl, yl, lsr #16 | |
161 | movhs ip, #16 | |
162 | |||
163 | cmp yl, #(1 << 8) | |
164 | movhs yl, yl, lsr #8 | |
165 | addhs ip, ip, #8 | |
166 | |||
167 | cmp yl, #(1 << 4) | |
168 | movhs yl, yl, lsr #4 | |
169 | addhs ip, ip, #4 | |
170 | |||
171 | cmp yl, #(1 << 2) | |
172 | addhi ip, ip, #3 | |
173 | addls ip, ip, yl, lsr #1 | |
174 | |||
175 | #endif | |
176 | |||
@ Quotient = xh-xl shifted right by ip; the bits shifted out of xh are
@ merged into the top of yl. ip then becomes 32 - ip, and the remainder
@ (the low bits of xl) is isolated by shifting xl up and back down by
@ that amount.
177 | mov yh, xh, lsr ip | |
178 | mov yl, xl, lsr ip | |
179 | rsb ip, ip, #32 | |
180 | orr yl, yl, xh, lsl ip | |
181 | mov xh, xl, lsl ip | |
182 | mov xh, xh, lsr ip | |
183 | mov pc, lr | |
184 | |||
185 | @ eq -> division by 1: quotient is the dividend, remainder is 0 (Z flag still comes from the subs at entry) | |
186 | 9: moveq yl, xl | |
187 | moveq yh, xh | |
188 | moveq xh, #0 | |
189 | moveq pc, lr | |
190 | |||
191 | @ Division by 0: save lr on the stack across the call to __div0 | |
192 | str lr, [sp, #-4]! | |
193 | bl __div0 | |
194 | |||
195 | @ as wrong as it could be... if __div0 returns, report quotient 0 and remainder 0, then return through the saved lr | |
196 | mov yl, #0 | |
197 | mov yh, #0 | |
198 | mov xh, #0 | |
199 | ldr pc, [sp], #4 | |
200 | |||