diff options
-rw-r--r-- | arch/mips/lib/csum_partial.S | 144 |
1 files changed, 74 insertions, 70 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index 15611d9df7ac..3bffdbb1c1f9 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S | |||
@@ -12,19 +12,23 @@ | |||
12 | #include <asm/regdef.h> | 12 | #include <asm/regdef.h> |
13 | 13 | ||
14 | #ifdef CONFIG_64BIT | 14 | #ifdef CONFIG_64BIT |
15 | #define T0 ta0 | 15 | /* |
16 | #define T1 ta1 | 16 | * As we are sharing the code base with the mips32 tree (which uses the o32 ABI |
17 | #define T2 ta2 | 17 | * register definitions), we need to redefine the register definitions from |
18 | #define T3 ta3 | 18 | * the n64 ABI register naming to the o32 ABI register naming. |
19 | #define T4 t0 | 19 | */ |
20 | #define T7 t3 | 20 | #undef t0 |
21 | #else | 21 | #undef t1 |
22 | #define T0 t0 | 22 | #undef t2 |
23 | #define T1 t1 | 23 | #undef t3 |
24 | #define T2 t2 | 24 | #define t0 $8 |
25 | #define T3 t3 | 25 | #define t1 $9 |
26 | #define T4 t4 | 26 | #define t2 $10 |
27 | #define T7 t7 | 27 | #define t3 $11 |
28 | #define t4 $12 | ||
29 | #define t5 $13 | ||
30 | #define t6 $14 | ||
31 | #define t7 $15 | ||
28 | #endif | 32 | #endif |
29 | 33 | ||
30 | #define ADDC(sum,reg) \ | 34 | #define ADDC(sum,reg) \ |
@@ -64,37 +68,37 @@ | |||
64 | 68 | ||
65 | /* unknown src alignment and < 8 bytes to go */ | 69 | /* unknown src alignment and < 8 bytes to go */ |
66 | small_csumcpy: | 70 | small_csumcpy: |
67 | move a1, T2 | 71 | move a1, t2 |
68 | 72 | ||
69 | andi T0, a1, 4 | 73 | andi t0, a1, 4 |
70 | beqz T0, 1f | 74 | beqz t0, 1f |
71 | andi T0, a1, 2 | 75 | andi t0, a1, 2 |
72 | 76 | ||
73 | /* Still a full word to go */ | 77 | /* Still a full word to go */ |
74 | ulw T1, (src) | 78 | ulw t1, (src) |
75 | PTR_ADDIU src, 4 | 79 | PTR_ADDIU src, 4 |
76 | ADDC(sum, T1) | 80 | ADDC(sum, t1) |
77 | 81 | ||
78 | 1: move T1, zero | 82 | 1: move t1, zero |
79 | beqz T0, 1f | 83 | beqz t0, 1f |
80 | andi T0, a1, 1 | 84 | andi t0, a1, 1 |
81 | 85 | ||
82 | /* Still a halfword to go */ | 86 | /* Still a halfword to go */ |
83 | ulhu T1, (src) | 87 | ulhu t1, (src) |
84 | PTR_ADDIU src, 2 | 88 | PTR_ADDIU src, 2 |
85 | 89 | ||
86 | 1: beqz T0, 1f | 90 | 1: beqz t0, 1f |
87 | sll T1, T1, 16 | 91 | sll t1, t1, 16 |
88 | 92 | ||
89 | lbu T2, (src) | 93 | lbu t2, (src) |
90 | nop | 94 | nop |
91 | 95 | ||
92 | #ifdef __MIPSEB__ | 96 | #ifdef __MIPSEB__ |
93 | sll T2, T2, 8 | 97 | sll t2, t2, 8 |
94 | #endif | 98 | #endif |
95 | or T1, T2 | 99 | or t1, t2 |
96 | 100 | ||
97 | 1: ADDC(sum, T1) | 101 | 1: ADDC(sum, t1) |
98 | 102 | ||
99 | /* fold checksum */ | 103 | /* fold checksum */ |
100 | sll v1, sum, 16 | 104 | sll v1, sum, 16 |
@@ -104,7 +108,7 @@ small_csumcpy: | |||
104 | addu sum, v1 | 108 | addu sum, v1 |
105 | 109 | ||
106 | /* odd buffer alignment? */ | 110 | /* odd buffer alignment? */ |
107 | beqz T7, 1f | 111 | beqz t7, 1f |
108 | nop | 112 | nop |
109 | sll v1, sum, 8 | 113 | sll v1, sum, 8 |
110 | srl sum, sum, 8 | 114 | srl sum, sum, 8 |
@@ -122,25 +126,25 @@ small_csumcpy: | |||
122 | .align 5 | 126 | .align 5 |
123 | LEAF(csum_partial) | 127 | LEAF(csum_partial) |
124 | move sum, zero | 128 | move sum, zero |
125 | move T7, zero | 129 | move t7, zero |
126 | 130 | ||
127 | sltiu t8, a1, 0x8 | 131 | sltiu t8, a1, 0x8 |
128 | bnez t8, small_csumcpy /* < 8 bytes to copy */ | 132 | bnez t8, small_csumcpy /* < 8 bytes to copy */ |
129 | move T2, a1 | 133 | move t2, a1 |
130 | 134 | ||
131 | beqz a1, out | 135 | beqz a1, out |
132 | andi T7, src, 0x1 /* odd buffer? */ | 136 | andi t7, src, 0x1 /* odd buffer? */ |
133 | 137 | ||
134 | hword_align: | 138 | hword_align: |
135 | beqz T7, word_align | 139 | beqz t7, word_align |
136 | andi t8, src, 0x2 | 140 | andi t8, src, 0x2 |
137 | 141 | ||
138 | lbu T0, (src) | 142 | lbu t0, (src) |
139 | LONG_SUBU a1, a1, 0x1 | 143 | LONG_SUBU a1, a1, 0x1 |
140 | #ifdef __MIPSEL__ | 144 | #ifdef __MIPSEL__ |
141 | sll T0, T0, 8 | 145 | sll t0, t0, 8 |
142 | #endif | 146 | #endif |
143 | ADDC(sum, T0) | 147 | ADDC(sum, t0) |
144 | PTR_ADDU src, src, 0x1 | 148 | PTR_ADDU src, src, 0x1 |
145 | andi t8, src, 0x2 | 149 | andi t8, src, 0x2 |
146 | 150 | ||
@@ -148,9 +152,9 @@ word_align: | |||
148 | beqz t8, dword_align | 152 | beqz t8, dword_align |
149 | sltiu t8, a1, 56 | 153 | sltiu t8, a1, 56 |
150 | 154 | ||
151 | lhu T0, (src) | 155 | lhu t0, (src) |
152 | LONG_SUBU a1, a1, 0x2 | 156 | LONG_SUBU a1, a1, 0x2 |
153 | ADDC(sum, T0) | 157 | ADDC(sum, t0) |
154 | sltiu t8, a1, 56 | 158 | sltiu t8, a1, 56 |
155 | PTR_ADDU src, src, 0x2 | 159 | PTR_ADDU src, src, 0x2 |
156 | 160 | ||
@@ -162,9 +166,9 @@ dword_align: | |||
162 | beqz t8, qword_align | 166 | beqz t8, qword_align |
163 | andi t8, src, 0x8 | 167 | andi t8, src, 0x8 |
164 | 168 | ||
165 | lw T0, 0x00(src) | 169 | lw t0, 0x00(src) |
166 | LONG_SUBU a1, a1, 0x4 | 170 | LONG_SUBU a1, a1, 0x4 |
167 | ADDC(sum, T0) | 171 | ADDC(sum, t0) |
168 | PTR_ADDU src, src, 0x4 | 172 | PTR_ADDU src, src, 0x4 |
169 | andi t8, src, 0x8 | 173 | andi t8, src, 0x8 |
170 | 174 | ||
@@ -172,11 +176,11 @@ qword_align: | |||
172 | beqz t8, oword_align | 176 | beqz t8, oword_align |
173 | andi t8, src, 0x10 | 177 | andi t8, src, 0x10 |
174 | 178 | ||
175 | lw T0, 0x00(src) | 179 | lw t0, 0x00(src) |
176 | lw T1, 0x04(src) | 180 | lw t1, 0x04(src) |
177 | LONG_SUBU a1, a1, 0x8 | 181 | LONG_SUBU a1, a1, 0x8 |
178 | ADDC(sum, T0) | 182 | ADDC(sum, t0) |
179 | ADDC(sum, T1) | 183 | ADDC(sum, t1) |
180 | PTR_ADDU src, src, 0x8 | 184 | PTR_ADDU src, src, 0x8 |
181 | andi t8, src, 0x10 | 185 | andi t8, src, 0x10 |
182 | 186 | ||
@@ -184,46 +188,46 @@ oword_align: | |||
184 | beqz t8, begin_movement | 188 | beqz t8, begin_movement |
185 | LONG_SRL t8, a1, 0x7 | 189 | LONG_SRL t8, a1, 0x7 |
186 | 190 | ||
187 | lw T3, 0x08(src) | 191 | lw t3, 0x08(src) |
188 | lw T4, 0x0c(src) | 192 | lw t4, 0x0c(src) |
189 | lw T0, 0x00(src) | 193 | lw t0, 0x00(src) |
190 | lw T1, 0x04(src) | 194 | lw t1, 0x04(src) |
191 | ADDC(sum, T3) | 195 | ADDC(sum, t3) |
192 | ADDC(sum, T4) | 196 | ADDC(sum, t4) |
193 | ADDC(sum, T0) | 197 | ADDC(sum, t0) |
194 | ADDC(sum, T1) | 198 | ADDC(sum, t1) |
195 | LONG_SUBU a1, a1, 0x10 | 199 | LONG_SUBU a1, a1, 0x10 |
196 | PTR_ADDU src, src, 0x10 | 200 | PTR_ADDU src, src, 0x10 |
197 | LONG_SRL t8, a1, 0x7 | 201 | LONG_SRL t8, a1, 0x7 |
198 | 202 | ||
199 | begin_movement: | 203 | begin_movement: |
200 | beqz t8, 1f | 204 | beqz t8, 1f |
201 | andi T2, a1, 0x40 | 205 | andi t2, a1, 0x40 |
202 | 206 | ||
203 | move_128bytes: | 207 | move_128bytes: |
204 | CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4) | 208 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) |
205 | CSUM_BIGCHUNK(src, 0x20, sum, T0, T1, T3, T4) | 209 | CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) |
206 | CSUM_BIGCHUNK(src, 0x40, sum, T0, T1, T3, T4) | 210 | CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4) |
207 | CSUM_BIGCHUNK(src, 0x60, sum, T0, T1, T3, T4) | 211 | CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4) |
208 | LONG_SUBU t8, t8, 0x01 | 212 | LONG_SUBU t8, t8, 0x01 |
209 | bnez t8, move_128bytes | 213 | bnez t8, move_128bytes |
210 | PTR_ADDU src, src, 0x80 | 214 | PTR_ADDU src, src, 0x80 |
211 | 215 | ||
212 | 1: | 216 | 1: |
213 | beqz T2, 1f | 217 | beqz t2, 1f |
214 | andi T2, a1, 0x20 | 218 | andi t2, a1, 0x20 |
215 | 219 | ||
216 | move_64bytes: | 220 | move_64bytes: |
217 | CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4) | 221 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) |
218 | CSUM_BIGCHUNK(src, 0x20, sum, T0, T1, T3, T4) | 222 | CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) |
219 | PTR_ADDU src, src, 0x40 | 223 | PTR_ADDU src, src, 0x40 |
220 | 224 | ||
221 | 1: | 225 | 1: |
222 | beqz T2, do_end_words | 226 | beqz t2, do_end_words |
223 | andi t8, a1, 0x1c | 227 | andi t8, a1, 0x1c |
224 | 228 | ||
225 | move_32bytes: | 229 | move_32bytes: |
226 | CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4) | 230 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) |
227 | andi t8, a1, 0x1c | 231 | andi t8, a1, 0x1c |
228 | PTR_ADDU src, src, 0x20 | 232 | PTR_ADDU src, src, 0x20 |
229 | 233 | ||
@@ -232,22 +236,22 @@ do_end_words: | |||
232 | LONG_SRL t8, t8, 0x2 | 236 | LONG_SRL t8, t8, 0x2 |
233 | 237 | ||
234 | end_words: | 238 | end_words: |
235 | lw T0, (src) | 239 | lw t0, (src) |
236 | LONG_SUBU t8, t8, 0x1 | 240 | LONG_SUBU t8, t8, 0x1 |
237 | ADDC(sum, T0) | 241 | ADDC(sum, t0) |
238 | bnez t8, end_words | 242 | bnez t8, end_words |
239 | PTR_ADDU src, src, 0x4 | 243 | PTR_ADDU src, src, 0x4 |
240 | 244 | ||
241 | maybe_end_cruft: | 245 | maybe_end_cruft: |
242 | andi T2, a1, 0x3 | 246 | andi t2, a1, 0x3 |
243 | 247 | ||
244 | small_memcpy: | 248 | small_memcpy: |
245 | j small_csumcpy; move a1, T2 /* XXX ??? */ | 249 | j small_csumcpy; move a1, t2 /* XXX ??? */ |
246 | beqz t2, out | 250 | beqz t2, out |
247 | move a1, T2 | 251 | move a1, t2 |
248 | 252 | ||
249 | end_bytes: | 253 | end_bytes: |
250 | lb T0, (src) | 254 | lb t0, (src) |
251 | LONG_SUBU a1, a1, 0x1 | 255 | LONG_SUBU a1, a1, 0x1 |
252 | bnez a2, end_bytes | 256 | bnez a2, end_bytes |
253 | PTR_ADDU src, src, 0x1 | 257 | PTR_ADDU src, src, 0x1 |