diff options
Diffstat (limited to 'arch/sh/lib/checksum.S')
-rw-r--r-- | arch/sh/lib/checksum.S | 385 |
1 files changed, 385 insertions, 0 deletions
diff --git a/arch/sh/lib/checksum.S b/arch/sh/lib/checksum.S new file mode 100644 index 000000000000..7c50dfe68c07 --- /dev/null +++ b/arch/sh/lib/checksum.S | |||
@@ -0,0 +1,385 @@ | |||
1 | /* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $ | ||
2 | * | ||
3 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
4 | * operating system. INET is implemented using the BSD Socket | ||
5 | * interface as the means of communication with the user level. | ||
6 | * | ||
7 | * IP/TCP/UDP checksumming routines | ||
8 | * | ||
9 | * Authors: Jorge Cwik, <jorge@laser.satlink.net> | ||
10 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> | ||
11 | * Tom May, <ftom@netcom.com> | ||
12 | * Pentium Pro/II routines: | ||
13 | * Alexander Kjeldaas <astor@guardian.no> | ||
14 | * Finn Arne Gangstad <finnag@guardian.no> | ||
15 | * Lots of code moved from tcp.c and ip.c; see those files | ||
16 | * for more names. | ||
17 | * | ||
18 | * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception | ||
19 | * handling. | ||
20 | * Andi Kleen, add zeroing on error | ||
21 | * converted to pure assembler | ||
22 | * | ||
23 | * SuperH version: Copyright (C) 1999 Niibe Yutaka | ||
24 | * | ||
25 | * This program is free software; you can redistribute it and/or | ||
26 | * modify it under the terms of the GNU General Public License | ||
27 | * as published by the Free Software Foundation; either version | ||
28 | * 2 of the License, or (at your option) any later version. | ||
29 | */ | ||
30 | |||
31 | #include <asm/errno.h> | ||
32 | #include <linux/linkage.h> | ||
33 | |||
34 | /* | ||
35 | * unsigned int csum_partial(const unsigned char *buf, int len, unsigned int sum); | ||
36 | * | ||
37 | * Computes a partial checksum, e.g. for TCP/UDP fragments: the len bytes at buf are added, with end-around carry, into the 32-bit value sum. | ||
38 | * As used below: r4 = buf, r5 = len, r6 = sum; the result is returned in r0. r0-r5 and the T bit are modified. | ||
39 | * NOTE(review): only bit 1 of buf is tested before the 16/32-bit loads, so an odd buf address would reach them unaligned - confirm callers never pass one. | ||
40 | * | ||
41 | */ | ||
42 | |||
43 | .text | ||
44 | ENTRY(csum_partial) | ||
45 | /* | ||
46 | * Experiments with Ethernet and SLIP connections show that buff | ||
47 | * is aligned on either a 2-byte or 4-byte boundary. We get at | ||
48 | * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. | ||
49 | * Fortunately, it is easy to convert 2-byte alignment to 4-byte | ||
50 | * alignment for the unrolled loop. | ||
51 | */ | ||
52 | mov r5, r1 /* r1 = len; the tail code at 4: and 6: reads it */ | ||
53 | mov r4, r0 | ||
54 | tst #2, r0 ! Check alignment. | ||
55 | bt 2f ! Jump if alignment is ok. | ||
56 | ! | ||
57 | add #-2, r5 ! Alignment uses up two bytes. | ||
58 | cmp/pz r5 ! | ||
59 | bt/s 1f ! Jump if we had at least two bytes. | ||
60 | clrt /* delay slot: the addc at 1: starts with no carry */ | ||
61 | bra 6f | ||
62 | add #2, r5 ! r5 was < 2. Deal with it. | ||
63 | 1: | ||
64 | mov r5, r1 ! Save new len for later use. | ||
65 | mov.w @r4+, r0 | ||
66 | extu.w r0, r0 /* zero-extend the 16-bit value */ | ||
67 | addc r0, r6 | ||
68 | bf 2f | ||
69 | add #1, r6 /* carry out of the add: wrap it back into the sum */ | ||
70 | 2: | ||
71 | mov #-5, r0 | ||
72 | shld r0, r5 /* r5 >>= 5: number of 32-byte blocks left */ | ||
73 | tst r5, r5 | ||
74 | bt/s 4f ! if it's =0, go to 4f | ||
75 | clrt | ||
76 | .align 2 | ||
77 | 3: /* eight longwords (32 bytes) per pass; each addc also adds the carry left in T by the previous one */ | ||
78 | mov.l @r4+, r0 | ||
79 | mov.l @r4+, r2 | ||
80 | mov.l @r4+, r3 | ||
81 | addc r0, r6 | ||
82 | mov.l @r4+, r0 | ||
83 | addc r2, r6 | ||
84 | mov.l @r4+, r2 | ||
85 | addc r3, r6 | ||
86 | mov.l @r4+, r3 | ||
87 | addc r0, r6 | ||
88 | mov.l @r4+, r0 | ||
89 | addc r2, r6 | ||
90 | mov.l @r4+, r2 | ||
91 | addc r3, r6 | ||
92 | addc r0, r6 | ||
93 | addc r2, r6 | ||
94 | movt r0 /* save the carry in r0: dt sets T from its own result */ | ||
95 | dt r5 | ||
96 | bf/s 3b | ||
97 | cmp/eq #1, r0 /* delay slot: T = saved carry again */ | ||
98 | ! here, we know r5==0 | ||
99 | addc r5, r6 ! add carry to r6 | ||
100 | 4: | ||
101 | mov r1, r0 | ||
102 | and #0x1c, r0 /* bytes in the remaining whole longwords (0..28) */ | ||
103 | tst r0, r0 | ||
104 | bt/s 6f | ||
105 | mov r0, r5 | ||
106 | shlr2 r5 /* r5 = longword count */ | ||
107 | mov #0, r2 | ||
108 | 5: /* each pass adds the longword loaded on the previous pass (0 on the first), then loads the next */ | ||
109 | addc r2, r6 | ||
110 | mov.l @r4+, r2 | ||
111 | movt r0 | ||
112 | dt r5 | ||
113 | bf/s 5b | ||
114 | cmp/eq #1, r0 | ||
115 | addc r2, r6 /* the last longword loaded */ | ||
116 | addc r5, r6 ! r5==0 here, so it means add carry-bit | ||
117 | 6: | ||
118 | mov r1, r5 | ||
119 | mov #3, r0 | ||
120 | and r0, r5 /* r5 = trailing bytes (0..3) */ | ||
121 | tst r5, r5 | ||
122 | bt 9f ! if it's =0 go to 9f | ||
123 | mov #2, r1 | ||
124 | cmp/hs r1, r5 | ||
125 | bf 7f /* fewer than 2 left: a single byte */ | ||
126 | mov.w @r4+, r0 | ||
127 | extu.w r0, r0 | ||
128 | cmp/eq r1, r5 | ||
129 | bt/s 8f /* exactly 2 bytes: just add the halfword */ | ||
130 | clrt | ||
131 | shll16 r0 /* 3 bytes: the halfword goes in the upper half, the last byte is added below */ | ||
132 | addc r0, r6 | ||
133 | 7: | ||
134 | mov.b @r4+, r0 | ||
135 | extu.b r0, r0 | ||
136 | #ifndef __LITTLE_ENDIAN__ | ||
137 | shll8 r0 /* big-endian: the byte is the high half of a 16-bit word */ | ||
138 | #endif | ||
139 | 8: | ||
140 | addc r0, r6 | ||
141 | mov #0, r0 | ||
142 | addc r0, r6 /* add the final carry */ | ||
143 | 9: | ||
144 | rts | ||
145 | mov r6, r0 /* delay slot: return the sum in r0 */ | ||
146 | |||
147 | /* | ||
148 | unsigned int csum_partial_copy_generic (const char *src, char *dst, int len, | ||
149 | int sum, int *src_err_ptr, int *dst_err_ptr) | ||
150 | */ | ||
151 | |||
152 | /* | ||
153 | * Copy from src to dst while checksumming, otherwise like csum_partial | ||
154 | * | ||
155 | * The macros SRC and DST specify the type of access for the instruction. | ||
156 | * thus we can call a custom exception handler for all access types. | ||
157 | * | ||
158 | * FIXME: could someone double-check whether I haven't mixed up some SRC and | ||
159 | * DST definitions? It's damn hard to trigger all cases. I hope I got | ||
160 | * them all but there's no guarantee. | ||
161 | */ | ||
162 | |||
163 | #define SRC(...) /* wraps one instruction that loads from the source buffer */ \ | ||
164 | 9999: __VA_ARGS__ ; /* the instruction itself, labelled 9999 */ \ | ||
165 | .section __ex_table, "a"; /* switch to the exception table section */ \ | ||
166 | .long 9999b, 6001f ; /* entry: address of the access, address of label 6001 (source-fault handler) */ \ | ||
167 | .previous /* back to the section that was in use */ | ||
168 | |||
169 | #define DST(...) /* wraps one instruction that stores to the destination buffer */ \ | ||
170 | 9999: __VA_ARGS__ ; /* the instruction itself, labelled 9999 */ \ | ||
171 | .section __ex_table, "a"; /* switch to the exception table section */ \ | ||
172 | .long 9999b, 6002f ; /* entry: address of the access, address of label 6002 (destination-fault handler) */ \ | ||
173 | .previous /* back to the section that was in use */ | ||
174 | |||
175 | ! Copies LEN bytes from SRC to DST and adds them into SUM with end-around carry; the new sum is returned in r0. | ||
176 | ! r4: const char *SRC | ||
177 | ! r5: char *DST | ||
178 | ! r6: int LEN | ||
179 | ! r7: int SUM | ||
180 | ! r2: byte count used by the tail code at 4: (every path that reaches 4: sets it first) | ||
181 | ! on stack (at @(8,r15) and @(12,r15) once DST and LEN have been pushed below): | ||
182 | ! int *SRC_ERR_PTR receives -EFAULT if a SRC() load faults; DST is then zero-filled and 0 is returned | ||
183 | ! int *DST_ERR_PTR receives -EFAULT if a DST() store faults | ||
184 | ! | ||
185 | ENTRY(csum_partial_copy_generic) | ||
186 | mov.l r5,@-r15 ! Save DST and LEN: the source-fault handler (6001) | ||
187 | mov.l r6,@-r15 ! reloads them to zero the destination. | ||
188 | |||
189 | mov #3,r0 ! Check src and dest are equally aligned | ||
190 | mov r4,r1 | ||
191 | and r0,r1 | ||
192 | and r5,r0 | ||
193 | cmp/eq r1,r0 | ||
194 | bf 3f ! Different alignments, use slow version | ||
195 | tst #1,r0 ! Check dest word aligned | ||
196 | bf 3f ! If not, do it the slow way | ||
197 | |||
198 | mov #2,r0 | ||
199 | tst r0,r5 ! Check dest alignment. | ||
200 | bt 2f ! Jump if alignment is ok. | ||
201 | add #-2,r6 ! Alignment uses up two bytes. | ||
202 | cmp/pz r6 ! Jump if we had at least two bytes. | ||
203 | bt/s 1f | ||
204 | clrt | ||
205 | add #2,r6 ! r6 was < 2. Deal with it. | ||
206 | bra 4f | ||
207 | mov r6,r2 ! Delay slot: r2 = LEN, which 4: reloads as the tail count | ||
208 | 3: ! Handle different src and dest alignments. | ||
209 | ! This is not common, so simple byte by byte copy will do. | ||
210 | mov r6,r2 ! r2 = LEN, read again after the loop and at 4: | ||
211 | shlr r6 ! r6 = number of byte pairs | ||
212 | tst r6,r6 | ||
213 | bt 4f ! LEN is 0 or 1: only the tail code is needed | ||
214 | clrt | ||
215 | .align 2 | ||
216 | 5: | ||
217 | SRC( mov.b @r4+,r1 ) | ||
218 | SRC( mov.b @r4+,r0 ) | ||
219 | extu.b r1,r1 | ||
220 | DST( mov.b r1,@r5 ) | ||
221 | DST( mov.b r0,@(1,r5) ) | ||
222 | extu.b r0,r0 | ||
223 | add #2,r5 | ||
224 | |||
225 | #ifdef __LITTLE_ENDIAN__ | ||
226 | shll8 r0 | ||
227 | #else | ||
228 | shll8 r1 | ||
229 | #endif | ||
230 | or r1,r0 ! r0 = the byte pair combined as a 16-bit load would have read it | ||
231 | |||
232 | addc r0,r7 | ||
233 | movt r0 ! Save the carry: dt sets T from its own result | ||
234 | dt r6 | ||
235 | bf/s 5b | ||
236 | cmp/eq #1,r0 ! Delay slot: T = saved carry again | ||
237 | mov #0,r0 | ||
238 | addc r0, r7 ! Add the last carry into the sum | ||
239 | |||
240 | mov r2, r0 | ||
241 | tst #1, r0 ! An odd LEN leaves one byte | ||
242 | bt 7f | ||
243 | bra 5f ! Copy it with the single-byte tail code | ||
244 | clrt | ||
245 | |||
246 | ! src and dest equally aligned, but to a two byte boundary. | ||
247 | ! Handle first two bytes as a special case | ||
248 | .align 2 | ||
249 | 1: | ||
250 | SRC( mov.w @r4+,r0 ) | ||
251 | DST( mov.w r0,@r5 ) | ||
252 | add #2,r5 | ||
253 | extu.w r0,r0 | ||
254 | addc r0,r7 | ||
255 | mov #0,r0 | ||
256 | addc r0,r7 | ||
257 | 2: | ||
258 | mov r6,r2 ! r2 = bytes left, for the tail code at 2: and 4: | ||
259 | mov #-5,r0 | ||
260 | shld r0,r6 ! r6 = number of 32-byte blocks | ||
261 | tst r6,r6 | ||
262 | bt/s 2f | ||
263 | clrt | ||
264 | .align 2 | ||
265 | 1: | ||
266 | SRC( mov.l @r4+,r0 ) | ||
267 | SRC( mov.l @r4+,r1 ) | ||
268 | addc r0,r7 | ||
269 | DST( mov.l r0,@r5 ) | ||
270 | DST( mov.l r1,@(4,r5) ) | ||
271 | addc r1,r7 | ||
272 | |||
273 | SRC( mov.l @r4+,r0 ) | ||
274 | SRC( mov.l @r4+,r1 ) | ||
275 | addc r0,r7 | ||
276 | DST( mov.l r0,@(8,r5) ) | ||
277 | DST( mov.l r1,@(12,r5) ) | ||
278 | addc r1,r7 | ||
279 | |||
280 | SRC( mov.l @r4+,r0 ) | ||
281 | SRC( mov.l @r4+,r1 ) | ||
282 | addc r0,r7 | ||
283 | DST( mov.l r0,@(16,r5) ) | ||
284 | DST( mov.l r1,@(20,r5) ) | ||
285 | addc r1,r7 | ||
286 | |||
287 | SRC( mov.l @r4+,r0 ) | ||
288 | SRC( mov.l @r4+,r1 ) | ||
289 | addc r0,r7 | ||
290 | DST( mov.l r0,@(24,r5) ) | ||
291 | DST( mov.l r1,@(28,r5) ) | ||
292 | addc r1,r7 | ||
293 | add #32,r5 | ||
294 | movt r0 ! Save the carry across dt | ||
295 | dt r6 | ||
296 | bf/s 1b | ||
297 | cmp/eq #1,r0 ! Delay slot: T = saved carry again | ||
298 | mov #0,r0 | ||
299 | addc r0,r7 | ||
300 | |||
301 | 2: mov r2,r6 | ||
302 | mov #0x1c,r0 | ||
303 | and r0,r6 ! r6 = bytes in the remaining whole longwords | ||
304 | cmp/pl r6 | ||
305 | bf/s 4f | ||
306 | clrt | ||
307 | shlr2 r6 ! r6 = longword count | ||
308 | 3: | ||
309 | SRC( mov.l @r4+,r0 ) | ||
310 | addc r0,r7 | ||
311 | DST( mov.l r0,@r5 ) | ||
312 | add #4,r5 | ||
313 | movt r0 | ||
314 | dt r6 | ||
315 | bf/s 3b | ||
316 | cmp/eq #1,r0 | ||
317 | mov #0,r0 | ||
318 | addc r0,r7 | ||
319 | 4: mov r2,r6 | ||
320 | mov #3,r0 | ||
321 | and r0,r6 ! r6 = trailing bytes (0..3) | ||
322 | cmp/pl r6 | ||
323 | bf 7f | ||
324 | mov #2,r1 | ||
325 | cmp/hs r1,r6 | ||
326 | bf 5f ! Fewer than 2 left: a single byte | ||
327 | SRC( mov.w @r4+,r0 ) | ||
328 | DST( mov.w r0,@r5 ) | ||
329 | extu.w r0,r0 | ||
330 | add #2,r5 | ||
331 | cmp/eq r1,r6 | ||
332 | bt/s 6f ! Exactly 2 bytes: just add the halfword | ||
333 | clrt | ||
334 | shll16 r0 ! 3 bytes: halfword goes in the upper half, last byte follows | ||
335 | addc r0,r7 | ||
336 | 5: | ||
337 | SRC( mov.b @r4+,r0 ) | ||
338 | DST( mov.b r0,@r5 ) | ||
339 | extu.b r0,r0 | ||
340 | #ifndef __LITTLE_ENDIAN__ | ||
341 | shll8 r0 | ||
342 | #endif | ||
343 | 6: addc r0,r7 | ||
344 | mov #0,r0 | ||
345 | addc r0,r7 | ||
346 | 7: | ||
347 | 5000: ! Common exit; both fault handlers jump back here | ||
348 | |||
349 | # Exception handler: | ||
350 | .section .fixup, "ax" | ||
351 | |||
352 | 6001: | ||
353 | mov.l @(8,r15),r0 ! src_err_ptr | ||
354 | mov #-EFAULT,r1 | ||
355 | mov.l r1,@r0 | ||
356 | |||
357 | ! zero the complete destination - computing the rest | ||
358 | ! is too much work | ||
359 | mov.l @(4,r15),r5 ! dst | ||
360 | mov.l @r15,r6 ! len | ||
361 | mov #0,r7 ! Zero byte for the fill; also the value returned | ||
362 | 1: mov.b r7,@r5 | ||
363 | dt r6 | ||
364 | bf/s 1b | ||
365 | add #1,r5 | ||
366 | mov.l 8000f,r0 | ||
367 | jmp @r0 | ||
368 | nop | ||
369 | .align 2 | ||
370 | 8000: .long 5000b | ||
371 | |||
372 | 6002: | ||
373 | mov.l @(12,r15),r0 ! dst_err_ptr | ||
374 | mov #-EFAULT,r1 | ||
375 | mov.l r1,@r0 | ||
376 | mov.l 8001f,r0 | ||
377 | jmp @r0 | ||
378 | nop | ||
379 | .align 2 | ||
380 | 8001: .long 5000b | ||
381 | |||
382 | .previous | ||
383 | add #8,r15 ! Drop the saved DST and LEN | ||
384 | rts | ||
385 | mov r7,r0 ! Delay slot: return the sum in r0 | ||