author     Linus Torvalds <torvalds@ppc970.osdl.org>   2005-04-16 18:20:36 -0400
committer  Linus Torvalds <torvalds@ppc970.osdl.org>   2005-04-16 18:20:36 -0400
commit     1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree       0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/um/sys-i386/checksum.S
tags       Linux-2.6.12-rc2, v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/um/sys-i386/checksum.S')
-rw-r--r--   arch/um/sys-i386/checksum.S   460
1 file changed, 460 insertions, 0 deletions
diff --git a/arch/um/sys-i386/checksum.S b/arch/um/sys-i386/checksum.S
new file mode 100644
index 000000000000..a11171fb6223
--- /dev/null
+++ b/arch/um/sys-i386/checksum.S
@@ -0,0 +1,460 @@
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *		handling.
 *		Andi Kleen, add zeroing on error
 *		converted to pure assembler
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */

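/*
 * For reference, a rough C sketch of the 16-bit ones'-complement sum this
 * routine computes.  This is illustrative only: csum_partial_ref is a
 * made-up name, the unaligned 16-bit loads are an assumption of the
 * sketch, and the 32-bit partial sum it returns matches the assembly only
 * up to the final 16-bit fold done elsewhere.
 *
 *	unsigned int csum_partial_ref(const unsigned char *buff, int len,
 *	                              unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *
 *		while (len >= 2) {
 *			acc += *(const unsigned short *)buff;
 *			buff += 2;
 *			len -= 2;
 *		}
 *		if (len)                          // trailing odd byte
 *			acc += *buff;
 *		while (acc >> 32)                 // fold carries back in
 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (unsigned int)acc;
 *	}
 */
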
.text
.align 4
.globl arch_csum_partial

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

/*
 * Experiments with Ethernet and SLIP connections show that buff
 * is aligned on either a 2-byte or 4-byte boundary.  We get at
 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
 * alignment for the unrolled loop.
 */
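/*
 * Illustrative C for the 2 -> 4 byte alignment fixup performed just below
 * (a sketch under the assumption that buff is at least 2-byte aligned, as
 * the comment above states; acc stands for the running sum):
 *
 *	if (((unsigned long)buff & 2) && len >= 2) {
 *		acc += *(const unsigned short *)buff;   // eat two bytes...
 *		buff += 2;
 *		len -= 2;       // ...so the unrolled loop sees a 4-aligned buff
 *	}
 */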
arch_csum_partial:
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $2, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, %edx
	shrl $5, %ecx
	jz 2f
	testl %esi, %esi	# clear the carry flag before the adcl chain below
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
	movw (%esi),%cx
	leal 2(%esi),%esi
	je 6f
	shll $16,%ecx
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	popl %ebx
	popl %esi
	ret

#else

/* Version for PentiumII/PPro */

arch_csum_partial:
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: const unsigned char *buf

	testl $2, %esi
	jnz 30f
10:
	movl %ecx, %edx
	movl %ecx, %ebx
	andl $0x7c, %ebx
	shrl $7, %ecx
	addl %ebx,%esi
	shrl $2, %ebx
	negl %ebx
	lea 45f(%ebx,%ebx,2), %ebx
	testl %esi, %esi	# clear the carry flag before entering the adcl chain
	jmp *%ebx
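	# The indirect jump above is a computed entry into the unrolled
	# loop at 40:/45: below: %ebx holds minus the number of residual
	# dwords ((len & 0x7c) / 4), and each adcl in that chain assembles
	# to 3 bytes, hence the (%ebx,%ebx,2) scaling in the lea.  %esi was
	# already advanced past the residue, so the negative displacements
	# line up.  (A reading of the code, not an authoritative note.)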

# Handle 2-byte-aligned regions
20:	addw (%esi), %ax
	lea 2(%esi), %esi
	adcl $0, %eax
	jmp 10b

30:	subl $2, %ecx
	ja 20b
	je 32f
	movzbl (%esi),%ebx	# csumming 1 byte, 2-aligned
	addl %ebx, %eax
	adcl $0, %eax
	jmp 80f
32:
	addw (%esi), %ax	# csumming 2 bytes, 2-aligned
	adcl $0, %eax
	jmp 80f

40:
	addl -128(%esi), %eax
	adcl -124(%esi), %eax
	adcl -120(%esi), %eax
	adcl -116(%esi), %eax
	adcl -112(%esi), %eax
	adcl -108(%esi), %eax
	adcl -104(%esi), %eax
	adcl -100(%esi), %eax
	adcl -96(%esi), %eax
	adcl -92(%esi), %eax
	adcl -88(%esi), %eax
	adcl -84(%esi), %eax
	adcl -80(%esi), %eax
	adcl -76(%esi), %eax
	adcl -72(%esi), %eax
	adcl -68(%esi), %eax
	adcl -64(%esi), %eax
	adcl -60(%esi), %eax
	adcl -56(%esi), %eax
	adcl -52(%esi), %eax
	adcl -48(%esi), %eax
	adcl -44(%esi), %eax
	adcl -40(%esi), %eax
	adcl -36(%esi), %eax
	adcl -32(%esi), %eax
	adcl -28(%esi), %eax
	adcl -24(%esi), %eax
	adcl -20(%esi), %eax
	adcl -16(%esi), %eax
	adcl -12(%esi), %eax
	adcl -8(%esi), %eax
	adcl -4(%esi), %eax
45:
	lea 128(%esi), %esi
	adcl $0, %eax
	dec %ecx
	jge 40b
	movl %edx, %ecx
50:	andl $3, %ecx
	jz 80f

# Handle the last 1-3 bytes without jumping
	notl %ecx		# 1->2, 2->1, 3->0, higher bits are masked
	movl $0xffffff,%ebx	# by the shll and shrl instructions
	shll $3,%ecx
	shrl %cl,%ebx
	andl -128(%esi),%ebx	# esi is 4-aligned so should be ok
	addl %ebx,%eax
	adcl $0,%eax
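	# Worked example (illustrative): for len & 3 == 1, notl gives
	# ...11111110, shll $3 turns that into a shift count of 16 (only
	# the low 5 bits of %cl matter), and 0xffffff >> 16 = 0xff, so
	# exactly one trailing byte of the final dword survives the andl.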
80:
	popl %ebx
	popl %esi
	ret

#endif

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
					int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * thus we can call a custom exception handler for all access types.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */
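/*
 * Put differently (a reading of the code below, not an official summary):
 * csum_partial_copy_generic_i386 behaves like csum_partial with the copy
 * from src to dst folded into the same pass.  If a read from src faults,
 * the fixup stores -EFAULT through src_err_ptr, zero-fills dst and returns
 * 0; if a write to dst faults, it stores -EFAULT through dst_err_ptr and
 * returns the partial sum accumulated so far.
 */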

#define SRC(y...)			\
	9999: y;			\
	.section __ex_table, "a";	\
	.long 9999b, 6001f;		\
	.previous

#define DST(y...)			\
	9999: y;			\
	.section __ex_table, "a";	\
	.long 9999b, 6002f;		\
	.previous
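/*
 * Each SRC()/DST() wrapper emits one __ex_table entry pairing the address
 * of the possibly-faulting instruction with a fixup label, so a fault in a
 * wrapped access resumes at 6001:/6002: instead of oopsing.  Roughly (a
 * sketch of the mechanism; the real definition lives in the uaccess
 * headers):
 *
 *	struct exception_table_entry {
 *		unsigned long insn;	// address of the faulting instruction
 *		unsigned long fixup;	// address to resume at
 *	};
 */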

.align 4
.globl csum_partial_copy_generic_i386

#ifndef CONFIG_X86_USE_PPRO_CHECKSUM

#define ARGBASE 16
#define FP	12

csum_partial_copy_generic_i386:
	subl $4,%esp
	pushl %edi
	pushl %esi
	pushl %ebx
	movl ARGBASE+16(%esp),%eax	# sum
	movl ARGBASE+12(%esp),%ecx	# len
	movl ARGBASE+4(%esp),%esi	# src
	movl ARGBASE+8(%esp),%edi	# dst

	testl $2, %edi			# Check alignment.
	jz 2f				# Jump if alignment is ok.
	subl $2, %ecx			# Alignment uses up two bytes.
	jae 1f				# Jump if we had at least two bytes.
	addl $2, %ecx			# ecx was < 2.  Deal with it.
	jmp 4f
SRC(1:	movw (%esi), %bx	)
	addl $2, %esi
DST(	movw %bx, (%edi)	)
	addl $2, %edi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, FP(%esp)
	shrl $5, %ecx
	jz 2f
	testl %esi, %esi
SRC(1:	movl (%esi), %ebx	)
SRC(	movl 4(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 4(%edi)	)

SRC(	movl 8(%esi), %ebx	)
SRC(	movl 12(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 8(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 12(%edi)	)

SRC(	movl 16(%esi), %ebx	)
SRC(	movl 20(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 16(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 20(%edi)	)

SRC(	movl 24(%esi), %ebx	)
SRC(	movl 28(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 24(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 28(%edi)	)

	lea 32(%esi), %esi
	lea 32(%edi), %edi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl FP(%esp), %edx
	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx			# This clears CF
SRC(3:	movl (%esi), %ebx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	lea 4(%esi), %esi
	lea 4(%edi), %edi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
SRC(	movw (%esi), %cx	)
	leal 2(%esi), %esi
DST(	movw %cx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%ecx
SRC(5:	movb (%esi), %cl	)
DST(	movb %cl, (%edi)	)
6:	addl %ecx, %eax
	adcl $0, %eax
7:
5000:

# Exception handler:
.section .fixup, "ax"

6001:
	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)

	# zero the complete destination - computing the rest
	# is too much work
	movl ARGBASE+8(%esp), %edi	# dst
	movl ARGBASE+12(%esp), %ecx	# len
	xorl %eax,%eax
	rep ; stosb

	jmp 5000b

6002:
	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT,(%ebx)
	jmp 5000b

.previous

	popl %ebx
	popl %esi
	popl %edi
	popl %ecx			# equivalent to addl $4,%esp
	ret

#else

/* Version for PentiumII/PPro */

#define ROUND1(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	addl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	;

#define ROUND(x) \
	SRC(movl x(%esi), %ebx	)	;	\
	adcl %ebx, %eax			;	\
	DST(movl %ebx, x(%edi)	)	;

#define ARGBASE 12

csum_partial_copy_generic_i386:
	pushl %ebx
	pushl %edi
	pushl %esi
	movl ARGBASE+4(%esp),%esi	#src
	movl ARGBASE+8(%esp),%edi	#dst
	movl ARGBASE+12(%esp),%ecx	#len
	movl ARGBASE+16(%esp),%eax	#sum
#	movl %ecx, %edx
	movl %ecx, %ebx
	movl %esi, %edx
	shrl $6, %ecx
	andl $0x3c, %ebx
	negl %ebx
	subl %ebx, %esi
	subl %ebx, %edi
	lea -1(%esi),%edx
	andl $-32,%edx
	lea 3f(%ebx,%ebx), %ebx
	testl %esi, %esi	# clear the carry flag before entering the ROUND chain
	jmp *%ebx
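	# As in arch_csum_partial above, this is a computed entry into the
	# unrolled ROUND chain ending at 3: below: %ebx holds minus the
	# residual byte count (len & 0x3c), scaled by 2 in the lea because
	# each ROUND expands to 8 bytes of code.  (A reading of the code,
	# not an authoritative note.)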
1:	addl $64,%esi
	addl $64,%edi
	SRC(movb -32(%edx),%bl)	; SRC(movb (%edx),%bl)	# read-ahead byte touches of the source via %edx (wrapped in SRC so a fault is still handled)
	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
	ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)
3:	adcl $0,%eax
	addl $64, %edx
	dec %ecx
	jge 1b
4:	movl ARGBASE+12(%esp),%edx	#len
	andl $3, %edx
	jz 7f
	cmpl $2, %edx
	jb 5f
SRC(	movw (%esi), %dx	)
	leal 2(%esi), %esi
DST(	movw %dx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%edx
5:
SRC(	movb (%esi), %dl	)
DST(	movb %dl, (%edi)	)
6:	addl %edx, %eax
	adcl $0, %eax
7:
.section .fixup, "ax"
6001:	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)
	# zero the complete destination (computing the rest is too much work)
	movl ARGBASE+8(%esp),%edi	# dst
	movl ARGBASE+12(%esp),%ecx	# len
	xorl %eax,%eax
	rep; stosb
	jmp 7b
6002:	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT, (%ebx)
	jmp 7b
.previous

	popl %esi
	popl %edi
	popl %ebx
	ret

#undef ROUND
#undef ROUND1

#endif