Diffstat (limited to 'arch/mips/lib/csum_partial.S')
-rw-r--r--  arch/mips/lib/csum_partial.S | 61
1 files changed, 51 insertions, 10 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index c0a77fe038be..957a82484e3e 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -7,6 +7,7 @@
  *
  * Copyright (C) 1998, 1999 Ralf Baechle
  * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) 2007 Maciej W. Rozycki
  */
 #include <linux/errno.h>
 #include <asm/asm.h>
@@ -52,9 +53,12 @@
 #define UNIT(unit)  ((unit)*NBYTES)
 
 #define ADDC(sum,reg)						\
+	.set	push;						\
+	.set	noat;						\
 	ADD	sum, reg;					\
 	sltu	v1, sum, reg;					\
-	ADD	sum, v1
+	ADD	sum, v1;					\
+	.set	pop
 
 #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
 	LOAD	_t0, (offset + UNIT(0))(src);			\
@@ -178,8 +182,10 @@ move_128bytes:
 	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
 	CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
 	LONG_SUBU	t8, t8, 0x01
+	.set	reorder				/* DADDI_WAR */
+	PTR_ADDU	src, src, 0x80
 	bnez	t8, move_128bytes
-	 PTR_ADDU	src, src, 0x80
+	.set	noreorder
 
 1:
 	beqz	t2, 1f
@@ -208,8 +214,10 @@ end_words:
 	lw	t0, (src)
 	LONG_SUBU	t8, t8, 0x1
 	ADDC(sum, t0)
+	.set	reorder				/* DADDI_WAR */
+	PTR_ADDU	src, src, 0x4
 	bnez	t8, end_words
-	 PTR_ADDU	src, src, 0x4
+	.set	noreorder
 
 /* unknown src alignment and < 8 bytes to go */
 small_csumcpy:
@@ -246,6 +254,8 @@ small_csumcpy:
 1:	ADDC(sum, t1)
 
 	/* fold checksum */
+	.set	push
+	.set	noat
 #ifdef USE_DOUBLE
 	dsll32	v1, sum, 0
 	daddu	sum, v1
@@ -266,6 +276,7 @@ small_csumcpy:
 	srl	sum, sum, 8
 	or	sum, v1
 	andi	sum, 0xffff
+	.set	pop
 1:
 	.set	reorder
 	/* Add the passed partial csum. */
@@ -373,7 +384,11 @@
 
 #define ADDRMASK (NBYTES-1)
 
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	.set	noat
+#else
+	.set	at=v1
+#endif
 
 LEAF(__csum_partial_copy_user)
 	PTR_ADDU	AT, src, len	/* See (1) above. */
@@ -441,8 +456,10 @@ EXC( STORE t6, UNIT(6)(dst), s_exc)
 	ADDC(sum, t6)
 EXC(	STORE	t7, UNIT(7)(dst),	s_exc)
 	ADDC(sum, t7)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 8*NBYTES
 	bgez	len, 1b
-	 ADD	dst, dst, 8*NBYTES
+	.set	noreorder
 	ADD	len, 8*NBYTES		# revert len (see above)
 
 	/*
@@ -471,8 +488,10 @@ EXC( STORE t2, UNIT(2)(dst), s_exc)
 	ADDC(sum, t2)
 EXC(	STORE	t3, UNIT(3)(dst),	s_exc)
 	ADDC(sum, t3)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 4*NBYTES
 	beqz	len, done
-	 ADD	dst, dst, 4*NBYTES
+	.set	noreorder
 less_than_4units:
 	/*
 	 * rem = len % NBYTES
@@ -485,8 +504,10 @@ EXC( LOAD t0, 0(src), l_exc)
 	SUB	len, len, NBYTES
 EXC(	STORE	t0, 0(dst),		s_exc)
 	ADDC(sum, t0)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, NBYTES
 	bne	rem, len, 1b
-	 ADD	dst, dst, NBYTES
+	.set	noreorder
 
 	/*
 	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
@@ -572,8 +593,10 @@ EXC( STORE t2, UNIT(2)(dst), s_exc)
 	ADDC(sum, t2)
 EXC(	STORE	t3, UNIT(3)(dst),	s_exc)
 	ADDC(sum, t3)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 4*NBYTES
 	bne	len, rem, 1b
-	 ADD	dst, dst, 4*NBYTES
+	.set	noreorder
 
 cleanup_src_unaligned:
 	beqz	len, done
@@ -587,8 +610,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy)
 	SUB	len, len, NBYTES
 EXC(	STORE	t0, 0(dst),		s_exc)
 	ADDC(sum, t0)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, NBYTES
 	bne	len, rem, 1b
-	 ADD	dst, dst, NBYTES
+	.set	noreorder
 
 copy_bytes_checklen:
 	beqz	len, done
@@ -631,6 +656,8 @@ copy_bytes_done:
 	ADDC(sum, t2)
 done:
 	/* fold checksum */
+	.set	push
+	.set	noat
 #ifdef USE_DOUBLE
 	dsll32	v1, sum, 0
 	daddu	sum, v1
@@ -651,6 +678,7 @@ done:
 	srl	sum, sum, 8
 	or	sum, v1
 	andi	sum, 0xffff
+	.set	pop
 1:
 	.set	reorder
 	ADDC(sum, psum)
@@ -678,8 +706,10 @@ EXC( lbu t1, 0(src), l_exc)
 	SLLV	t1, t1, t2
 	addu	t2, SHIFT_INC
 	ADDC(sum, t1)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 1
 	bne	src, t0, 1b
-	 ADD	dst, dst, 1
+	.set	noreorder
 l_exc:
 	LOAD	t0, TI_TASK($28)
 	nop
@@ -697,12 +727,22 @@ l_exc:
 	 * Clear len bytes starting at dst.  Can't call __bzero because it
 	 * might modify len.  An inefficient loop for these rare times...
 	 */
+	.set	reorder				/* DADDI_WAR */
+	SUB	src, len, 1
 	beqz	len, done
-	 SUB	src, len, 1
+	.set	noreorder
 1:	sb	zero, 0(dst)
 	ADD	dst, dst, 1
+	.set	push
+	.set	noat
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	bnez	src, 1b
 	 SUB	src, src, 1
+#else
+	li	v1, 1
+	bnez	src, 1b
+	 SUB	src, src, v1
+#endif
 	li	v1, -EFAULT
 	b	done
 	 sw	v1, (errptr)
@@ -712,4 +752,5 @@ s_exc:
 	li	v1, -EFAULT
 	jr	ra
 	 sw	v1, (errptr)
+	.set	pop
 	END(__csum_partial_copy_user)
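Note on the recurring pattern in the hunks above; the rationale is inferred from the DADDI_WAR markers and the CONFIG_CPU_DADDI_WORKAROUNDS conditionals rather than stated on this page. When the R4000/R4400 DADDI/DADDIU erratum workarounds are enabled, macros such as ADD, SUB and PTR_ADDU with immediate operands may assemble to more than one instruction, so they can no longer be hand-placed in a branch delay slot under .set noreorder. Each hunk therefore hoists the increment above its branch and briefly switches to .set reorder so the assembler schedules the branch and its delay slot itself. The .set at=v1 hunk appears to serve the same builds by redirecting the assembler temporary away from AT, which this code uses explicitly (PTR_ADDU AT, src, len). A minimal sketch of the before/after shape, taken from the move_128bytes loop:

	# Before: the pointer update is hand-scheduled into the delay slot.
	LONG_SUBU	t8, t8, 0x01
	bnez	t8, move_128bytes
	 PTR_ADDU	src, src, 0x80		# branch delay slot

	# After: under .set reorder the assembler places the branch and fills
	# its delay slot, so a multi-instruction expansion of PTR_ADDU on
	# DADDI-workaround kernels stays out of the slot.
	LONG_SUBU	t8, t8, 0x01
	.set	reorder				/* DADDI_WAR */
	PTR_ADDU	src, src, 0x80
	bnez	t8, move_128bytes
	.set	noreorder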