Diffstat (limited to 'arch/mips/lib/csum_partial.S')
-rw-r--r--	arch/mips/lib/csum_partial.S	61
1 file changed, 51 insertions(+), 10 deletions(-)
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index c0a77fe038be..957a82484e3e 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -7,6 +7,7 @@
  *
  * Copyright (C) 1998, 1999 Ralf Baechle
  * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) 2007 Maciej W. Rozycki
  */
 #include <linux/errno.h>
 #include <asm/asm.h>
@@ -52,9 +53,12 @@
 #define UNIT(unit)  ((unit)*NBYTES)
 
 #define ADDC(sum,reg)						\
+	.set	push;						\
+	.set	noat;						\
 	ADD	sum, reg;					\
 	sltu	v1, sum, reg;					\
-	ADD	sum, v1
+	ADD	sum, v1;					\
+	.set	pop
 
 #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
 	LOAD	_t0, (offset + UNIT(0))(src);			\
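ADDC() is the heart of the checksum: an add with end-around carry, which is how a ones'-complement sum is accumulated. A minimal C sketch of its semantics, assuming "word" stands for the natural register width (32-bit, or 64-bit with USE_DOUBLE); the names are illustrative, not from this file:

	typedef unsigned long word;	/* register-width accumulator */

	static inline word addc(word sum, word reg)
	{
		sum += reg;		/* ADD	sum, reg	*/
		if (sum < reg)		/* sltu	v1, sum, reg	*/
			sum += 1;	/* ADD	sum, v1		*/
		return sum;
	}

The new .set push/.set noat bracket protects the macro's explicit use of v1: under CONFIG_CPU_DADDI_WORKAROUNDS the file redirects the assembler temporary to v1 (see the .set at=v1 further down), and noat keeps the assembler from clobbering v1 in the middle of the macro.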
@@ -178,8 +182,10 @@ move_128bytes:
 	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
 	CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
 	LONG_SUBU	t8, t8, 0x01
+	.set	reorder				/* DADDI_WAR */
+	PTR_ADDU	src, src, 0x80
 	bnez	t8, move_128bytes
-	PTR_ADDU	src, src, 0x80
+	.set	noreorder
 
 1:
 	beqz	t2, 1f
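This hunk shows the pattern repeated for every loop in the file: the pointer update is hoisted out of the branch delay slot because, with CONFIG_CPU_DADDI_WORKAROUNDS enabled, an immediate PTR_ADDU/ADD may expand to more than one instruction (the daddiu erratum workaround loads the constant into a register first), and a multi-instruction macro cannot occupy a delay slot. ".set reorder" then lets the assembler fill or pad the slot itself. Schematically, in C (a paraphrase of the reordered loop, reusing word and addc from the sketch above; not kernel code):

	/* move_128bytes after the patch: the src advance sits before
	 * the loop test instead of in the branch delay slot. */
	static word sum_128byte_blocks(const word *src, unsigned long blocks,
				       word sum)
	{
		do {
			for (unsigned i = 0; i < 128 / sizeof(word); i++)
				sum = addc(sum, src[i]); /* CSUM_BIGCHUNK x4 */
			blocks -= 1;			 /* LONG_SUBU t8     */
			src += 128 / sizeof(word);	 /* hoisted PTR_ADDU */
		} while (blocks != 0);			 /* bnez t8, ...     */
		return sum;
	}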
@@ -208,8 +214,10 @@ end_words:
 	lw	t0, (src)
 	LONG_SUBU	t8, t8, 0x1
 	ADDC(sum, t0)
+	.set	reorder				/* DADDI_WAR */
+	PTR_ADDU	src, src, 0x4
 	bnez	t8, end_words
-	PTR_ADDU	src, src, 0x4
+	.set	noreorder
 
 /* unknown src alignment and < 8 bytes to go */
 small_csumcpy:
@@ -246,6 +254,8 @@ small_csumcpy:
 1:	ADDC(sum, t1)
 
 	/* fold checksum */
+	.set	push
+	.set	noat
 #ifdef USE_DOUBLE
 	dsll32	v1, sum, 0
 	daddu	sum, v1
@@ -266,6 +276,7 @@ small_csumcpy:
 	srl	sum, sum, 8
 	or	sum, v1
 	andi	sum, 0xffff
+	.set	pop
 1:
 	.set	reorder
 	/* Add the passed partial csum. */
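The fold sequence collapses the wide accumulator into a 16-bit result: under USE_DOUBLE the upper 32 bits are first folded into the lower half (dsll32/daddu pick up the carry), then the two 16-bit halves are combined; the srl/or path handles the odd-alignment byte swap. The .set push/.set noat bracket again covers the explicit v1 use, for the same reason as in ADDC. A hedged C sketch of the 32-to-16 fold (csum_fold-style; names assumed, not this file's):

	#include <stdint.h>

	static inline uint16_t fold16(uint32_t sum)
	{
		sum = (sum >> 16) + (sum & 0xffff);	/* fold high half in */
		sum += sum >> 16;			/* absorb new carry  */
		return (uint16_t)sum;
	}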
@@ -373,7 +384,11 @@ small_csumcpy:
 
 #define ADDRMASK (NBYTES-1)
 
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	.set	noat
+#else
+	.set	at=v1
+#endif
 
 LEAF(__csum_partial_copy_user)
 	PTR_ADDU	AT, src, len	/* See (1) above. */
@@ -441,8 +456,10 @@ EXC( STORE t6, UNIT(6)(dst), s_exc)
 	ADDC(sum, t6)
 EXC(	STORE	t7, UNIT(7)(dst),	s_exc)
 	ADDC(sum, t7)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 8*NBYTES
 	bgez	len, 1b
-	ADD	dst, dst, 8*NBYTES
+	.set	noreorder
 	ADD	len, 8*NBYTES		# revert len (see above)
 
 	/*
@@ -471,8 +488,10 @@ EXC( STORE t2, UNIT(2)(dst), s_exc)
 	ADDC(sum, t2)
 EXC(	STORE	t3, UNIT(3)(dst),	s_exc)
 	ADDC(sum, t3)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 4*NBYTES
 	beqz	len, done
-	ADD	dst, dst, 4*NBYTES
+	.set	noreorder
 less_than_4units:
 	/*
 	 * rem = len % NBYTES
@@ -485,8 +504,10 @@ EXC( LOAD t0, 0(src), l_exc)
 	SUB	len, len, NBYTES
 EXC(	STORE	t0, 0(dst),		s_exc)
 	ADDC(sum, t0)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, NBYTES
 	bne	rem, len, 1b
-	ADD	dst, dst, NBYTES
+	.set	noreorder
 
 	/*
 	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
@@ -572,8 +593,10 @@ EXC( STORE t2, UNIT(2)(dst), s_exc)
 	ADDC(sum, t2)
 EXC(	STORE	t3, UNIT(3)(dst),	s_exc)
 	ADDC(sum, t3)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 4*NBYTES
 	bne	len, rem, 1b
-	ADD	dst, dst, 4*NBYTES
+	.set	noreorder
 
 cleanup_src_unaligned:
 	beqz	len, done
@@ -587,8 +610,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy)
 	SUB	len, len, NBYTES
 EXC(	STORE	t0, 0(dst),		s_exc)
 	ADDC(sum, t0)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, NBYTES
 	bne	len, rem, 1b
-	ADD	dst, dst, NBYTES
+	.set	noreorder
 
 copy_bytes_checklen:
 	beqz	len, done
@@ -631,6 +656,8 @@ copy_bytes_done:
 	ADDC(sum, t2)
 done:
 	/* fold checksum */
+	.set	push
+	.set	noat
 #ifdef USE_DOUBLE
 	dsll32	v1, sum, 0
 	daddu	sum, v1
@@ -651,6 +678,7 @@ done:
 	srl	sum, sum, 8
 	or	sum, v1
 	andi	sum, 0xffff
+	.set	pop
 1:
 	.set	reorder
 	ADDC(sum, psum)
@@ -678,8 +706,10 @@ EXC( lbu t1, 0(src), l_exc)
 	SLLV	t1, t1, t2
 	addu	t2, SHIFT_INC
 	ADDC(sum, t1)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 1
 	bne	src, t0, 1b
-	ADD	dst, dst, 1
+	.set	noreorder
 l_exc:
 	LOAD	t0, TI_TASK($28)
 	nop
@@ -697,12 +727,22 @@ l_exc:
 	 * Clear len bytes starting at dst.  Can't call __bzero because it
 	 * might modify len.  An inefficient loop for these rare times...
 	 */
+	.set	reorder				/* DADDI_WAR */
+	SUB	src, len, 1
 	beqz	len, done
-	SUB	src, len, 1
+	.set	noreorder
 1:	sb	zero, 0(dst)
 	ADD	dst, dst, 1
+	.set	push
+	.set	noat
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	bnez	src, 1b
 	SUB	src, src, 1
+#else
+	li	v1, 1
+	bnez	src, 1b
+	SUB	src, src, v1
+#endif
 	li	v1, -EFAULT
 	b	done
 	sw	v1, (errptr)
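The fault path needs the same two fixes: the initial SUB is hoisted out of the delay slot, and under CONFIG_CPU_DADDI_WORKAROUNDS the loop's decrement switches to a register operand (li v1, 1; SUB src, src, v1) so the assembler never emits the erratum-prone daddiu, with noat stopping it from allocating v1 behind the code's back. What the loop computes, as a rough C paraphrase (names assumed, not kernel code):

	#include <errno.h>

	/* Zero the destination bytes the faulted copy left undefined,
	 * then report -EFAULT through the error pointer. */
	static void fault_cleanup(unsigned char *dst, unsigned long len,
				  int *errptr)
	{
		while (len--)
			*dst++ = 0;
		*errptr = -EFAULT;
	}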
@@ -712,4 +752,5 @@ s_exc:
 	li	v1, -EFAULT
 	jr	ra
 	sw	v1, (errptr)
+	.set	pop
 	END(__csum_partial_copy_user)