aboutsummaryrefslogtreecommitdiffstats
path: root/arch/mips/lib/csum_partial.S
diff options
context:
space:
mode:
authorMaciej W. Rozycki <macro@linux-mips.org>2007-10-23 07:43:25 -0400
committerRalf Baechle <ralf@linux-mips.org>2008-01-29 05:14:55 -0500
commit619b6e18fce20e4b2d0082cde989f37e1be7b3e1 (patch)
treeb56120c0de584f6672aa7e4533163435613f5203 /arch/mips/lib/csum_partial.S
parent20d60d9973c3b441902b0a3f4f6f7e7ade08f77d (diff)
[MIPS] R4000/R4400 daddiu erratum workaround
This complements the generic R4000/R4400 errata workaround code and adds bits for the daddiu problem. In most places it just modifies handwritten assembly code so that the assembler is allowed to use a temporary register as daddiu may now be treated as a macro that expands to a sequence of li and daddu. It is the AT register or, where AT is unavailable or used explicitly for another purpose, an explicitly-named register is selected, using the .set at=<reg> feature added recently to gas. This feature is only used if CONFIG_CPU_DADDI_WORKAROUNDS has been set, so if the workaround remains disabled, the required version of binutils stays unchanged. Similarly, daddiu instructions put in branch delay slots in noreorder fragments are now taken out of them and the assembler is allowed to reorder them itself as possible (which it does making the whole idea of scheduling them into delay slots manually questionable). Also in the very few places where such a simple conversion was not possible, a handcoded longer sequence is implemented. Other than that there are changes to code responsible for building the TLB fault and page clear/copy handlers to avoid daddiu as appropriate. These are only effective if the erratum is verified to be present at the run time. Finally there is a trivial update to __delay(), because it uses daddiu in a branch delay slot. Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org> Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch/mips/lib/csum_partial.S')
-rw-r--r--arch/mips/lib/csum_partial.S61
1 files changed, 51 insertions, 10 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index c0a77fe038be..957a82484e3e 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -7,6 +7,7 @@
7 * 7 *
8 * Copyright (C) 1998, 1999 Ralf Baechle 8 * Copyright (C) 1998, 1999 Ralf Baechle
9 * Copyright (C) 1999 Silicon Graphics, Inc. 9 * Copyright (C) 1999 Silicon Graphics, Inc.
10 * Copyright (C) 2007 Maciej W. Rozycki
10 */ 11 */
11#include <linux/errno.h> 12#include <linux/errno.h>
12#include <asm/asm.h> 13#include <asm/asm.h>
@@ -52,9 +53,12 @@
52#define UNIT(unit) ((unit)*NBYTES) 53#define UNIT(unit) ((unit)*NBYTES)
53 54
54#define ADDC(sum,reg) \ 55#define ADDC(sum,reg) \
56 .set push; \
57 .set noat; \
55 ADD sum, reg; \ 58 ADD sum, reg; \
56 sltu v1, sum, reg; \ 59 sltu v1, sum, reg; \
57 ADD sum, v1 60 ADD sum, v1; \
61 .set pop
58 62
59#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \ 63#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \
60 LOAD _t0, (offset + UNIT(0))(src); \ 64 LOAD _t0, (offset + UNIT(0))(src); \
@@ -178,8 +182,10 @@ move_128bytes:
178 CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4) 182 CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
179 CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4) 183 CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
180 LONG_SUBU t8, t8, 0x01 184 LONG_SUBU t8, t8, 0x01
185 .set reorder /* DADDI_WAR */
186 PTR_ADDU src, src, 0x80
181 bnez t8, move_128bytes 187 bnez t8, move_128bytes
182 PTR_ADDU src, src, 0x80 188 .set noreorder
183 189
1841: 1901:
185 beqz t2, 1f 191 beqz t2, 1f
@@ -208,8 +214,10 @@ end_words:
208 lw t0, (src) 214 lw t0, (src)
209 LONG_SUBU t8, t8, 0x1 215 LONG_SUBU t8, t8, 0x1
210 ADDC(sum, t0) 216 ADDC(sum, t0)
217 .set reorder /* DADDI_WAR */
218 PTR_ADDU src, src, 0x4
211 bnez t8, end_words 219 bnez t8, end_words
212 PTR_ADDU src, src, 0x4 220 .set noreorder
213 221
214/* unknown src alignment and < 8 bytes to go */ 222/* unknown src alignment and < 8 bytes to go */
215small_csumcpy: 223small_csumcpy:
@@ -246,6 +254,8 @@ small_csumcpy:
2461: ADDC(sum, t1) 2541: ADDC(sum, t1)
247 255
248 /* fold checksum */ 256 /* fold checksum */
257 .set push
258 .set noat
249#ifdef USE_DOUBLE 259#ifdef USE_DOUBLE
250 dsll32 v1, sum, 0 260 dsll32 v1, sum, 0
251 daddu sum, v1 261 daddu sum, v1
@@ -266,6 +276,7 @@ small_csumcpy:
266 srl sum, sum, 8 276 srl sum, sum, 8
267 or sum, v1 277 or sum, v1
268 andi sum, 0xffff 278 andi sum, 0xffff
279 .set pop
2691: 2801:
270 .set reorder 281 .set reorder
271 /* Add the passed partial csum. */ 282 /* Add the passed partial csum. */
@@ -373,7 +384,11 @@ small_csumcpy:
373 384
374#define ADDRMASK (NBYTES-1) 385#define ADDRMASK (NBYTES-1)
375 386
387#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
376 .set noat 388 .set noat
389#else
390 .set at=v1
391#endif
377 392
378LEAF(__csum_partial_copy_user) 393LEAF(__csum_partial_copy_user)
379 PTR_ADDU AT, src, len /* See (1) above. */ 394 PTR_ADDU AT, src, len /* See (1) above. */
@@ -441,8 +456,10 @@ EXC( STORE t6, UNIT(6)(dst), s_exc)
441 ADDC(sum, t6) 456 ADDC(sum, t6)
442EXC( STORE t7, UNIT(7)(dst), s_exc) 457EXC( STORE t7, UNIT(7)(dst), s_exc)
443 ADDC(sum, t7) 458 ADDC(sum, t7)
459 .set reorder /* DADDI_WAR */
460 ADD dst, dst, 8*NBYTES
444 bgez len, 1b 461 bgez len, 1b
445 ADD dst, dst, 8*NBYTES 462 .set noreorder
446 ADD len, 8*NBYTES # revert len (see above) 463 ADD len, 8*NBYTES # revert len (see above)
447 464
448 /* 465 /*
@@ -471,8 +488,10 @@ EXC( STORE t2, UNIT(2)(dst), s_exc)
471 ADDC(sum, t2) 488 ADDC(sum, t2)
472EXC( STORE t3, UNIT(3)(dst), s_exc) 489EXC( STORE t3, UNIT(3)(dst), s_exc)
473 ADDC(sum, t3) 490 ADDC(sum, t3)
491 .set reorder /* DADDI_WAR */
492 ADD dst, dst, 4*NBYTES
474 beqz len, done 493 beqz len, done
475 ADD dst, dst, 4*NBYTES 494 .set noreorder
476less_than_4units: 495less_than_4units:
477 /* 496 /*
478 * rem = len % NBYTES 497 * rem = len % NBYTES
@@ -485,8 +504,10 @@ EXC( LOAD t0, 0(src), l_exc)
485 SUB len, len, NBYTES 504 SUB len, len, NBYTES
486EXC( STORE t0, 0(dst), s_exc) 505EXC( STORE t0, 0(dst), s_exc)
487 ADDC(sum, t0) 506 ADDC(sum, t0)
507 .set reorder /* DADDI_WAR */
508 ADD dst, dst, NBYTES
488 bne rem, len, 1b 509 bne rem, len, 1b
489 ADD dst, dst, NBYTES 510 .set noreorder
490 511
491 /* 512 /*
492 * src and dst are aligned, need to copy rem bytes (rem < NBYTES) 513 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
@@ -572,8 +593,10 @@ EXC( STORE t2, UNIT(2)(dst), s_exc)
572 ADDC(sum, t2) 593 ADDC(sum, t2)
573EXC( STORE t3, UNIT(3)(dst), s_exc) 594EXC( STORE t3, UNIT(3)(dst), s_exc)
574 ADDC(sum, t3) 595 ADDC(sum, t3)
596 .set reorder /* DADDI_WAR */
597 ADD dst, dst, 4*NBYTES
575 bne len, rem, 1b 598 bne len, rem, 1b
576 ADD dst, dst, 4*NBYTES 599 .set noreorder
577 600
578cleanup_src_unaligned: 601cleanup_src_unaligned:
579 beqz len, done 602 beqz len, done
@@ -587,8 +610,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy)
587 SUB len, len, NBYTES 610 SUB len, len, NBYTES
588EXC( STORE t0, 0(dst), s_exc) 611EXC( STORE t0, 0(dst), s_exc)
589 ADDC(sum, t0) 612 ADDC(sum, t0)
613 .set reorder /* DADDI_WAR */
614 ADD dst, dst, NBYTES
590 bne len, rem, 1b 615 bne len, rem, 1b
591 ADD dst, dst, NBYTES 616 .set noreorder
592 617
593copy_bytes_checklen: 618copy_bytes_checklen:
594 beqz len, done 619 beqz len, done
@@ -631,6 +656,8 @@ copy_bytes_done:
631 ADDC(sum, t2) 656 ADDC(sum, t2)
632done: 657done:
633 /* fold checksum */ 658 /* fold checksum */
659 .set push
660 .set noat
634#ifdef USE_DOUBLE 661#ifdef USE_DOUBLE
635 dsll32 v1, sum, 0 662 dsll32 v1, sum, 0
636 daddu sum, v1 663 daddu sum, v1
@@ -651,6 +678,7 @@ done:
651 srl sum, sum, 8 678 srl sum, sum, 8
652 or sum, v1 679 or sum, v1
653 andi sum, 0xffff 680 andi sum, 0xffff
681 .set pop
6541: 6821:
655 .set reorder 683 .set reorder
656 ADDC(sum, psum) 684 ADDC(sum, psum)
@@ -678,8 +706,10 @@ EXC( lbu t1, 0(src), l_exc)
678 SLLV t1, t1, t2 706 SLLV t1, t1, t2
679 addu t2, SHIFT_INC 707 addu t2, SHIFT_INC
680 ADDC(sum, t1) 708 ADDC(sum, t1)
709 .set reorder /* DADDI_WAR */
710 ADD dst, dst, 1
681 bne src, t0, 1b 711 bne src, t0, 1b
682 ADD dst, dst, 1 712 .set noreorder
683l_exc: 713l_exc:
684 LOAD t0, TI_TASK($28) 714 LOAD t0, TI_TASK($28)
685 nop 715 nop
@@ -697,12 +727,22 @@ l_exc:
697 * Clear len bytes starting at dst. Can't call __bzero because it 727 * Clear len bytes starting at dst. Can't call __bzero because it
698 * might modify len. An inefficient loop for these rare times... 728 * might modify len. An inefficient loop for these rare times...
699 */ 729 */
730 .set reorder /* DADDI_WAR */
731 SUB src, len, 1
700 beqz len, done 732 beqz len, done
701 SUB src, len, 1 733 .set noreorder
7021: sb zero, 0(dst) 7341: sb zero, 0(dst)
703 ADD dst, dst, 1 735 ADD dst, dst, 1
736 .set push
737 .set noat
738#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
704 bnez src, 1b 739 bnez src, 1b
705 SUB src, src, 1 740 SUB src, src, 1
741#else
742 li v1, 1
743 bnez src, 1b
744 SUB src, src, v1
745#endif
706 li v1, -EFAULT 746 li v1, -EFAULT
707 b done 747 b done
708 sw v1, (errptr) 748 sw v1, (errptr)
@@ -712,4 +752,5 @@ s_exc:
712 li v1, -EFAULT 752 li v1, -EFAULT
713 jr ra 753 jr ra
714 sw v1, (errptr) 754 sw v1, (errptr)
755 .set pop
715 END(__csum_partial_copy_user) 756 END(__csum_partial_copy_user)