author		Maciej W. Rozycki <macro@linux-mips.org>	2007-10-23 07:43:25 -0400
committer	Ralf Baechle <ralf@linux-mips.org>	2008-01-29 05:14:55 -0500
commit		619b6e18fce20e4b2d0082cde989f37e1be7b3e1 (patch)
tree		b56120c0de584f6672aa7e4533163435613f5203 /arch/mips/lib
parent		20d60d9973c3b441902b0a3f4f6f7e7ade08f77d (diff)
[MIPS] R4000/R4400 daddiu erratum workaround
This complements the generic R4000/R4400 errata workaround code and adds
bits for the daddiu problem. In most places it just modifies handwritten
assembly code so that the assembler is allowed to use a temporary register,
as daddiu may now be treated as a macro that expands to a sequence of li
and daddu. The temporary is normally the AT register; where AT is
unavailable or already used for another purpose, an explicitly-named
register is selected instead, using the .set at=<reg> feature recently
added to gas. This feature is only used if CONFIG_CPU_DADDI_WORKAROUNDS
has been set, so if the workaround remains disabled, the required version
of binutils stays unchanged.
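For illustration only (this snippet is not part of the patch and the
operand values are made up), the expansion gas may perform once daddiu is
treated as a macro looks like this, with the temporary chosen by the
.set at=<reg> directive:

	.set	at=v1			# let gas use v1 as its temporary
	daddiu	a0, a0, 128		# with the workaround, may assemble as:
					#	li	v1, 128
					#	daddu	a0, a0, v1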
Similarly, daddiu instructions placed in branch delay slots within
noreorder fragments are now moved out of the slots, and the assembler is
allowed to schedule them itself where possible (which it does, making the
whole idea of filling delay slots by hand questionable).
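Schematically, the conversion applied throughout the loops below looks
like this (labels and registers taken from one instance in csum_partial.S):

	# before: the macro-expandable increment sits in the delay slot
	bnez	t8, move_128bytes
	 PTR_ADDU	src, src, 0x80

	# after: let gas order (and, if need be, expand) the macro itself
	.set	reorder				/* DADDI_WAR */
	PTR_ADDU	src, src, 0x80
	bnez	t8, move_128bytes
	.set	noreorder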
In the very few places where such a simple conversion was not possible, a
longer handcoded sequence is used instead.
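One such case is the fault-fixup byte-clearing loop in memcpy.S (see the
hunk below), where the loop decrement cannot be allowed to expand inside
the delay slot, so the constant is loaded into a register up front:

	.set	push
	.set	noat
	li	v1, 1
	bnez	src, 1b
	 SUB	src, src, v1		# register form is safe in the delay slot
	.set	pop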
Other than that, the code responsible for building the TLB fault and page
clear/copy handlers is changed to avoid daddiu where appropriate. These
changes only take effect if the erratum is verified to be present at run
time.
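The idea behind the run-time check, sketched here with made-up operand
values and a hypothetical erratum_present label (the actual probe is not
part of this diff, and a real test must also ensure a genuine daddiu
opcode is executed rather than an assembler expansion of it):

	li	t0, 0x1234
	daddiu	t0, t0, 0x5678		# possibly-affected immediate form
	li	t1, 0x1234
	li	t2, 0x5678
	daddu	t1, t1, t2		# known-good register form
	bne	t0, t1, erratum_present	# mismatch => erratum present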
Finally, there is a trivial update to __delay(), which uses a daddiu in a
branch delay slot.
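A sketch of that update (registers chosen for illustration, not the exact
upstream code): the loop decrement is supplied in a register, so the delay
slot holds a register-form dsubu rather than a daddiu synthesized from an
immediate:

	# before: gas turns "dsubu a0, 1" into daddiu a0, a0, -1
	1:	bnez	a0, 1b
		dsubu	a0, 1

	# after: the decrement comes from a register, safe in the slot
		li	t0, 1
	1:	bnez	a0, 1b
		dsubu	a0, t0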
Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch/mips/lib')
-rw-r--r--	arch/mips/lib/csum_partial.S	| 61
-rw-r--r--	arch/mips/lib/memcpy-inatomic.S	| 25
-rw-r--r--	arch/mips/lib/memcpy.S		| 60
-rw-r--r--	arch/mips/lib/memset.S		| 11
-rw-r--r--	arch/mips/lib/strncpy_user.S	|  4
5 files changed, 130 insertions(+), 31 deletions(-)
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index c0a77fe038be..957a82484e3e 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -7,6 +7,7 @@
  *
  * Copyright (C) 1998, 1999 Ralf Baechle
  * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) 2007 Maciej W. Rozycki
  */
 #include <linux/errno.h>
 #include <asm/asm.h>
@@ -52,9 +53,12 @@
 #define UNIT(unit)  ((unit)*NBYTES)
 
 #define ADDC(sum,reg)						\
+	.set	push;						\
+	.set	noat;						\
 	ADD	sum, reg;					\
 	sltu	v1, sum, reg;					\
-	ADD	sum, v1
+	ADD	sum, v1;					\
+	.set	pop
 
 #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
 	LOAD	_t0, (offset + UNIT(0))(src);			\
@@ -178,8 +182,10 @@ move_128bytes:
 	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
 	CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
 	LONG_SUBU	t8, t8, 0x01
+	.set	reorder				/* DADDI_WAR */
+	PTR_ADDU	src, src, 0x80
 	bnez	t8, move_128bytes
-	PTR_ADDU	src, src, 0x80
+	.set	noreorder
 
 1:
 	beqz	t2, 1f
@@ -208,8 +214,10 @@ end_words:
 	lw	t0, (src)
 	LONG_SUBU	t8, t8, 0x1
 	ADDC(sum, t0)
+	.set	reorder				/* DADDI_WAR */
+	PTR_ADDU	src, src, 0x4
 	bnez	t8, end_words
-	PTR_ADDU	src, src, 0x4
+	.set	noreorder
 
 /* unknown src alignment and < 8 bytes to go */
 small_csumcpy:
@@ -246,6 +254,8 @@ small_csumcpy:
 1:	ADDC(sum, t1)
 
 	/* fold checksum */
+	.set	push
+	.set	noat
 #ifdef USE_DOUBLE
 	dsll32	v1, sum, 0
 	daddu	sum, v1
@@ -266,6 +276,7 @@ small_csumcpy:
 	srl	sum, sum, 8
 	or	sum, v1
 	andi	sum, 0xffff
+	.set	pop
 1:
 	.set	reorder
 	/* Add the passed partial csum. */
@@ -373,7 +384,11 @@ small_csumcpy:
 
 #define ADDRMASK (NBYTES-1)
 
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	.set	noat
+#else
+	.set	at=v1
+#endif
 
 LEAF(__csum_partial_copy_user)
 	PTR_ADDU	AT, src, len	/* See (1) above. */
@@ -441,8 +456,10 @@ EXC(	STORE	t6, UNIT(6)(dst),	s_exc)
 	ADDC(sum, t6)
 EXC(	STORE	t7, UNIT(7)(dst),	s_exc)
 	ADDC(sum, t7)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 8*NBYTES
 	bgez	len, 1b
-	ADD	dst, dst, 8*NBYTES
+	.set	noreorder
 	ADD	len, 8*NBYTES		# revert len (see above)
 
 /*
@@ -471,8 +488,10 @@ EXC(	STORE	t2, UNIT(2)(dst),	s_exc)
 	ADDC(sum, t2)
 EXC(	STORE	t3, UNIT(3)(dst),	s_exc)
 	ADDC(sum, t3)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 4*NBYTES
 	beqz	len, done
-	ADD	dst, dst, 4*NBYTES
+	.set	noreorder
 less_than_4units:
 	/*
 	 * rem = len % NBYTES
@@ -485,8 +504,10 @@ EXC(	LOAD	t0, 0(src),	l_exc)
 	SUB	len, len, NBYTES
 EXC(	STORE	t0, 0(dst),	s_exc)
 	ADDC(sum, t0)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, NBYTES
 	bne	rem, len, 1b
-	ADD	dst, dst, NBYTES
+	.set	noreorder
 
 	/*
 	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
@@ -572,8 +593,10 @@ EXC(	STORE	t2, UNIT(2)(dst),	s_exc)
 	ADDC(sum, t2)
 EXC(	STORE	t3, UNIT(3)(dst),	s_exc)
 	ADDC(sum, t3)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 4*NBYTES
 	bne	len, rem, 1b
-	ADD	dst, dst, 4*NBYTES
+	.set	noreorder
 
 cleanup_src_unaligned:
 	beqz	len, done
@@ -587,8 +610,10 @@ EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
 	SUB	len, len, NBYTES
 EXC(	STORE	t0, 0(dst),	s_exc)
 	ADDC(sum, t0)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, NBYTES
 	bne	len, rem, 1b
-	ADD	dst, dst, NBYTES
+	.set	noreorder
 
 copy_bytes_checklen:
 	beqz	len, done
@@ -631,6 +656,8 @@ copy_bytes_done:
 	ADDC(sum, t2)
 done:
 	/* fold checksum */
+	.set	push
+	.set	noat
 #ifdef USE_DOUBLE
 	dsll32	v1, sum, 0
 	daddu	sum, v1
@@ -651,6 +678,7 @@ done:
 	srl	sum, sum, 8
 	or	sum, v1
 	andi	sum, 0xffff
+	.set	pop
 1:
 	.set	reorder
 	ADDC(sum, psum)
@@ -678,8 +706,10 @@ EXC(	lbu	t1, 0(src),	l_exc)
 	SLLV	t1, t1, t2
 	addu	t2, SHIFT_INC
 	ADDC(sum, t1)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 1
 	bne	src, t0, 1b
-	ADD	dst, dst, 1
+	.set	noreorder
 l_exc:
 	LOAD	t0, TI_TASK($28)
 	nop
@@ -697,12 +727,22 @@ l_exc:
 	 * Clear len bytes starting at dst.  Can't call __bzero because it
 	 * might modify len.  An inefficient loop for these rare times...
	 */
+	.set	reorder				/* DADDI_WAR */
+	SUB	src, len, 1
 	beqz	len, done
-	SUB	src, len, 1
+	.set	noreorder
 1:	sb	zero, 0(dst)
 	ADD	dst, dst, 1
+	.set	push
+	.set	noat
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	bnez	src, 1b
 	SUB	src, src, 1
+#else
+	li	v1, 1
+	bnez	src, 1b
+	SUB	src, src, v1
+#endif
 	li	v1, -EFAULT
 	b	done
 	sw	v1, (errptr)
@@ -712,4 +752,5 @@ s_exc:
 	li	v1, -EFAULT
 	jr	ra
 	sw	v1, (errptr)
+	.set	pop
 	END(__csum_partial_copy_user)
diff --git a/arch/mips/lib/memcpy-inatomic.S b/arch/mips/lib/memcpy-inatomic.S
index 3a534b2baa0f..d1b08f5d6860 100644
--- a/arch/mips/lib/memcpy-inatomic.S
+++ b/arch/mips/lib/memcpy-inatomic.S
@@ -9,6 +9,7 @@
  * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
  * Copyright (C) 2002 Broadcom, Inc.
  * memcpy/copy_user author: Mark Vandevoorde
+ * Copyright (C) 2007 Maciej W. Rozycki
  *
  * Mnemonic names for arguments to memcpy/__copy_user
  */
@@ -175,7 +176,11 @@
 
 	.text
 	.set	noreorder
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	.set	noat
+#else
+	.set	at=v1
+#endif
 
 /*
 * A combined memcpy/__copy_user
@@ -268,8 +273,10 @@ EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
 	STORE	t1, UNIT(1)(dst)
 	STORE	t2, UNIT(2)(dst)
 	STORE	t3, UNIT(3)(dst)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 4*NBYTES
 	beqz	len, done
-	ADD	dst, dst, 4*NBYTES
+	.set	noreorder
 less_than_4units:
 	/*
 	 * rem = len % NBYTES
@@ -281,8 +288,10 @@ EXC(	LOAD	t0, 0(src),	l_exc)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
 	STORE	t0, 0(dst)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, NBYTES
 	bne	rem, len, 1b
-	ADD	dst, dst, NBYTES
+	.set	noreorder
 
 	/*
 	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
@@ -361,8 +370,10 @@ EXC(	LDREST	t3, REST(3)(src),	l_exc_copy)
 	STORE	t2, UNIT(2)(dst)
 	STORE	t3, UNIT(3)(dst)
 	PREF(	1, 9*32(dst) )		# 1 is PREF_STORE (not streamed)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 4*NBYTES
 	bne	len, rem, 1b
-	ADD	dst, dst, 4*NBYTES
+	.set	noreorder
 
 cleanup_src_unaligned:
 	beqz	len, done
@@ -375,8 +386,10 @@ EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
 	STORE	t0, 0(dst)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, NBYTES
 	bne	len, rem, 1b
-	ADD	dst, dst, NBYTES
+	.set	noreorder
 
 copy_bytes_checklen:
 	beqz	len, done
@@ -424,8 +437,10 @@ l_exc_copy:
 EXC(	lb	t1, 0(src),	l_exc)
 	ADD	src, src, 1
 	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 1
 	bne	src, t0, 1b
-	ADD	dst, dst, 1
+	.set	noreorder
 l_exc:
 	LOAD	t0, TI_TASK($28)
 	nop
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index a526c62cb76a..aded7b159052 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -9,6 +9,7 @@
  * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
  * Copyright (C) 2002 Broadcom, Inc.
  * memcpy/copy_user author: Mark Vandevoorde
+ * Copyright (C) 2007 Maciej W. Rozycki
  *
  * Mnemonic names for arguments to memcpy/__copy_user
  */
@@ -175,7 +176,11 @@
 
 	.text
 	.set	noreorder
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	.set	noat
+#else
+	.set	at=v1
+#endif
 
 /*
 * A combined memcpy/__copy_user
@@ -271,8 +276,10 @@ EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
 EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
 EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
 EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 4*NBYTES
 	beqz	len, done
-	ADD	dst, dst, 4*NBYTES
+	.set	noreorder
 less_than_4units:
 	/*
 	 * rem = len % NBYTES
@@ -284,8 +291,10 @@ EXC(	LOAD	t0, 0(src),	l_exc)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
 EXC(	STORE	t0, 0(dst),	s_exc_p1u)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, NBYTES
 	bne	rem, len, 1b
-	ADD	dst, dst, NBYTES
+	.set	noreorder
 
 	/*
 	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
@@ -364,8 +373,10 @@ EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
 EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
 EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
 	PREF(	1, 9*32(dst) )		# 1 is PREF_STORE (not streamed)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 4*NBYTES
 	bne	len, rem, 1b
-	ADD	dst, dst, 4*NBYTES
+	.set	noreorder
 
 cleanup_src_unaligned:
 	beqz	len, done
@@ -378,8 +389,10 @@ EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
 EXC(	STORE	t0, 0(dst),	s_exc_p1u)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, NBYTES
 	bne	len, rem, 1b
-	ADD	dst, dst, NBYTES
+	.set	noreorder
 
 copy_bytes_checklen:
 	beqz	len, done
@@ -427,8 +440,10 @@ l_exc_copy:
 EXC(	lb	t1, 0(src),	l_exc)
 	ADD	src, src, 1
 	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 1
 	bne	src, t0, 1b
-	ADD	dst, dst, 1
+	.set	noreorder
 l_exc:
 	LOAD	t0, TI_TASK($28)
 	nop
@@ -446,20 +461,33 @@ l_exc:
 	 * Clear len bytes starting at dst.  Can't call __bzero because it
 	 * might modify len.  An inefficient loop for these rare times...
	 */
+	.set	reorder				/* DADDI_WAR */
+	SUB	src, len, 1
 	beqz	len, done
-	SUB	src, len, 1
+	.set	noreorder
 1:	sb	zero, 0(dst)
 	ADD	dst, dst, 1
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	bnez	src, 1b
 	SUB	src, src, 1
+#else
+	.set	push
+	.set	noat
+	li	v1, 1
+	bnez	src, 1b
+	SUB	src, src, v1
+	.set	pop
+#endif
 	jr	ra
 	nop
 
 
 #define SEXC(n)						\
-s_exc_p ## n ## u:					\
-	jr	ra;					\
-	ADD	len, len, n*NBYTES
+	.set	reorder;		/* DADDI_WAR */	\
+s_exc_p ## n ## u:					\
+	ADD	len, len, n*NBYTES;			\
+	jr	ra;					\
+	.set	noreorder
 
 SEXC(8)
 SEXC(7)
@@ -471,8 +499,10 @@ SEXC(2)
 SEXC(1)
 
 s_exc_p1:
+	.set	reorder				/* DADDI_WAR */
+	ADD	len, len, 1
 	jr	ra
-	ADD	len, len, 1
+	.set	noreorder
 s_exc:
 	jr	ra
 	nop
@@ -502,8 +532,10 @@ r_end_bytes:
 	SUB	a2, a2, 0x1
 	sb	t0, -1(a0)
 	SUB	a1, a1, 0x1
+	.set	reorder				/* DADDI_WAR */
+	SUB	a0, a0, 0x1
 	bnez	a2, r_end_bytes
-	SUB	a0, a0, 0x1
+	.set	noreorder
 
 r_out:
 	jr	ra
@@ -514,8 +546,10 @@ r_end_bytes_up:
 	SUB	a2, a2, 0x1
 	sb	t0, (a0)
 	ADD	a1, a1, 0x1
+	.set	reorder				/* DADDI_WAR */
+	ADD	a0, a0, 0x1
 	bnez	a2, r_end_bytes_up
-	ADD	a0, a0, 0x1
+	.set	noreorder
 
 	jr	ra
 	move	a2, zero
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index 3f8b8b3d0b23..3bf38422342f 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -5,6 +5,7 @@
  *
  * Copyright (C) 1998, 1999, 2000 by Ralf Baechle
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Copyright (C) 2007 Maciej W. Rozycki
  */
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
@@ -74,8 +75,16 @@ FEXPORT(__bzero)
 	bnez		t0, small_memset
 	andi		t0, a0, LONGMASK	/* aligned? */
 
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	beqz		t0, 1f
 	PTR_SUBU	t0, LONGSIZE		/* alignment in bytes */
+#else
+	.set		noat
+	li		AT, LONGSIZE
+	beqz		t0, 1f
+	PTR_SUBU	t0, AT			/* alignment in bytes */
+	.set		at
+#endif
 
 #ifdef __MIPSEB__
 	EX(LONG_S_L, a1, (a0), first_fixup)	/* make word/dword aligned */
@@ -106,7 +115,7 @@ memset_partial:
 	.set		noat
 	LONG_SRL	AT, t0, 1
 	PTR_SUBU	t1, AT
-	.set		noat
+	.set		at
 #endif
 	jr		t1
 	PTR_ADDU	a0, t0			/* dest ptr */
diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S
index d16c76fbfac7..5c8fb9d6b7f9 100644
--- a/arch/mips/lib/strncpy_user.S
+++ b/arch/mips/lib/strncpy_user.S
@@ -41,9 +41,9 @@ FEXPORT(__strncpy_from_user_nocheck_asm)
 	beqz		t0, 2f
 	sb		t0, (a0)
 	PTR_ADDIU	v0, 1
-	bne		v0, a2, 1b
-	PTR_ADDIU	a0, 1
 	.set		reorder
+	PTR_ADDIU	a0, 1
+	bne		v0, a2, 1b
 2:	PTR_ADDU	t0, a1, v0
 	xor		t0, a1
 	bltz		t0, fault