author		Maciej W. Rozycki <macro@linux-mips.org>	2007-10-23 07:43:25 -0400
committer	Ralf Baechle <ralf@linux-mips.org>	2008-01-29 05:14:55 -0500
commit		619b6e18fce20e4b2d0082cde989f37e1be7b3e1 (patch)
tree		b56120c0de584f6672aa7e4533163435613f5203 /arch/mips
parent		20d60d9973c3b441902b0a3f4f6f7e7ade08f77d (diff)
[MIPS] R4000/R4400 daddiu erratum workaround
This complements the generic R4000/R4400 errata workaround code and adds
bits for the daddiu problem.  In most places it just modifies handwritten
assembly code so that the assembler is allowed to use a temporary register,
as daddiu may now be treated as a macro that expands to a sequence of li
and daddu.  The temporary is the AT register or, where AT is unavailable
or used explicitly for another purpose, an explicitly-named register
selected with the .set at=<reg> feature recently added to gas.  This
feature is only used if CONFIG_CPU_DADDI_WORKAROUNDS has been set, so if
the workaround remains disabled, the required version of binutils stays
unchanged.

Similarly, daddiu instructions placed in branch delay slots within
noreorder fragments are now taken out of them, and the assembler is
allowed to reorder them itself where possible (which it does, making the
whole idea of scheduling them into delay slots manually questionable).
In the very few places where such a simple conversion was not possible,
a longer handcoded sequence is implemented.

Other than that, there are changes to the code responsible for building
the TLB fault and page clear/copy handlers so that daddiu is avoided as
appropriate.  These are only effective if the erratum is verified to be
present at run time.

Finally, there is a trivial update to __delay(), because it uses daddiu
in a branch delay slot.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
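To make the mechanism concrete, here is a minimal sketch of the expansion the patch leans on (the $a0 operand and the 0x90 immediate are illustrative, not taken from the patch):

	# Source, assembled with the workaround enabled:
	daddiu	$a0, $a0, 0x90
	# What gas may emit for the macro form, via the assembler temporary:
	li	$at, 0x90		# load the immediate into $at
	daddu	$a0, $a0, $at		# 64-bit add, avoiding daddiu

Because the macro may expand to two instructions and needs a scratch register, it can no longer sit in a branch delay slot or run with AT reserved, which is what most of the hunks below address.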
Diffstat (limited to 'arch/mips')
-rw-r--r--	arch/mips/kernel/genex.S	 8
-rw-r--r--	arch/mips/lib/csum_partial.S	61
-rw-r--r--	arch/mips/lib/memcpy-inatomic.S	25
-rw-r--r--	arch/mips/lib/memcpy.S	60
-rw-r--r--	arch/mips/lib/memset.S	11
-rw-r--r--	arch/mips/lib/strncpy_user.S	 4
-rw-r--r--	arch/mips/mm/pg-r4k.c	66
-rw-r--r--	arch/mips/mm/tlbex.c	42
8 files changed, 198 insertions(+), 79 deletions(-)
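Most of the churn in the assembly files above is one repeated transformation, visible in hunk after hunk below: an address increment that had been scheduled by hand into a branch delay slot is hoisted out of it, and gas is switched into reorder mode around the branch so it can expand the macro and fill the delay slot itself. Schematically (operands illustrative):

	# Before: increment placed in the delay slot manually
	bne	len, rem, 1b
	 ADD	dst, dst, NBYTES	# breaks if ADD expands to two instructions
	# After (the DADDI_WAR pattern): let gas schedule it
	.set	reorder
	ADD	dst, dst, NBYTES
	bne	len, rem, 1b		# gas fills the delay slot
	.set	noreorder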
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index e76a76bf0b3d..c6ada98ee042 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -6,7 +6,7 @@
  * Copyright (C) 1994 - 2000, 2001, 2003 Ralf Baechle
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
  * Copyright (C) 2001 MIPS Technologies, Inc.
- * Copyright (C) 2002 Maciej W. Rozycki
+ * Copyright (C) 2002, 2007 Maciej W. Rozycki
  */
 #include <linux/init.h>
 
@@ -471,7 +471,13 @@ NESTED(nmi_handler, PT_SIZE, sp)
 	jr	k0
 	rfe
 #else
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	LONG_ADDIU	k0, 4	/* stall on $k0 */
+#else
+	.set	at=v1
+	LONG_ADDIU	k0, 4
+	.set	noat
+#endif
 	MTC0	k0, CP0_EPC
 	/* I hope three instructions between MTC0 and ERET are enough... */
 	ori	k1, _THREAD_MASK
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index c0a77fe038be..957a82484e3e 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -7,6 +7,7 @@
  *
  * Copyright (C) 1998, 1999 Ralf Baechle
  * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) 2007 Maciej W. Rozycki
  */
 #include <linux/errno.h>
 #include <asm/asm.h>
@@ -52,9 +53,12 @@
 #define UNIT(unit)  ((unit)*NBYTES)
 
 #define ADDC(sum,reg)						\
+	.set	push;						\
+	.set	noat;						\
 	ADD	sum, reg;					\
 	sltu	v1, sum, reg;					\
-	ADD	sum, v1
+	ADD	sum, v1;					\
+	.set	pop
 
 #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
 	LOAD	_t0, (offset + UNIT(0))(src);			\
@@ -178,8 +182,10 @@ move_128bytes:
 	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
 	CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
 	LONG_SUBU	t8, t8, 0x01
+	.set	reorder				/* DADDI_WAR */
+	PTR_ADDU	src, src, 0x80
 	bnez	t8, move_128bytes
-	PTR_ADDU	src, src, 0x80
+	.set	noreorder
 
 1:
 	beqz	t2, 1f
@@ -208,8 +214,10 @@ end_words:
 	lw	t0, (src)
 	LONG_SUBU	t8, t8, 0x1
 	ADDC(sum, t0)
+	.set	reorder				/* DADDI_WAR */
+	PTR_ADDU	src, src, 0x4
 	bnez	t8, end_words
-	PTR_ADDU	src, src, 0x4
+	.set	noreorder
 
 /* unknown src alignment and < 8 bytes to go */
 small_csumcpy:
@@ -246,6 +254,8 @@ small_csumcpy:
 1:	ADDC(sum, t1)
 
 	/* fold checksum */
+	.set	push
+	.set	noat
 #ifdef USE_DOUBLE
 	dsll32	v1, sum, 0
 	daddu	sum, v1
@@ -266,6 +276,7 @@ small_csumcpy:
 	srl	sum, sum, 8
 	or	sum, v1
 	andi	sum, 0xffff
+	.set	pop
 1:
 	.set	reorder
 	/* Add the passed partial csum. */
@@ -373,7 +384,11 @@ small_csumcpy:
 
 #define ADDRMASK (NBYTES-1)
 
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	.set	noat
+#else
+	.set	at=v1
+#endif
 
 LEAF(__csum_partial_copy_user)
 	PTR_ADDU	AT, src, len	/* See (1) above. */
@@ -441,8 +456,10 @@ EXC( STORE t6, UNIT(6)(dst), s_exc)
 	ADDC(sum, t6)
 EXC(	STORE	t7, UNIT(7)(dst),	s_exc)
 	ADDC(sum, t7)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 8*NBYTES
 	bgez	len, 1b
-	ADD	dst, dst, 8*NBYTES
+	.set	noreorder
 	ADD	len, 8*NBYTES		# revert len (see above)
 
 	/*
@@ -471,8 +488,10 @@ EXC( STORE t2, UNIT(2)(dst), s_exc)
 	ADDC(sum, t2)
 EXC(	STORE	t3, UNIT(3)(dst),	s_exc)
 	ADDC(sum, t3)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 4*NBYTES
 	beqz	len, done
-	ADD	dst, dst, 4*NBYTES
+	.set	noreorder
 less_than_4units:
 	/*
 	 * rem = len % NBYTES
@@ -485,8 +504,10 @@ EXC( LOAD t0, 0(src), l_exc)
 	SUB	len, len, NBYTES
 EXC(	STORE	t0, 0(dst),		s_exc)
 	ADDC(sum, t0)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, NBYTES
 	bne	rem, len, 1b
-	ADD	dst, dst, NBYTES
+	.set	noreorder
 
 	/*
 	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
@@ -572,8 +593,10 @@ EXC( STORE t2, UNIT(2)(dst), s_exc)
 	ADDC(sum, t2)
 EXC(	STORE	t3, UNIT(3)(dst),	s_exc)
 	ADDC(sum, t3)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 4*NBYTES
 	bne	len, rem, 1b
-	ADD	dst, dst, 4*NBYTES
+	.set	noreorder
 
 cleanup_src_unaligned:
 	beqz	len, done
@@ -587,8 +610,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy)
 	SUB	len, len, NBYTES
 EXC(	STORE	t0, 0(dst),		s_exc)
 	ADDC(sum, t0)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, NBYTES
 	bne	len, rem, 1b
-	ADD	dst, dst, NBYTES
+	.set	noreorder
 
 copy_bytes_checklen:
 	beqz	len, done
@@ -631,6 +656,8 @@ copy_bytes_done:
 	ADDC(sum, t2)
 done:
 	/* fold checksum */
+	.set	push
+	.set	noat
 #ifdef USE_DOUBLE
 	dsll32	v1, sum, 0
 	daddu	sum, v1
@@ -651,6 +678,7 @@ done:
 	srl	sum, sum, 8
 	or	sum, v1
 	andi	sum, 0xffff
+	.set	pop
 1:
 	.set	reorder
 	ADDC(sum, psum)
@@ -678,8 +706,10 @@ EXC( lbu t1, 0(src), l_exc)
 	SLLV	t1, t1, t2
 	addu	t2, SHIFT_INC
 	ADDC(sum, t1)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 1
 	bne	src, t0, 1b
-	ADD	dst, dst, 1
+	.set	noreorder
 l_exc:
 	LOAD	t0, TI_TASK($28)
 	nop
@@ -697,12 +727,22 @@ l_exc:
 	 * Clear len bytes starting at dst.  Can't call __bzero because it
 	 * might modify len.  An inefficient loop for these rare times...
 	 */
+	.set	reorder				/* DADDI_WAR */
+	SUB	src, len, 1
 	beqz	len, done
-	SUB	src, len, 1
+	.set	noreorder
 1:	sb	zero, 0(dst)
 	ADD	dst, dst, 1
+	.set	push
+	.set	noat
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	bnez	src, 1b
 	SUB	src, src, 1
+#else
+	li	v1, 1
+	bnez	src, 1b
+	SUB	src, src, v1
+#endif
 	li	v1, -EFAULT
 	b	done
 	sw	v1, (errptr)
@@ -712,4 +752,5 @@ s_exc:
 	li	v1, -EFAULT
 	jr	ra
 	sw	v1, (errptr)
+	.set	pop
 	END(__csum_partial_copy_user)
diff --git a/arch/mips/lib/memcpy-inatomic.S b/arch/mips/lib/memcpy-inatomic.S
index 3a534b2baa0f..d1b08f5d6860 100644
--- a/arch/mips/lib/memcpy-inatomic.S
+++ b/arch/mips/lib/memcpy-inatomic.S
@@ -9,6 +9,7 @@
  * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
  * Copyright (C) 2002 Broadcom, Inc.
  *   memcpy/copy_user author: Mark Vandevoorde
+ * Copyright (C) 2007 Maciej W. Rozycki
  *
  * Mnemonic names for arguments to memcpy/__copy_user
  */
@@ -175,7 +176,11 @@
 
 	.text
 	.set	noreorder
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	.set	noat
+#else
+	.set	at=v1
+#endif
 
 /*
  * A combined memcpy/__copy_user
@@ -268,8 +273,10 @@ EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
 	STORE	t1, UNIT(1)(dst)
 	STORE	t2, UNIT(2)(dst)
 	STORE	t3, UNIT(3)(dst)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 4*NBYTES
 	beqz	len, done
-	ADD	dst, dst, 4*NBYTES
+	.set	noreorder
 less_than_4units:
 	/*
 	 * rem = len % NBYTES
@@ -281,8 +288,10 @@ EXC( LOAD t0, 0(src), l_exc)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
 	STORE	t0, 0(dst)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, NBYTES
 	bne	rem, len, 1b
-	ADD	dst, dst, NBYTES
+	.set	noreorder
 
 	/*
 	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
@@ -361,8 +370,10 @@ EXC( LDREST t3, REST(3)(src), l_exc_copy)
 	STORE	t2, UNIT(2)(dst)
 	STORE	t3, UNIT(3)(dst)
 	PREF(	1, 9*32(dst) )		# 1 is PREF_STORE (not streamed)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 4*NBYTES
 	bne	len, rem, 1b
-	ADD	dst, dst, 4*NBYTES
+	.set	noreorder
 
 cleanup_src_unaligned:
 	beqz	len, done
@@ -375,8 +386,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
 	STORE	t0, 0(dst)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, NBYTES
 	bne	len, rem, 1b
-	ADD	dst, dst, NBYTES
+	.set	noreorder
 
 copy_bytes_checklen:
 	beqz	len, done
@@ -424,8 +437,10 @@ l_exc_copy:
 EXC(	lb	t1, 0(src),	l_exc)
 	ADD	src, src, 1
 	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 1
 	bne	src, t0, 1b
-	ADD	dst, dst, 1
+	.set	noreorder
 l_exc:
 	LOAD	t0, TI_TASK($28)
 	nop
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index a526c62cb76a..aded7b159052 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -9,6 +9,7 @@
  * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
  * Copyright (C) 2002 Broadcom, Inc.
  *   memcpy/copy_user author: Mark Vandevoorde
+ * Copyright (C) 2007 Maciej W. Rozycki
  *
  * Mnemonic names for arguments to memcpy/__copy_user
  */
@@ -175,7 +176,11 @@
 
 	.text
 	.set	noreorder
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	.set	noat
+#else
+	.set	at=v1
+#endif
 
 /*
  * A combined memcpy/__copy_user
@@ -271,8 +276,10 @@ EXC( STORE t0, UNIT(0)(dst), s_exc_p4u)
 EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
 EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
 EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 4*NBYTES
 	beqz	len, done
-	ADD	dst, dst, 4*NBYTES
+	.set	noreorder
 less_than_4units:
 	/*
 	 * rem = len % NBYTES
@@ -284,8 +291,10 @@ EXC( LOAD t0, 0(src), l_exc)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
 EXC(	STORE	t0, 0(dst),		s_exc_p1u)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, NBYTES
 	bne	rem, len, 1b
-	ADD	dst, dst, NBYTES
+	.set	noreorder
 
 	/*
 	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
@@ -364,8 +373,10 @@ EXC( STORE t1, UNIT(1)(dst), s_exc_p3u)
 EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
 EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
 	PREF(	1, 9*32(dst) )		# 1 is PREF_STORE (not streamed)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 4*NBYTES
 	bne	len, rem, 1b
-	ADD	dst, dst, 4*NBYTES
+	.set	noreorder
 
 cleanup_src_unaligned:
 	beqz	len, done
@@ -378,8 +389,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
 EXC(	STORE	t0, 0(dst),		s_exc_p1u)
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, NBYTES
 	bne	len, rem, 1b
-	ADD	dst, dst, NBYTES
+	.set	noreorder
 
 copy_bytes_checklen:
 	beqz	len, done
@@ -427,8 +440,10 @@ l_exc_copy:
 EXC(	lb	t1, 0(src),	l_exc)
 	ADD	src, src, 1
 	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
+	.set	reorder				/* DADDI_WAR */
+	ADD	dst, dst, 1
 	bne	src, t0, 1b
-	ADD	dst, dst, 1
+	.set	noreorder
 l_exc:
 	LOAD	t0, TI_TASK($28)
 	nop
@@ -446,20 +461,33 @@ l_exc:
 	 * Clear len bytes starting at dst.  Can't call __bzero because it
 	 * might modify len.  An inefficient loop for these rare times...
 	 */
+	.set	reorder				/* DADDI_WAR */
+	SUB	src, len, 1
 	beqz	len, done
-	SUB	src, len, 1
+	.set	noreorder
 1:	sb	zero, 0(dst)
 	ADD	dst, dst, 1
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	bnez	src, 1b
 	SUB	src, src, 1
+#else
+	.set	push
+	.set	noat
+	li	v1, 1
+	bnez	src, 1b
+	SUB	src, src, v1
+	.set	pop
+#endif
 	jr	ra
 	nop
 
 
 #define SEXC(n)					\
-s_exc_p ## n ## u:				\
-	jr	ra;				\
-	ADD	len, len, n*NBYTES
+	.set	reorder;	/* DADDI_WAR */	\
+s_exc_p ## n ## u:				\
+	ADD	len, len, n*NBYTES;		\
+	jr	ra;				\
+	.set	noreorder
 
 SEXC(8)
 SEXC(7)
@@ -471,8 +499,10 @@ SEXC(2)
 SEXC(1)
 
 s_exc_p1:
+	.set	reorder				/* DADDI_WAR */
+	ADD	len, len, 1
 	jr	ra
-	ADD	len, len, 1
+	.set	noreorder
 s_exc:
 	jr	ra
 	nop
@@ -502,8 +532,10 @@ r_end_bytes:
 	SUB	a2, a2, 0x1
 	sb	t0, -1(a0)
 	SUB	a1, a1, 0x1
+	.set	reorder				/* DADDI_WAR */
+	SUB	a0, a0, 0x1
 	bnez	a2, r_end_bytes
-	SUB	a0, a0, 0x1
+	.set	noreorder
 
 r_out:
 	jr	ra
@@ -514,8 +546,10 @@ r_end_bytes_up:
 	SUB	a2, a2, 0x1
 	sb	t0, (a0)
 	ADD	a1, a1, 0x1
+	.set	reorder				/* DADDI_WAR */
+	ADD	a0, a0, 0x1
 	bnez	a2, r_end_bytes_up
-	ADD	a0, a0, 0x1
+	.set	noreorder
 
 	jr	ra
 	move	a2, zero
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index 3f8b8b3d0b23..3bf38422342f 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -5,6 +5,7 @@
  *
  * Copyright (C) 1998, 1999, 2000 by Ralf Baechle
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Copyright (C) 2007 Maciej W. Rozycki
  */
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
@@ -74,8 +75,16 @@ FEXPORT(__bzero)
 	bnez	t0, small_memset
 	andi	t0, a0, LONGMASK	/* aligned? */
 
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	beqz	t0, 1f
 	PTR_SUBU	t0, LONGSIZE	/* alignment in bytes */
+#else
+	.set	noat
+	li	AT, LONGSIZE
+	beqz	t0, 1f
+	PTR_SUBU	t0, AT		/* alignment in bytes */
+	.set	at
+#endif
 
 #ifdef __MIPSEB__
 	EX(LONG_S_L, a1, (a0), first_fixup)	/* make word/dword aligned */
@@ -106,7 +115,7 @@ memset_partial:
 	.set	noat
 	LONG_SRL	AT, t0, 1
 	PTR_SUBU	t1, AT
-	.set	noat
+	.set	at
 #endif
 	jr	t1
 	PTR_ADDU	a0, t0			/* dest ptr */
diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S
index d16c76fbfac7..5c8fb9d6b7f9 100644
--- a/arch/mips/lib/strncpy_user.S
+++ b/arch/mips/lib/strncpy_user.S
@@ -41,9 +41,9 @@ FEXPORT(__strncpy_from_user_nocheck_asm)
 	beqz	t0, 2f
 	sb	t0, (a0)
 	PTR_ADDIU	v0, 1
-	bne	v0, a2, 1b
-	PTR_ADDIU	a0, 1
 	.set	reorder
+	PTR_ADDIU	a0, 1
+	bne	v0, a2, 1b
 2:	PTR_ADDU	t0, a1, v0
 	xor	t0, a1
 	bltz	t0, fault
diff --git a/arch/mips/mm/pg-r4k.c b/arch/mips/mm/pg-r4k.c
index 4f770ac885ce..9185fbf37c0d 100644
--- a/arch/mips/mm/pg-r4k.c
+++ b/arch/mips/mm/pg-r4k.c
@@ -4,6 +4,7 @@
  * for more details.
  *
  * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
+ * Copyright (C) 2007 Maciej W. Rozycki
  */
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -12,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/proc_fs.h>
 
+#include <asm/bugs.h>
 #include <asm/cacheops.h>
 #include <asm/inst.h>
 #include <asm/io.h>
@@ -255,64 +257,58 @@ static inline void build_store_reg(int reg)
 	__build_store_reg(reg);
 }
 
-static inline void build_addiu_a2_a0(unsigned long offset)
+static inline void build_addiu_rt_rs(unsigned int rt, unsigned int rs,
+				     unsigned long offset)
 {
 	union mips_instruction mi;
 
 	BUG_ON(offset > 0x7fff);
 
-	mi.i_format.opcode     = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op;
-	mi.i_format.rs         = 4;		/* $a0 */
-	mi.i_format.rt         = 6;		/* $a2 */
-	mi.i_format.simmediate = offset;
+	if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) {
+		mi.i_format.opcode     = addiu_op;
+		mi.i_format.rs         = 0;	/* $zero */
+		mi.i_format.rt         = 25;	/* $t9 */
+		mi.i_format.simmediate = offset;
+		emit_instruction(mi);
 
+		mi.r_format.opcode = spec_op;
+		mi.r_format.rs     = rs;
+		mi.r_format.rt     = 25;	/* $t9 */
+		mi.r_format.rd     = rt;
+		mi.r_format.re     = 0;
+		mi.r_format.func   = daddu_op;
+	} else {
+		mi.i_format.opcode     = cpu_has_64bit_gp_regs ?
+					 daddiu_op : addiu_op;
+		mi.i_format.rs         = rs;
+		mi.i_format.rt         = rt;
+		mi.i_format.simmediate = offset;
+	}
 	emit_instruction(mi);
 }
 
-static inline void build_addiu_a2(unsigned long offset)
+static inline void build_addiu_a2_a0(unsigned long offset)
 {
-	union mips_instruction mi;
-
-	BUG_ON(offset > 0x7fff);
-
-	mi.i_format.opcode     = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op;
-	mi.i_format.rs         = 6;		/* $a2 */
-	mi.i_format.rt         = 6;		/* $a2 */
-	mi.i_format.simmediate = offset;
+	build_addiu_rt_rs(6, 4, offset);	/* $a2, $a0, offset */
+}
 
-	emit_instruction(mi);
+static inline void build_addiu_a2(unsigned long offset)
+{
+	build_addiu_rt_rs(6, 6, offset);	/* $a2, $a2, offset */
 }
 
 static inline void build_addiu_a1(unsigned long offset)
 {
-	union mips_instruction mi;
-
-	BUG_ON(offset > 0x7fff);
-
-	mi.i_format.opcode     = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op;
-	mi.i_format.rs         = 5;		/* $a1 */
-	mi.i_format.rt         = 5;		/* $a1 */
-	mi.i_format.simmediate = offset;
+	build_addiu_rt_rs(5, 5, offset);	/* $a1, $a1, offset */
 
 	load_offset -= offset;
-
-	emit_instruction(mi);
 }
 
 static inline void build_addiu_a0(unsigned long offset)
 {
-	union mips_instruction mi;
-
-	BUG_ON(offset > 0x7fff);
-
-	mi.i_format.opcode     = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op;
-	mi.i_format.rs         = 4;		/* $a0 */
-	mi.i_format.rt         = 4;		/* $a0 */
-	mi.i_format.simmediate = offset;
+	build_addiu_rt_rs(4, 4, offset);	/* $a0, $a0, offset */
 
 	store_offset -= offset;
-
-	emit_instruction(mi);
 }
 
 static inline void build_bne(unsigned int *dest)
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 511107f92d9c..f8925ba0b39e 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -6,7 +6,7 @@
  * Synthesize TLB refill handlers at runtime.
  *
  * Copyright (C) 2004,2005,2006 by Thiemo Seufer
- * Copyright (C) 2005 Maciej W. Rozycki
+ * Copyright (C) 2005, 2007 Maciej W. Rozycki
  * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org)
  *
  * ... and the days got worse and worse and now you see
@@ -27,6 +27,7 @@
 #include <linux/string.h>
 #include <linux/init.h>
 
+#include <asm/bugs.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
@@ -293,7 +294,7 @@ static void __init build_insn(u32 **buf, enum opcode opc, ...)
 		break;
 	}
 
-	if (!ip)
+	if (!ip || (opc == insn_daddiu && r4k_daddiu_bug()))
 		panic("Unsupported TLB synthesizer instruction %d", opc);
 
 	op = ip->match;
@@ -525,23 +526,33 @@ L_LA(_r3000_write_probe_fail)
 #define i_ssnop(buf) i_sll(buf, 0, 0, 1)
 #define i_ehb(buf) i_sll(buf, 0, 0, 3)
 
-#ifdef CONFIG_64BIT
 static __init int __maybe_unused in_compat_space_p(long addr)
 {
 	/* Is this address in 32bit compat space? */
+#ifdef CONFIG_64BIT
 	return (((addr) & 0xffffffff00000000L) == 0xffffffff00000000L);
+#else
+	return 1;
+#endif
 }
 
 static __init int __maybe_unused rel_highest(long val)
 {
+#ifdef CONFIG_64BIT
 	return ((((val + 0x800080008000L) >> 48) & 0xffff) ^ 0x8000) - 0x8000;
+#else
+	return 0;
+#endif
 }
 
 static __init int __maybe_unused rel_higher(long val)
 {
+#ifdef CONFIG_64BIT
 	return ((((val + 0x80008000L) >> 32) & 0xffff) ^ 0x8000) - 0x8000;
-}
+#else
+	return 0;
 #endif
+}
 
 static __init int rel_hi(long val)
 {
@@ -555,7 +566,6 @@ static __init int rel_lo(long val)
 
 static __init void i_LA_mostly(u32 **buf, unsigned int rs, long addr)
 {
-#ifdef CONFIG_64BIT
 	if (!in_compat_space_p(addr)) {
 		i_lui(buf, rs, rel_highest(addr));
 		if (rel_higher(addr))
@@ -567,16 +577,18 @@ static __init void i_LA_mostly(u32 **buf, unsigned int rs, long addr)
 		} else
 			i_dsll32(buf, rs, rs, 0);
 	} else
-#endif
 		i_lui(buf, rs, rel_hi(addr));
 }
 
-static __init void __maybe_unused i_LA(u32 **buf, unsigned int rs,
-				       long addr)
+static __init void __maybe_unused i_LA(u32 **buf, unsigned int rs, long addr)
 {
 	i_LA_mostly(buf, rs, addr);
-	if (rel_lo(addr))
-		i_ADDIU(buf, rs, rs, rel_lo(addr));
+	if (rel_lo(addr)) {
+		if (!in_compat_space_p(addr))
+			i_daddiu(buf, rs, rs, rel_lo(addr));
+		else
+			i_addiu(buf, rs, rs, rel_lo(addr));
+	}
 }
 
 /*
@@ -1085,7 +1097,10 @@ build_get_pgd_vmalloc64(u32 **p, struct label **l, struct reloc **r,
 	} else {
 		i_LA_mostly(p, ptr, modd);
 		il_b(p, r, label_vmalloc_done);
-		i_daddiu(p, ptr, ptr, rel_lo(modd));
+		if (in_compat_space_p(modd))
+			i_addiu(p, ptr, ptr, rel_lo(modd));
+		else
+			i_daddiu(p, ptr, ptr, rel_lo(modd));
 	}
 
 	l_vmalloc(l, *p);
@@ -1106,7 +1121,10 @@ build_get_pgd_vmalloc64(u32 **p, struct label **l, struct reloc **r,
 	} else {
 		i_LA_mostly(p, ptr, swpd);
 		il_b(p, r, label_vmalloc_done);
-		i_daddiu(p, ptr, ptr, rel_lo(swpd));
+		if (in_compat_space_p(swpd))
+			i_addiu(p, ptr, ptr, rel_lo(swpd));
+		else
+			i_daddiu(p, ptr, ptr, rel_lo(swpd));
 	}
 }
 