aboutsummaryrefslogtreecommitdiffstats
path: root/arch/mips/lib/memcpy.S
diff options
context:
space:
mode:
author Maciej W. Rozycki <macro@linux-mips.org> 2007-10-23 07:43:25 -0400
committer Ralf Baechle <ralf@linux-mips.org> 2008-01-29 05:14:55 -0500
commit 619b6e18fce20e4b2d0082cde989f37e1be7b3e1 (patch)
tree b56120c0de584f6672aa7e4533163435613f5203 /arch/mips/lib/memcpy.S
parent 20d60d9973c3b441902b0a3f4f6f7e7ade08f77d (diff)
[MIPS] R4000/R4400 daddiu erratum workaround
This complements the generic R4000/R4400 errata workaround code and adds bits for the daddiu problem. In most places it just modifies handwritten assembly code so that the assembler is allowed to use a temporary register as daddiu may now be treated as a macro that expands to a sequence of li and daddu. It is the AT register or, where AT is unavailable or used explicitly for another purpose, an explicitly-named register is selected, using the .set at=<reg> feature added recently to gas. This feature is only used if CONFIG_CPU_DADDI_WORKAROUNDS has been set, so if the workaround remains disabled, the required version of binutils stays unchanged.

Similarly, daddiu instructions put in branch delay slots in noreorder fragments are now taken out of them and the assembler is allowed to reorder them itself as possible (which it does making the whole idea of scheduling them into delay slots manually questionable).

Also in the very few places where such a simple conversion was not possible, a handcoded longer sequence is implemented.

Other than that there are changes to code responsible for building the TLB fault and page clear/copy handlers to avoid daddiu as appropriate. These are only effective if the erratum is verified to be present at the run time.

Finally there is a trivial update to __delay(), because it uses daddiu in a branch delay slot.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch/mips/lib/memcpy.S')
-rw-r--r-- arch/mips/lib/memcpy.S | 60
1 files changed, 47 insertions, 13 deletions
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index a526c62cb76a..aded7b159052 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -9,6 +9,7 @@
9 * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc. 9 * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
10 * Copyright (C) 2002 Broadcom, Inc. 10 * Copyright (C) 2002 Broadcom, Inc.
11 * memcpy/copy_user author: Mark Vandevoorde 11 * memcpy/copy_user author: Mark Vandevoorde
12 * Copyright (C) 2007 Maciej W. Rozycki
12 * 13 *
13 * Mnemonic names for arguments to memcpy/__copy_user 14 * Mnemonic names for arguments to memcpy/__copy_user
14 */ 15 */
@@ -175,7 +176,11 @@
175 176
176 .text 177 .text
177 .set noreorder 178 .set noreorder
179#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
178 .set noat 180 .set noat
181#else
182 .set at=v1
183#endif
179 184
180/* 185/*
181 * A combined memcpy/__copy_user 186 * A combined memcpy/__copy_user
@@ -271,8 +276,10 @@ EXC( STORE t0, UNIT(0)(dst), s_exc_p4u)
271EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) 276EXC( STORE t1, UNIT(1)(dst), s_exc_p3u)
272EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) 277EXC( STORE t2, UNIT(2)(dst), s_exc_p2u)
273EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) 278EXC( STORE t3, UNIT(3)(dst), s_exc_p1u)
279 .set reorder /* DADDI_WAR */
280 ADD dst, dst, 4*NBYTES
274 beqz len, done 281 beqz len, done
275 ADD dst, dst, 4*NBYTES 282 .set noreorder
276less_than_4units: 283less_than_4units:
277 /* 284 /*
278 * rem = len % NBYTES 285 * rem = len % NBYTES
@@ -284,8 +291,10 @@ EXC( LOAD t0, 0(src), l_exc)
284 ADD src, src, NBYTES 291 ADD src, src, NBYTES
285 SUB len, len, NBYTES 292 SUB len, len, NBYTES
286EXC( STORE t0, 0(dst), s_exc_p1u) 293EXC( STORE t0, 0(dst), s_exc_p1u)
294 .set reorder /* DADDI_WAR */
295 ADD dst, dst, NBYTES
287 bne rem, len, 1b 296 bne rem, len, 1b
288 ADD dst, dst, NBYTES 297 .set noreorder
289 298
290 /* 299 /*
291 * src and dst are aligned, need to copy rem bytes (rem < NBYTES) 300 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
@@ -364,8 +373,10 @@ EXC( STORE t1, UNIT(1)(dst), s_exc_p3u)
364EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) 373EXC( STORE t2, UNIT(2)(dst), s_exc_p2u)
365EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) 374EXC( STORE t3, UNIT(3)(dst), s_exc_p1u)
366 PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed) 375 PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed)
376 .set reorder /* DADDI_WAR */
377 ADD dst, dst, 4*NBYTES
367 bne len, rem, 1b 378 bne len, rem, 1b
368 ADD dst, dst, 4*NBYTES 379 .set noreorder
369 380
370cleanup_src_unaligned: 381cleanup_src_unaligned:
371 beqz len, done 382 beqz len, done
@@ -378,8 +389,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy)
378 ADD src, src, NBYTES 389 ADD src, src, NBYTES
379 SUB len, len, NBYTES 390 SUB len, len, NBYTES
380EXC( STORE t0, 0(dst), s_exc_p1u) 391EXC( STORE t0, 0(dst), s_exc_p1u)
392 .set reorder /* DADDI_WAR */
393 ADD dst, dst, NBYTES
381 bne len, rem, 1b 394 bne len, rem, 1b
382 ADD dst, dst, NBYTES 395 .set noreorder
383 396
384copy_bytes_checklen: 397copy_bytes_checklen:
385 beqz len, done 398 beqz len, done
@@ -427,8 +440,10 @@ l_exc_copy:
427EXC( lb t1, 0(src), l_exc) 440EXC( lb t1, 0(src), l_exc)
428 ADD src, src, 1 441 ADD src, src, 1
429 sb t1, 0(dst) # can't fault -- we're copy_from_user 442 sb t1, 0(dst) # can't fault -- we're copy_from_user
443 .set reorder /* DADDI_WAR */
444 ADD dst, dst, 1
430 bne src, t0, 1b 445 bne src, t0, 1b
431 ADD dst, dst, 1 446 .set noreorder
432l_exc: 447l_exc:
433 LOAD t0, TI_TASK($28) 448 LOAD t0, TI_TASK($28)
434 nop 449 nop
@@ -446,20 +461,33 @@ l_exc:
446 * Clear len bytes starting at dst. Can't call __bzero because it 461 * Clear len bytes starting at dst. Can't call __bzero because it
447 * might modify len. An inefficient loop for these rare times... 462 * might modify len. An inefficient loop for these rare times...
448 */ 463 */
464 .set reorder /* DADDI_WAR */
465 SUB src, len, 1
449 beqz len, done 466 beqz len, done
450 SUB src, len, 1 467 .set noreorder
4511: sb zero, 0(dst) 4681: sb zero, 0(dst)
452 ADD dst, dst, 1 469 ADD dst, dst, 1
470#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
453 bnez src, 1b 471 bnez src, 1b
454 SUB src, src, 1 472 SUB src, src, 1
473#else
474 .set push
475 .set noat
476 li v1, 1
477 bnez src, 1b
478 SUB src, src, v1
479 .set pop
480#endif
455 jr ra 481 jr ra
456 nop 482 nop
457 483
458 484
459#define SEXC(n) \ 485#define SEXC(n) \
460s_exc_p ## n ## u: \ 486 .set reorder; /* DADDI_WAR */ \
461 jr ra; \ 487s_exc_p ## n ## u: \
462 ADD len, len, n*NBYTES 488 ADD len, len, n*NBYTES; \
489 jr ra; \
490 .set noreorder
463 491
464SEXC(8) 492SEXC(8)
465SEXC(7) 493SEXC(7)
@@ -471,8 +499,10 @@ SEXC(2)
471SEXC(1) 499SEXC(1)
472 500
473s_exc_p1: 501s_exc_p1:
502 .set reorder /* DADDI_WAR */
503 ADD len, len, 1
474 jr ra 504 jr ra
475 ADD len, len, 1 505 .set noreorder
476s_exc: 506s_exc:
477 jr ra 507 jr ra
478 nop 508 nop
@@ -502,8 +532,10 @@ r_end_bytes:
502 SUB a2, a2, 0x1 532 SUB a2, a2, 0x1
503 sb t0, -1(a0) 533 sb t0, -1(a0)
504 SUB a1, a1, 0x1 534 SUB a1, a1, 0x1
535 .set reorder /* DADDI_WAR */
536 SUB a0, a0, 0x1
505 bnez a2, r_end_bytes 537 bnez a2, r_end_bytes
506 SUB a0, a0, 0x1 538 .set noreorder
507 539
508r_out: 540r_out:
509 jr ra 541 jr ra
@@ -514,8 +546,10 @@ r_end_bytes_up:
514 SUB a2, a2, 0x1 546 SUB a2, a2, 0x1
515 sb t0, (a0) 547 sb t0, (a0)
516 ADD a1, a1, 0x1 548 ADD a1, a1, 0x1
549 .set reorder /* DADDI_WAR */
550 ADD a0, a0, 0x1
517 bnez a2, r_end_bytes_up 551 bnez a2, r_end_bytes_up
518 ADD a0, a0, 0x1 552 .set noreorder
519 553
520 jr ra 554 jr ra
521 move a2, zero 555 move a2, zero