Diffstat (limited to 'arch/mips/lib'):

 arch/mips/lib/csum_partial.S    | 275
 arch/mips/lib/memcpy-inatomic.S | 141
 arch/mips/lib/memcpy.S          | 250
 arch/mips/lib/memset.S          |  44
 arch/mips/lib/strlen_user.S     |   6
 arch/mips/lib/strncpy_user.S    |  15
 arch/mips/lib/strnlen_user.S    |   7
 arch/mips/lib/uncached.c        |  12

 8 files changed, 433 insertions(+), 317 deletions(-)
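Two mechanical patterns repeat throughout the hunks that follow. Every branch target gains a .L prefix, which makes it an assembler-local symbol, keeping these internal labels out of the symbol table so backtraces attribute faults to the enclosing function (memcpy, csum_partial) rather than to a loop label. And wherever a pointer update used to sit in a branch delay slot, it is hoisted above the branch under `.set reorder /* DADDI_WAR */`: with CONFIG_CPU_DADDI_WORKAROUNDS enabled, macros such as ADD, SUB and PTR_ADDU may expand to more than one machine instruction, and a multi-instruction expansion must never land in a delay slot. A minimal sketch of that transformation, using a hypothetical loop rather than any exact hunk below:

	# Before: the update is placed in the delay slot by hand.
	.set	noreorder
1:	LONG_SUBU t8, t8, 0x01		# one fewer block to go
	bnez	t8, 1b			# loop while blocks remain
	 PTR_ADDU src, src, 0x80	# delay slot: unsafe if this
					# macro expands to >1 insn

	# After: .set reorder lets the assembler schedule the branch
	# and fill the slot itself, so the expansion stays safe.
	.set	reorder			/* DADDI_WAR */
1:	LONG_SUBU t8, t8, 0x01
	PTR_ADDU src, src, 0x80
	bnez	t8, 1b
	.set	noreorder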
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index c0a77fe038b..8d7784122c1 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -7,6 +7,7 @@ | |||
7 | * | 7 | * |
8 | * Copyright (C) 1998, 1999 Ralf Baechle | 8 | * Copyright (C) 1998, 1999 Ralf Baechle |
9 | * Copyright (C) 1999 Silicon Graphics, Inc. | 9 | * Copyright (C) 1999 Silicon Graphics, Inc. |
10 | * Copyright (C) 2007 Maciej W. Rozycki | ||
10 | */ | 11 | */ |
11 | #include <linux/errno.h> | 12 | #include <linux/errno.h> |
12 | #include <asm/asm.h> | 13 | #include <asm/asm.h> |
@@ -52,9 +53,12 @@ | |||
52 | #define UNIT(unit) ((unit)*NBYTES) | 53 | #define UNIT(unit) ((unit)*NBYTES) |
53 | 54 | ||
54 | #define ADDC(sum,reg) \ | 55 | #define ADDC(sum,reg) \ |
56 | .set push; \ | ||
57 | .set noat; \ | ||
55 | ADD sum, reg; \ | 58 | ADD sum, reg; \ |
56 | sltu v1, sum, reg; \ | 59 | sltu v1, sum, reg; \ |
57 | ADD sum, v1 | 60 | ADD sum, v1; \ |
61 | .set pop | ||
58 | 62 | ||
59 | #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \ | 63 | #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \ |
60 | LOAD _t0, (offset + UNIT(0))(src); \ | 64 | LOAD _t0, (offset + UNIT(0))(src); \ |
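For context, ADDC is the heart of the ones'-complement checksum: it adds a word into the accumulator and folds any carry-out straight back in, using v1 as scratch. The new .set push/noat/pop bracket silences the assembler's AT warning, which matters once `.set at=v1` (introduced later in this file) makes v1 the assembler's own temporary. With the 32-bit ADD, the macro expands to roughly:

	.set	push
	.set	noat			# v1 may double as the macro temp
	addu	sum, sum, t0		# sum += t0, modulo 2^32
	sltu	v1, sum, t0		# v1 = 1 iff the addition wrapped
	addu	sum, sum, v1		# end-around carry folded back in
	.set	pop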
@@ -92,13 +96,13 @@ LEAF(csum_partial) | |||
92 | move t7, zero | 96 | move t7, zero |
93 | 97 | ||
94 | sltiu t8, a1, 0x8 | 98 | sltiu t8, a1, 0x8 |
95 | bnez t8, small_csumcpy /* < 8 bytes to copy */ | 99 | bnez t8, .Lsmall_csumcpy /* < 8 bytes to copy */ |
96 | move t2, a1 | 100 | move t2, a1 |
97 | 101 | ||
98 | andi t7, src, 0x1 /* odd buffer? */ | 102 | andi t7, src, 0x1 /* odd buffer? */ |
99 | 103 | ||
100 | hword_align: | 104 | .Lhword_align: |
101 | beqz t7, word_align | 105 | beqz t7, .Lword_align |
102 | andi t8, src, 0x2 | 106 | andi t8, src, 0x2 |
103 | 107 | ||
104 | lbu t0, (src) | 108 | lbu t0, (src) |
@@ -110,8 +114,8 @@ hword_align: | |||
110 | PTR_ADDU src, src, 0x1 | 114 | PTR_ADDU src, src, 0x1 |
111 | andi t8, src, 0x2 | 115 | andi t8, src, 0x2 |
112 | 116 | ||
113 | word_align: | 117 | .Lword_align: |
114 | beqz t8, dword_align | 118 | beqz t8, .Ldword_align |
115 | sltiu t8, a1, 56 | 119 | sltiu t8, a1, 56 |
116 | 120 | ||
117 | lhu t0, (src) | 121 | lhu t0, (src) |
@@ -120,12 +124,12 @@ word_align: | |||
120 | sltiu t8, a1, 56 | 124 | sltiu t8, a1, 56 |
121 | PTR_ADDU src, src, 0x2 | 125 | PTR_ADDU src, src, 0x2 |
122 | 126 | ||
123 | dword_align: | 127 | .Ldword_align: |
124 | bnez t8, do_end_words | 128 | bnez t8, .Ldo_end_words |
125 | move t8, a1 | 129 | move t8, a1 |
126 | 130 | ||
127 | andi t8, src, 0x4 | 131 | andi t8, src, 0x4 |
128 | beqz t8, qword_align | 132 | beqz t8, .Lqword_align |
129 | andi t8, src, 0x8 | 133 | andi t8, src, 0x8 |
130 | 134 | ||
131 | lw t0, 0x00(src) | 135 | lw t0, 0x00(src) |
@@ -134,8 +138,8 @@ dword_align: | |||
134 | PTR_ADDU src, src, 0x4 | 138 | PTR_ADDU src, src, 0x4 |
135 | andi t8, src, 0x8 | 139 | andi t8, src, 0x8 |
136 | 140 | ||
137 | qword_align: | 141 | .Lqword_align: |
138 | beqz t8, oword_align | 142 | beqz t8, .Loword_align |
139 | andi t8, src, 0x10 | 143 | andi t8, src, 0x10 |
140 | 144 | ||
141 | #ifdef USE_DOUBLE | 145 | #ifdef USE_DOUBLE |
@@ -152,8 +156,8 @@ qword_align: | |||
152 | PTR_ADDU src, src, 0x8 | 156 | PTR_ADDU src, src, 0x8 |
153 | andi t8, src, 0x10 | 157 | andi t8, src, 0x10 |
154 | 158 | ||
155 | oword_align: | 159 | .Loword_align: |
156 | beqz t8, begin_movement | 160 | beqz t8, .Lbegin_movement |
157 | LONG_SRL t8, a1, 0x7 | 161 | LONG_SRL t8, a1, 0x7 |
158 | 162 | ||
159 | #ifdef USE_DOUBLE | 163 | #ifdef USE_DOUBLE |
@@ -168,51 +172,55 @@ oword_align: | |||
168 | PTR_ADDU src, src, 0x10 | 172 | PTR_ADDU src, src, 0x10 |
169 | LONG_SRL t8, a1, 0x7 | 173 | LONG_SRL t8, a1, 0x7 |
170 | 174 | ||
171 | begin_movement: | 175 | .Lbegin_movement: |
172 | beqz t8, 1f | 176 | beqz t8, 1f |
173 | andi t2, a1, 0x40 | 177 | andi t2, a1, 0x40 |
174 | 178 | ||
175 | move_128bytes: | 179 | .Lmove_128bytes: |
176 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) | 180 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) |
177 | CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) | 181 | CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) |
178 | CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4) | 182 | CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4) |
179 | CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4) | 183 | CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4) |
180 | LONG_SUBU t8, t8, 0x01 | 184 | LONG_SUBU t8, t8, 0x01 |
181 | bnez t8, move_128bytes | 185 | .set reorder /* DADDI_WAR */ |
182 | PTR_ADDU src, src, 0x80 | 186 | PTR_ADDU src, src, 0x80 |
187 | bnez t8, .Lmove_128bytes | ||
188 | .set noreorder | ||
183 | 189 | ||
184 | 1: | 190 | 1: |
185 | beqz t2, 1f | 191 | beqz t2, 1f |
186 | andi t2, a1, 0x20 | 192 | andi t2, a1, 0x20 |
187 | 193 | ||
188 | move_64bytes: | 194 | .Lmove_64bytes: |
189 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) | 195 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) |
190 | CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) | 196 | CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) |
191 | PTR_ADDU src, src, 0x40 | 197 | PTR_ADDU src, src, 0x40 |
192 | 198 | ||
193 | 1: | 199 | 1: |
194 | beqz t2, do_end_words | 200 | beqz t2, .Ldo_end_words |
195 | andi t8, a1, 0x1c | 201 | andi t8, a1, 0x1c |
196 | 202 | ||
197 | move_32bytes: | 203 | .Lmove_32bytes: |
198 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) | 204 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) |
199 | andi t8, a1, 0x1c | 205 | andi t8, a1, 0x1c |
200 | PTR_ADDU src, src, 0x20 | 206 | PTR_ADDU src, src, 0x20 |
201 | 207 | ||
202 | do_end_words: | 208 | .Ldo_end_words: |
203 | beqz t8, small_csumcpy | 209 | beqz t8, .Lsmall_csumcpy |
204 | andi t2, a1, 0x3 | 210 | andi t2, a1, 0x3 |
205 | LONG_SRL t8, t8, 0x2 | 211 | LONG_SRL t8, t8, 0x2 |
206 | 212 | ||
207 | end_words: | 213 | .Lend_words: |
208 | lw t0, (src) | 214 | lw t0, (src) |
209 | LONG_SUBU t8, t8, 0x1 | 215 | LONG_SUBU t8, t8, 0x1 |
210 | ADDC(sum, t0) | 216 | ADDC(sum, t0) |
211 | bnez t8, end_words | 217 | .set reorder /* DADDI_WAR */ |
212 | PTR_ADDU src, src, 0x4 | 218 | PTR_ADDU src, src, 0x4 |
219 | bnez t8, .Lend_words | ||
220 | .set noreorder | ||
213 | 221 | ||
214 | /* unknown src alignment and < 8 bytes to go */ | 222 | /* unknown src alignment and < 8 bytes to go */ |
215 | small_csumcpy: | 223 | .Lsmall_csumcpy: |
216 | move a1, t2 | 224 | move a1, t2 |
217 | 225 | ||
218 | andi t0, a1, 4 | 226 | andi t0, a1, 4 |
@@ -246,6 +254,8 @@ small_csumcpy: | |||
246 | 1: ADDC(sum, t1) | 254 | 1: ADDC(sum, t1) |
247 | 255 | ||
248 | /* fold checksum */ | 256 | /* fold checksum */ |
257 | .set push | ||
258 | .set noat | ||
249 | #ifdef USE_DOUBLE | 259 | #ifdef USE_DOUBLE |
250 | dsll32 v1, sum, 0 | 260 | dsll32 v1, sum, 0 |
251 | daddu sum, v1 | 261 | daddu sum, v1 |
@@ -266,6 +276,7 @@ small_csumcpy: | |||
266 | srl sum, sum, 8 | 276 | srl sum, sum, 8 |
267 | or sum, v1 | 277 | or sum, v1 |
268 | andi sum, 0xffff | 278 | andi sum, 0xffff |
279 | .set pop | ||
269 | 1: | 280 | 1: |
270 | .set reorder | 281 | .set reorder |
271 | /* Add the passed partial csum. */ | 282 | /* Add the passed partial csum. */ |
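This hunk brackets the checksum fold in .set noat for the same v1 reason. The fold compresses the wide accumulator down to 16 bits, and the trailing byte swap (the srl/or sequence shown) undoes the byte rotation caused by summing from an odd starting address, which t7 records earlier in the routine. The middle of the hunk elided above is, in sketch, the classic halving fold:

	# USE_DOUBLE first folds 64 -> 32 (dsll32/daddu, shown above),
	# then both variants fold 32 -> 16:
	sll	v1, sum, 16		# low half moved to the top
	addu	sum, sum, v1		# add the two 16-bit halves
	sltu	v1, sum, v1		# carry out of that add
	srl	sum, sum, 16		# keep the summed high half
	addu	sum, sum, v1		# plus the carry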
@@ -373,7 +384,11 @@ small_csumcpy: | |||
373 | 384 | ||
374 | #define ADDRMASK (NBYTES-1) | 385 | #define ADDRMASK (NBYTES-1) |
375 | 386 | ||
387 | #ifndef CONFIG_CPU_DADDI_WORKAROUNDS | ||
376 | .set noat | 388 | .set noat |
389 | #else | ||
390 | .set at=v1 | ||
391 | #endif | ||
377 | 392 | ||
378 | LEAF(__csum_partial_copy_user) | 393 | LEAF(__csum_partial_copy_user) |
379 | PTR_ADDU AT, src, len /* See (1) above. */ | 394 | PTR_ADDU AT, src, len /* See (1) above. */ |
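The new conditional just above is the other half of the workaround: `.set at=v1` tells gas to use v1, instead of the usual $1 (AT), as the temporary for its macro expansions. That is needed here because the code uses AT explicitly (PTR_ADDU AT, src, len, directly above), while CONFIG_CPU_DADDI_WORKAROUNDS can force ADD/SUB immediates to expand through the temporary. A hypothetical expansion, since the exact sequence is the assembler's choice:

	# With .set at=v1 in effect, something like
	#	ADD	len, 8*NBYTES
	# may be assembled, daddi-free, as:
	li	v1, 8*NBYTES		# immediate staged in the temp
	daddu	len, len, v1		# plain register-register add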
@@ -398,95 +413,101 @@ FEXPORT(csum_partial_copy_nocheck) | |||
398 | */ | 413 | */ |
399 | sltu t2, len, NBYTES | 414 | sltu t2, len, NBYTES |
400 | and t1, dst, ADDRMASK | 415 | and t1, dst, ADDRMASK |
401 | bnez t2, copy_bytes_checklen | 416 | bnez t2, .Lcopy_bytes_checklen |
402 | and t0, src, ADDRMASK | 417 | and t0, src, ADDRMASK |
403 | andi odd, dst, 0x1 /* odd buffer? */ | 418 | andi odd, dst, 0x1 /* odd buffer? */ |
404 | bnez t1, dst_unaligned | 419 | bnez t1, .Ldst_unaligned |
405 | nop | 420 | nop |
406 | bnez t0, src_unaligned_dst_aligned | 421 | bnez t0, .Lsrc_unaligned_dst_aligned |
407 | /* | 422 | /* |
408 | * use delay slot for fall-through | 423 | * use delay slot for fall-through |
409 | * src and dst are aligned; need to compute rem | 424 | * src and dst are aligned; need to compute rem |
410 | */ | 425 | */ |
411 | both_aligned: | 426 | .Lboth_aligned: |
412 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter | 427 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter |
413 | beqz t0, cleanup_both_aligned # len < 8*NBYTES | 428 | beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES |
414 | nop | 429 | nop |
415 | SUB len, 8*NBYTES # subtract here for bgez loop | 430 | SUB len, 8*NBYTES # subtract here for bgez loop |
416 | .align 4 | 431 | .align 4 |
417 | 1: | 432 | 1: |
418 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 433 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
419 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 434 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
420 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 435 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
421 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 436 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) |
422 | EXC( LOAD t4, UNIT(4)(src), l_exc_copy) | 437 | EXC( LOAD t4, UNIT(4)(src), .Ll_exc_copy) |
423 | EXC( LOAD t5, UNIT(5)(src), l_exc_copy) | 438 | EXC( LOAD t5, UNIT(5)(src), .Ll_exc_copy) |
424 | EXC( LOAD t6, UNIT(6)(src), l_exc_copy) | 439 | EXC( LOAD t6, UNIT(6)(src), .Ll_exc_copy) |
425 | EXC( LOAD t7, UNIT(7)(src), l_exc_copy) | 440 | EXC( LOAD t7, UNIT(7)(src), .Ll_exc_copy) |
426 | SUB len, len, 8*NBYTES | 441 | SUB len, len, 8*NBYTES |
427 | ADD src, src, 8*NBYTES | 442 | ADD src, src, 8*NBYTES |
428 | EXC( STORE t0, UNIT(0)(dst), s_exc) | 443 | EXC( STORE t0, UNIT(0)(dst), .Ls_exc) |
429 | ADDC(sum, t0) | 444 | ADDC(sum, t0) |
430 | EXC( STORE t1, UNIT(1)(dst), s_exc) | 445 | EXC( STORE t1, UNIT(1)(dst), .Ls_exc) |
431 | ADDC(sum, t1) | 446 | ADDC(sum, t1) |
432 | EXC( STORE t2, UNIT(2)(dst), s_exc) | 447 | EXC( STORE t2, UNIT(2)(dst), .Ls_exc) |
433 | ADDC(sum, t2) | 448 | ADDC(sum, t2) |
434 | EXC( STORE t3, UNIT(3)(dst), s_exc) | 449 | EXC( STORE t3, UNIT(3)(dst), .Ls_exc) |
435 | ADDC(sum, t3) | 450 | ADDC(sum, t3) |
436 | EXC( STORE t4, UNIT(4)(dst), s_exc) | 451 | EXC( STORE t4, UNIT(4)(dst), .Ls_exc) |
437 | ADDC(sum, t4) | 452 | ADDC(sum, t4) |
438 | EXC( STORE t5, UNIT(5)(dst), s_exc) | 453 | EXC( STORE t5, UNIT(5)(dst), .Ls_exc) |
439 | ADDC(sum, t5) | 454 | ADDC(sum, t5) |
440 | EXC( STORE t6, UNIT(6)(dst), s_exc) | 455 | EXC( STORE t6, UNIT(6)(dst), .Ls_exc) |
441 | ADDC(sum, t6) | 456 | ADDC(sum, t6) |
442 | EXC( STORE t7, UNIT(7)(dst), s_exc) | 457 | EXC( STORE t7, UNIT(7)(dst), .Ls_exc) |
443 | ADDC(sum, t7) | 458 | ADDC(sum, t7) |
459 | .set reorder /* DADDI_WAR */ | ||
460 | ADD dst, dst, 8*NBYTES | ||
444 | bgez len, 1b | 461 | bgez len, 1b |
445 | ADD dst, dst, 8*NBYTES | 462 | .set noreorder |
446 | ADD len, 8*NBYTES # revert len (see above) | 463 | ADD len, 8*NBYTES # revert len (see above) |
447 | 464 | ||
448 | /* | 465 | /* |
449 | * len == the number of bytes left to copy < 8*NBYTES | 466 | * len == the number of bytes left to copy < 8*NBYTES |
450 | */ | 467 | */ |
451 | cleanup_both_aligned: | 468 | .Lcleanup_both_aligned: |
452 | #define rem t7 | 469 | #define rem t7 |
453 | beqz len, done | 470 | beqz len, .Ldone |
454 | sltu t0, len, 4*NBYTES | 471 | sltu t0, len, 4*NBYTES |
455 | bnez t0, less_than_4units | 472 | bnez t0, .Lless_than_4units |
456 | and rem, len, (NBYTES-1) # rem = len % NBYTES | 473 | and rem, len, (NBYTES-1) # rem = len % NBYTES |
457 | /* | 474 | /* |
458 | * len >= 4*NBYTES | 475 | * len >= 4*NBYTES |
459 | */ | 476 | */ |
460 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 477 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
461 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 478 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
462 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 479 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
463 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 480 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) |
464 | SUB len, len, 4*NBYTES | 481 | SUB len, len, 4*NBYTES |
465 | ADD src, src, 4*NBYTES | 482 | ADD src, src, 4*NBYTES |
466 | EXC( STORE t0, UNIT(0)(dst), s_exc) | 483 | EXC( STORE t0, UNIT(0)(dst), .Ls_exc) |
467 | ADDC(sum, t0) | 484 | ADDC(sum, t0) |
468 | EXC( STORE t1, UNIT(1)(dst), s_exc) | 485 | EXC( STORE t1, UNIT(1)(dst), .Ls_exc) |
469 | ADDC(sum, t1) | 486 | ADDC(sum, t1) |
470 | EXC( STORE t2, UNIT(2)(dst), s_exc) | 487 | EXC( STORE t2, UNIT(2)(dst), .Ls_exc) |
471 | ADDC(sum, t2) | 488 | ADDC(sum, t2) |
472 | EXC( STORE t3, UNIT(3)(dst), s_exc) | 489 | EXC( STORE t3, UNIT(3)(dst), .Ls_exc) |
473 | ADDC(sum, t3) | 490 | ADDC(sum, t3) |
474 | beqz len, done | 491 | .set reorder /* DADDI_WAR */ |
475 | ADD dst, dst, 4*NBYTES | 492 | ADD dst, dst, 4*NBYTES |
476 | less_than_4units: | 493 | beqz len, .Ldone |
494 | .set noreorder | ||
495 | .Lless_than_4units: | ||
477 | /* | 496 | /* |
478 | * rem = len % NBYTES | 497 | * rem = len % NBYTES |
479 | */ | 498 | */ |
480 | beq rem, len, copy_bytes | 499 | beq rem, len, .Lcopy_bytes |
481 | nop | 500 | nop |
482 | 1: | 501 | 1: |
483 | EXC( LOAD t0, 0(src), l_exc) | 502 | EXC( LOAD t0, 0(src), .Ll_exc) |
484 | ADD src, src, NBYTES | 503 | ADD src, src, NBYTES |
485 | SUB len, len, NBYTES | 504 | SUB len, len, NBYTES |
486 | EXC( STORE t0, 0(dst), s_exc) | 505 | EXC( STORE t0, 0(dst), .Ls_exc) |
487 | ADDC(sum, t0) | 506 | ADDC(sum, t0) |
507 | .set reorder /* DADDI_WAR */ | ||
508 | ADD dst, dst, NBYTES | ||
488 | bne rem, len, 1b | 509 | bne rem, len, 1b |
489 | ADD dst, dst, NBYTES | 510 | .set noreorder |
490 | 511 | ||
491 | /* | 512 | /* |
492 | * src and dst are aligned, need to copy rem bytes (rem < NBYTES) | 513 | * src and dst are aligned, need to copy rem bytes (rem < NBYTES) |
@@ -500,20 +521,20 @@ EXC( STORE t0, 0(dst), s_exc) | |||
500 | * more instruction-level parallelism. | 521 | * more instruction-level parallelism. |
501 | */ | 522 | */ |
502 | #define bits t2 | 523 | #define bits t2 |
503 | beqz len, done | 524 | beqz len, .Ldone |
504 | ADD t1, dst, len # t1 is just past last byte of dst | 525 | ADD t1, dst, len # t1 is just past last byte of dst |
505 | li bits, 8*NBYTES | 526 | li bits, 8*NBYTES |
506 | SLL rem, len, 3 # rem = number of bits to keep | 527 | SLL rem, len, 3 # rem = number of bits to keep |
507 | EXC( LOAD t0, 0(src), l_exc) | 528 | EXC( LOAD t0, 0(src), .Ll_exc) |
508 | SUB bits, bits, rem # bits = number of bits to discard | 529 | SUB bits, bits, rem # bits = number of bits to discard |
509 | SHIFT_DISCARD t0, t0, bits | 530 | SHIFT_DISCARD t0, t0, bits |
510 | EXC( STREST t0, -1(t1), s_exc) | 531 | EXC( STREST t0, -1(t1), .Ls_exc) |
511 | SHIFT_DISCARD_REVERT t0, t0, bits | 532 | SHIFT_DISCARD_REVERT t0, t0, bits |
512 | .set reorder | 533 | .set reorder |
513 | ADDC(sum, t0) | 534 | ADDC(sum, t0) |
514 | b done | 535 | b .Ldone |
515 | .set noreorder | 536 | .set noreorder |
516 | dst_unaligned: | 537 | .Ldst_unaligned: |
517 | /* | 538 | /* |
518 | * dst is unaligned | 539 | * dst is unaligned |
519 | * t0 = src & ADDRMASK | 540 | * t0 = src & ADDRMASK |
@@ -524,25 +545,25 @@ dst_unaligned: | |||
524 | * Set match = (src and dst have same alignment) | 545 | * Set match = (src and dst have same alignment) |
525 | */ | 546 | */ |
526 | #define match rem | 547 | #define match rem |
527 | EXC( LDFIRST t3, FIRST(0)(src), l_exc) | 548 | EXC( LDFIRST t3, FIRST(0)(src), .Ll_exc) |
528 | ADD t2, zero, NBYTES | 549 | ADD t2, zero, NBYTES |
529 | EXC( LDREST t3, REST(0)(src), l_exc_copy) | 550 | EXC( LDREST t3, REST(0)(src), .Ll_exc_copy) |
530 | SUB t2, t2, t1 # t2 = number of bytes copied | 551 | SUB t2, t2, t1 # t2 = number of bytes copied |
531 | xor match, t0, t1 | 552 | xor match, t0, t1 |
532 | EXC( STFIRST t3, FIRST(0)(dst), s_exc) | 553 | EXC( STFIRST t3, FIRST(0)(dst), .Ls_exc) |
533 | SLL t4, t1, 3 # t4 = number of bits to discard | 554 | SLL t4, t1, 3 # t4 = number of bits to discard |
534 | SHIFT_DISCARD t3, t3, t4 | 555 | SHIFT_DISCARD t3, t3, t4 |
535 | /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */ | 556 | /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */ |
536 | ADDC(sum, t3) | 557 | ADDC(sum, t3) |
537 | beq len, t2, done | 558 | beq len, t2, .Ldone |
538 | SUB len, len, t2 | 559 | SUB len, len, t2 |
539 | ADD dst, dst, t2 | 560 | ADD dst, dst, t2 |
540 | beqz match, both_aligned | 561 | beqz match, .Lboth_aligned |
541 | ADD src, src, t2 | 562 | ADD src, src, t2 |
542 | 563 | ||
543 | src_unaligned_dst_aligned: | 564 | .Lsrc_unaligned_dst_aligned: |
544 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter | 565 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter |
545 | beqz t0, cleanup_src_unaligned | 566 | beqz t0, .Lcleanup_src_unaligned |
546 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES | 567 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES |
547 | 1: | 568 | 1: |
548 | /* | 569 | /* |
@@ -551,49 +572,53 @@ src_unaligned_dst_aligned: | |||
551 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses | 572 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses |
552 | * are to the same unit (unless src is aligned, but it's not). | 573 | * are to the same unit (unless src is aligned, but it's not). |
553 | */ | 574 | */ |
554 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 575 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
555 | EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy) | 576 | EXC( LDFIRST t1, FIRST(1)(src), .Ll_exc_copy) |
556 | SUB len, len, 4*NBYTES | 577 | SUB len, len, 4*NBYTES |
557 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 578 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) |
558 | EXC( LDREST t1, REST(1)(src), l_exc_copy) | 579 | EXC( LDREST t1, REST(1)(src), .Ll_exc_copy) |
559 | EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy) | 580 | EXC( LDFIRST t2, FIRST(2)(src), .Ll_exc_copy) |
560 | EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy) | 581 | EXC( LDFIRST t3, FIRST(3)(src), .Ll_exc_copy) |
561 | EXC( LDREST t2, REST(2)(src), l_exc_copy) | 582 | EXC( LDREST t2, REST(2)(src), .Ll_exc_copy) |
562 | EXC( LDREST t3, REST(3)(src), l_exc_copy) | 583 | EXC( LDREST t3, REST(3)(src), .Ll_exc_copy) |
563 | ADD src, src, 4*NBYTES | 584 | ADD src, src, 4*NBYTES |
564 | #ifdef CONFIG_CPU_SB1 | 585 | #ifdef CONFIG_CPU_SB1 |
565 | nop # improves slotting | 586 | nop # improves slotting |
566 | #endif | 587 | #endif |
567 | EXC( STORE t0, UNIT(0)(dst), s_exc) | 588 | EXC( STORE t0, UNIT(0)(dst), .Ls_exc) |
568 | ADDC(sum, t0) | 589 | ADDC(sum, t0) |
569 | EXC( STORE t1, UNIT(1)(dst), s_exc) | 590 | EXC( STORE t1, UNIT(1)(dst), .Ls_exc) |
570 | ADDC(sum, t1) | 591 | ADDC(sum, t1) |
571 | EXC( STORE t2, UNIT(2)(dst), s_exc) | 592 | EXC( STORE t2, UNIT(2)(dst), .Ls_exc) |
572 | ADDC(sum, t2) | 593 | ADDC(sum, t2) |
573 | EXC( STORE t3, UNIT(3)(dst), s_exc) | 594 | EXC( STORE t3, UNIT(3)(dst), .Ls_exc) |
574 | ADDC(sum, t3) | 595 | ADDC(sum, t3) |
596 | .set reorder /* DADDI_WAR */ | ||
597 | ADD dst, dst, 4*NBYTES | ||
575 | bne len, rem, 1b | 598 | bne len, rem, 1b |
576 | ADD dst, dst, 4*NBYTES | 599 | .set noreorder |
577 | 600 | ||
578 | cleanup_src_unaligned: | 601 | .Lcleanup_src_unaligned: |
579 | beqz len, done | 602 | beqz len, .Ldone |
580 | and rem, len, NBYTES-1 # rem = len % NBYTES | 603 | and rem, len, NBYTES-1 # rem = len % NBYTES |
581 | beq rem, len, copy_bytes | 604 | beq rem, len, .Lcopy_bytes |
582 | nop | 605 | nop |
583 | 1: | 606 | 1: |
584 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 607 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
585 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 608 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) |
586 | ADD src, src, NBYTES | 609 | ADD src, src, NBYTES |
587 | SUB len, len, NBYTES | 610 | SUB len, len, NBYTES |
588 | EXC( STORE t0, 0(dst), s_exc) | 611 | EXC( STORE t0, 0(dst), .Ls_exc) |
589 | ADDC(sum, t0) | 612 | ADDC(sum, t0) |
613 | .set reorder /* DADDI_WAR */ | ||
614 | ADD dst, dst, NBYTES | ||
590 | bne len, rem, 1b | 615 | bne len, rem, 1b |
591 | ADD dst, dst, NBYTES | 616 | .set noreorder |
592 | 617 | ||
593 | copy_bytes_checklen: | 618 | .Lcopy_bytes_checklen: |
594 | beqz len, done | 619 | beqz len, .Ldone |
595 | nop | 620 | nop |
596 | copy_bytes: | 621 | .Lcopy_bytes: |
597 | /* 0 < len < NBYTES */ | 622 | /* 0 < len < NBYTES */ |
598 | #ifdef CONFIG_CPU_LITTLE_ENDIAN | 623 | #ifdef CONFIG_CPU_LITTLE_ENDIAN |
599 | #define SHIFT_START 0 | 624 | #define SHIFT_START 0 |
@@ -604,14 +629,14 @@ copy_bytes: | |||
604 | #endif | 629 | #endif |
605 | move t2, zero # partial word | 630 | move t2, zero # partial word |
606 | li t3, SHIFT_START # shift | 631 | li t3, SHIFT_START # shift |
607 | /* use l_exc_copy here to return correct sum on fault */ | 632 | /* use .Ll_exc_copy here to return correct sum on fault */ |
608 | #define COPY_BYTE(N) \ | 633 | #define COPY_BYTE(N) \ |
609 | EXC( lbu t0, N(src), l_exc_copy); \ | 634 | EXC( lbu t0, N(src), .Ll_exc_copy); \ |
610 | SUB len, len, 1; \ | 635 | SUB len, len, 1; \ |
611 | EXC( sb t0, N(dst), s_exc); \ | 636 | EXC( sb t0, N(dst), .Ls_exc); \ |
612 | SLLV t0, t0, t3; \ | 637 | SLLV t0, t0, t3; \ |
613 | addu t3, SHIFT_INC; \ | 638 | addu t3, SHIFT_INC; \ |
614 | beqz len, copy_bytes_done; \ | 639 | beqz len, .Lcopy_bytes_done; \ |
615 | or t2, t0 | 640 | or t2, t0 |
616 | 641 | ||
617 | COPY_BYTE(0) | 642 | COPY_BYTE(0) |
@@ -622,15 +647,17 @@ EXC( sb t0, N(dst), s_exc); \ | |||
622 | COPY_BYTE(4) | 647 | COPY_BYTE(4) |
623 | COPY_BYTE(5) | 648 | COPY_BYTE(5) |
624 | #endif | 649 | #endif |
625 | EXC( lbu t0, NBYTES-2(src), l_exc_copy) | 650 | EXC( lbu t0, NBYTES-2(src), .Ll_exc_copy) |
626 | SUB len, len, 1 | 651 | SUB len, len, 1 |
627 | EXC( sb t0, NBYTES-2(dst), s_exc) | 652 | EXC( sb t0, NBYTES-2(dst), .Ls_exc) |
628 | SLLV t0, t0, t3 | 653 | SLLV t0, t0, t3 |
629 | or t2, t0 | 654 | or t2, t0 |
630 | copy_bytes_done: | 655 | .Lcopy_bytes_done: |
631 | ADDC(sum, t2) | 656 | ADDC(sum, t2) |
632 | done: | 657 | .Ldone: |
633 | /* fold checksum */ | 658 | /* fold checksum */ |
659 | .set push | ||
660 | .set noat | ||
634 | #ifdef USE_DOUBLE | 661 | #ifdef USE_DOUBLE |
635 | dsll32 v1, sum, 0 | 662 | dsll32 v1, sum, 0 |
636 | daddu sum, v1 | 663 | daddu sum, v1 |
@@ -651,13 +678,14 @@ done: | |||
651 | srl sum, sum, 8 | 678 | srl sum, sum, 8 |
652 | or sum, v1 | 679 | or sum, v1 |
653 | andi sum, 0xffff | 680 | andi sum, 0xffff |
681 | .set pop | ||
654 | 1: | 682 | 1: |
655 | .set reorder | 683 | .set reorder |
656 | ADDC(sum, psum) | 684 | ADDC(sum, psum) |
657 | jr ra | 685 | jr ra |
658 | .set noreorder | 686 | .set noreorder |
659 | 687 | ||
660 | l_exc_copy: | 688 | .Ll_exc_copy: |
661 | /* | 689 | /* |
662 | * Copy bytes from src until faulting load address (or until a | 690 | * Copy bytes from src until faulting load address (or until a |
663 | * lb faults) | 691 | * lb faults) |
@@ -672,15 +700,17 @@ l_exc_copy: | |||
672 | li t2, SHIFT_START | 700 | li t2, SHIFT_START |
673 | LOAD t0, THREAD_BUADDR(t0) | 701 | LOAD t0, THREAD_BUADDR(t0) |
674 | 1: | 702 | 1: |
675 | EXC( lbu t1, 0(src), l_exc) | 703 | EXC( lbu t1, 0(src), .Ll_exc) |
676 | ADD src, src, 1 | 704 | ADD src, src, 1 |
677 | sb t1, 0(dst) # can't fault -- we're copy_from_user | 705 | sb t1, 0(dst) # can't fault -- we're copy_from_user |
678 | SLLV t1, t1, t2 | 706 | SLLV t1, t1, t2 |
679 | addu t2, SHIFT_INC | 707 | addu t2, SHIFT_INC |
680 | ADDC(sum, t1) | 708 | ADDC(sum, t1) |
709 | .set reorder /* DADDI_WAR */ | ||
710 | ADD dst, dst, 1 | ||
681 | bne src, t0, 1b | 711 | bne src, t0, 1b |
682 | ADD dst, dst, 1 | 712 | .set noreorder |
683 | l_exc: | 713 | .Ll_exc: |
684 | LOAD t0, TI_TASK($28) | 714 | LOAD t0, TI_TASK($28) |
685 | nop | 715 | nop |
686 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address | 716 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address |
@@ -697,19 +727,30 @@ l_exc: | |||
697 | * Clear len bytes starting at dst. Can't call __bzero because it | 727 | * Clear len bytes starting at dst. Can't call __bzero because it |
698 | * might modify len. An inefficient loop for these rare times... | 728 | * might modify len. An inefficient loop for these rare times... |
699 | */ | 729 | */ |
700 | beqz len, done | 730 | .set reorder /* DADDI_WAR */ |
701 | SUB src, len, 1 | 731 | SUB src, len, 1 |
732 | beqz len, .Ldone | ||
733 | .set noreorder | ||
702 | 1: sb zero, 0(dst) | 734 | 1: sb zero, 0(dst) |
703 | ADD dst, dst, 1 | 735 | ADD dst, dst, 1 |
736 | .set push | ||
737 | .set noat | ||
738 | #ifndef CONFIG_CPU_DADDI_WORKAROUNDS | ||
704 | bnez src, 1b | 739 | bnez src, 1b |
705 | SUB src, src, 1 | 740 | SUB src, src, 1 |
741 | #else | ||
742 | li v1, 1 | ||
743 | bnez src, 1b | ||
744 | SUB src, src, v1 | ||
745 | #endif | ||
706 | li v1, -EFAULT | 746 | li v1, -EFAULT |
707 | b done | 747 | b .Ldone |
708 | sw v1, (errptr) | 748 | sw v1, (errptr) |
709 | 749 | ||
710 | s_exc: | 750 | .Ls_exc: |
711 | li v0, -1 /* invalid checksum */ | 751 | li v0, -1 /* invalid checksum */ |
712 | li v1, -EFAULT | 752 | li v1, -EFAULT |
713 | jr ra | 753 | jr ra |
714 | sw v1, (errptr) | 754 | sw v1, (errptr) |
755 | .set pop | ||
715 | END(__csum_partial_copy_user) | 756 | END(__csum_partial_copy_user) |
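The byte-clearing fault path just above applies the same rule by hand: a delay slot must hold exactly one instruction, and under CONFIG_CPU_DADDI_WORKAROUNDS `SUB src, src, 1` might not be one, so the constant is staged in v1 and a register-register SUB fills the slot instead. Pulled out of the diff, the workaround branch of the loop reads:

	.set	push
	.set	noat
1:	sb	zero, 0(dst)		# zero one byte of dst
	ADD	dst, dst, 1		# not in a delay slot: free to
					# expand to several instructions
	li	v1, 1			# stage the constant in v1...
	bnez	src, 1b			# ...so the delay slot can hold
	 SUB	src, src, v1		# a single reg-reg subtract
	.set	pop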
diff --git a/arch/mips/lib/memcpy-inatomic.S b/arch/mips/lib/memcpy-inatomic.S
index 3a534b2baa0..736d0fb56a9 100644
--- a/arch/mips/lib/memcpy-inatomic.S
+++ b/arch/mips/lib/memcpy-inatomic.S
@@ -9,6 +9,7 @@ | |||
9 | * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc. | 9 | * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc. |
10 | * Copyright (C) 2002 Broadcom, Inc. | 10 | * Copyright (C) 2002 Broadcom, Inc. |
11 | * memcpy/copy_user author: Mark Vandevoorde | 11 | * memcpy/copy_user author: Mark Vandevoorde |
12 | * Copyright (C) 2007 Maciej W. Rozycki | ||
12 | * | 13 | * |
13 | * Mnemonic names for arguments to memcpy/__copy_user | 14 | * Mnemonic names for arguments to memcpy/__copy_user |
14 | */ | 15 | */ |
@@ -175,7 +176,11 @@ | |||
175 | 176 | ||
176 | .text | 177 | .text |
177 | .set noreorder | 178 | .set noreorder |
179 | #ifndef CONFIG_CPU_DADDI_WORKAROUNDS | ||
178 | .set noat | 180 | .set noat |
181 | #else | ||
182 | .set at=v1 | ||
183 | #endif | ||
179 | 184 | ||
180 | /* | 185 | /* |
181 | * A combined memcpy/__copy_user | 186 | * A combined memcpy/__copy_user |
@@ -204,36 +209,36 @@ LEAF(__copy_user_inatomic) | |||
204 | and t1, dst, ADDRMASK | 209 | and t1, dst, ADDRMASK |
205 | PREF( 0, 1*32(src) ) | 210 | PREF( 0, 1*32(src) ) |
206 | PREF( 1, 1*32(dst) ) | 211 | PREF( 1, 1*32(dst) ) |
207 | bnez t2, copy_bytes_checklen | 212 | bnez t2, .Lcopy_bytes_checklen |
208 | and t0, src, ADDRMASK | 213 | and t0, src, ADDRMASK |
209 | PREF( 0, 2*32(src) ) | 214 | PREF( 0, 2*32(src) ) |
210 | PREF( 1, 2*32(dst) ) | 215 | PREF( 1, 2*32(dst) ) |
211 | bnez t1, dst_unaligned | 216 | bnez t1, .Ldst_unaligned |
212 | nop | 217 | nop |
213 | bnez t0, src_unaligned_dst_aligned | 218 | bnez t0, .Lsrc_unaligned_dst_aligned |
214 | /* | 219 | /* |
215 | * use delay slot for fall-through | 220 | * use delay slot for fall-through |
216 | * src and dst are aligned; need to compute rem | 221 | * src and dst are aligned; need to compute rem |
217 | */ | 222 | */ |
218 | both_aligned: | 223 | .Lboth_aligned: |
219 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter | 224 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter |
220 | beqz t0, cleanup_both_aligned # len < 8*NBYTES | 225 | beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES |
221 | and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES) | 226 | and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES) |
222 | PREF( 0, 3*32(src) ) | 227 | PREF( 0, 3*32(src) ) |
223 | PREF( 1, 3*32(dst) ) | 228 | PREF( 1, 3*32(dst) ) |
224 | .align 4 | 229 | .align 4 |
225 | 1: | 230 | 1: |
226 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 231 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
227 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 232 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
228 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 233 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
229 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 234 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) |
230 | SUB len, len, 8*NBYTES | 235 | SUB len, len, 8*NBYTES |
231 | EXC( LOAD t4, UNIT(4)(src), l_exc_copy) | 236 | EXC( LOAD t4, UNIT(4)(src), .Ll_exc_copy) |
232 | EXC( LOAD t7, UNIT(5)(src), l_exc_copy) | 237 | EXC( LOAD t7, UNIT(5)(src), .Ll_exc_copy) |
233 | STORE t0, UNIT(0)(dst) | 238 | STORE t0, UNIT(0)(dst) |
234 | STORE t1, UNIT(1)(dst) | 239 | STORE t1, UNIT(1)(dst) |
235 | EXC( LOAD t0, UNIT(6)(src), l_exc_copy) | 240 | EXC( LOAD t0, UNIT(6)(src), .Ll_exc_copy) |
236 | EXC( LOAD t1, UNIT(7)(src), l_exc_copy) | 241 | EXC( LOAD t1, UNIT(7)(src), .Ll_exc_copy) |
237 | ADD src, src, 8*NBYTES | 242 | ADD src, src, 8*NBYTES |
238 | ADD dst, dst, 8*NBYTES | 243 | ADD dst, dst, 8*NBYTES |
239 | STORE t2, UNIT(-6)(dst) | 244 | STORE t2, UNIT(-6)(dst) |
@@ -250,39 +255,43 @@ EXC( LOAD t1, UNIT(7)(src), l_exc_copy) | |||
250 | /* | 255 | /* |
251 | * len == rem == the number of bytes left to copy < 8*NBYTES | 256 | * len == rem == the number of bytes left to copy < 8*NBYTES |
252 | */ | 257 | */ |
253 | cleanup_both_aligned: | 258 | .Lcleanup_both_aligned: |
254 | beqz len, done | 259 | beqz len, .Ldone |
255 | sltu t0, len, 4*NBYTES | 260 | sltu t0, len, 4*NBYTES |
256 | bnez t0, less_than_4units | 261 | bnez t0, .Lless_than_4units |
257 | and rem, len, (NBYTES-1) # rem = len % NBYTES | 262 | and rem, len, (NBYTES-1) # rem = len % NBYTES |
258 | /* | 263 | /* |
259 | * len >= 4*NBYTES | 264 | * len >= 4*NBYTES |
260 | */ | 265 | */ |
261 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 266 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
262 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 267 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
263 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 268 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
264 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 269 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) |
265 | SUB len, len, 4*NBYTES | 270 | SUB len, len, 4*NBYTES |
266 | ADD src, src, 4*NBYTES | 271 | ADD src, src, 4*NBYTES |
267 | STORE t0, UNIT(0)(dst) | 272 | STORE t0, UNIT(0)(dst) |
268 | STORE t1, UNIT(1)(dst) | 273 | STORE t1, UNIT(1)(dst) |
269 | STORE t2, UNIT(2)(dst) | 274 | STORE t2, UNIT(2)(dst) |
270 | STORE t3, UNIT(3)(dst) | 275 | STORE t3, UNIT(3)(dst) |
271 | beqz len, done | 276 | .set reorder /* DADDI_WAR */ |
272 | ADD dst, dst, 4*NBYTES | 277 | ADD dst, dst, 4*NBYTES |
273 | less_than_4units: | 278 | beqz len, .Ldone |
279 | .set noreorder | ||
280 | .Lless_than_4units: | ||
274 | /* | 281 | /* |
275 | * rem = len % NBYTES | 282 | * rem = len % NBYTES |
276 | */ | 283 | */ |
277 | beq rem, len, copy_bytes | 284 | beq rem, len, .Lcopy_bytes |
278 | nop | 285 | nop |
279 | 1: | 286 | 1: |
280 | EXC( LOAD t0, 0(src), l_exc) | 287 | EXC( LOAD t0, 0(src), .Ll_exc) |
281 | ADD src, src, NBYTES | 288 | ADD src, src, NBYTES |
282 | SUB len, len, NBYTES | 289 | SUB len, len, NBYTES |
283 | STORE t0, 0(dst) | 290 | STORE t0, 0(dst) |
291 | .set reorder /* DADDI_WAR */ | ||
292 | ADD dst, dst, NBYTES | ||
284 | bne rem, len, 1b | 293 | bne rem, len, 1b |
285 | ADD dst, dst, NBYTES | 294 | .set noreorder |
286 | 295 | ||
287 | /* | 296 | /* |
288 | * src and dst are aligned, need to copy rem bytes (rem < NBYTES) | 297 | * src and dst are aligned, need to copy rem bytes (rem < NBYTES) |
@@ -296,17 +305,17 @@ EXC( LOAD t0, 0(src), l_exc) | |||
296 | * more instruction-level parallelism. | 305 | * more instruction-level parallelism. |
297 | */ | 306 | */ |
298 | #define bits t2 | 307 | #define bits t2 |
299 | beqz len, done | 308 | beqz len, .Ldone |
300 | ADD t1, dst, len # t1 is just past last byte of dst | 309 | ADD t1, dst, len # t1 is just past last byte of dst |
301 | li bits, 8*NBYTES | 310 | li bits, 8*NBYTES |
302 | SLL rem, len, 3 # rem = number of bits to keep | 311 | SLL rem, len, 3 # rem = number of bits to keep |
303 | EXC( LOAD t0, 0(src), l_exc) | 312 | EXC( LOAD t0, 0(src), .Ll_exc) |
304 | SUB bits, bits, rem # bits = number of bits to discard | 313 | SUB bits, bits, rem # bits = number of bits to discard |
305 | SHIFT_DISCARD t0, t0, bits | 314 | SHIFT_DISCARD t0, t0, bits |
306 | STREST t0, -1(t1) | 315 | STREST t0, -1(t1) |
307 | jr ra | 316 | jr ra |
308 | move len, zero | 317 | move len, zero |
309 | dst_unaligned: | 318 | .Ldst_unaligned: |
310 | /* | 319 | /* |
311 | * dst is unaligned | 320 | * dst is unaligned |
312 | * t0 = src & ADDRMASK | 321 | * t0 = src & ADDRMASK |
@@ -317,22 +326,22 @@ dst_unaligned: | |||
317 | * Set match = (src and dst have same alignment) | 326 | * Set match = (src and dst have same alignment) |
318 | */ | 327 | */ |
319 | #define match rem | 328 | #define match rem |
320 | EXC( LDFIRST t3, FIRST(0)(src), l_exc) | 329 | EXC( LDFIRST t3, FIRST(0)(src), .Ll_exc) |
321 | ADD t2, zero, NBYTES | 330 | ADD t2, zero, NBYTES |
322 | EXC( LDREST t3, REST(0)(src), l_exc_copy) | 331 | EXC( LDREST t3, REST(0)(src), .Ll_exc_copy) |
323 | SUB t2, t2, t1 # t2 = number of bytes copied | 332 | SUB t2, t2, t1 # t2 = number of bytes copied |
324 | xor match, t0, t1 | 333 | xor match, t0, t1 |
325 | STFIRST t3, FIRST(0)(dst) | 334 | STFIRST t3, FIRST(0)(dst) |
326 | beq len, t2, done | 335 | beq len, t2, .Ldone |
327 | SUB len, len, t2 | 336 | SUB len, len, t2 |
328 | ADD dst, dst, t2 | 337 | ADD dst, dst, t2 |
329 | beqz match, both_aligned | 338 | beqz match, .Lboth_aligned |
330 | ADD src, src, t2 | 339 | ADD src, src, t2 |
331 | 340 | ||
332 | src_unaligned_dst_aligned: | 341 | .Lsrc_unaligned_dst_aligned: |
333 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter | 342 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter |
334 | PREF( 0, 3*32(src) ) | 343 | PREF( 0, 3*32(src) ) |
335 | beqz t0, cleanup_src_unaligned | 344 | beqz t0, .Lcleanup_src_unaligned |
336 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES | 345 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES |
337 | PREF( 1, 3*32(dst) ) | 346 | PREF( 1, 3*32(dst) ) |
338 | 1: | 347 | 1: |
@@ -342,15 +351,15 @@ src_unaligned_dst_aligned: | |||
342 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses | 351 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses |
343 | * are to the same unit (unless src is aligned, but it's not). | 352 | * are to the same unit (unless src is aligned, but it's not). |
344 | */ | 353 | */ |
345 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 354 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
346 | EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy) | 355 | EXC( LDFIRST t1, FIRST(1)(src), .Ll_exc_copy) |
347 | SUB len, len, 4*NBYTES | 356 | SUB len, len, 4*NBYTES |
348 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 357 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) |
349 | EXC( LDREST t1, REST(1)(src), l_exc_copy) | 358 | EXC( LDREST t1, REST(1)(src), .Ll_exc_copy) |
350 | EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy) | 359 | EXC( LDFIRST t2, FIRST(2)(src), .Ll_exc_copy) |
351 | EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy) | 360 | EXC( LDFIRST t3, FIRST(3)(src), .Ll_exc_copy) |
352 | EXC( LDREST t2, REST(2)(src), l_exc_copy) | 361 | EXC( LDREST t2, REST(2)(src), .Ll_exc_copy) |
353 | EXC( LDREST t3, REST(3)(src), l_exc_copy) | 362 | EXC( LDREST t3, REST(3)(src), .Ll_exc_copy) |
354 | PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed) | 363 | PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed) |
355 | ADD src, src, 4*NBYTES | 364 | ADD src, src, 4*NBYTES |
356 | #ifdef CONFIG_CPU_SB1 | 365 | #ifdef CONFIG_CPU_SB1 |
@@ -361,32 +370,36 @@ EXC( LDREST t3, REST(3)(src), l_exc_copy) | |||
361 | STORE t2, UNIT(2)(dst) | 370 | STORE t2, UNIT(2)(dst) |
362 | STORE t3, UNIT(3)(dst) | 371 | STORE t3, UNIT(3)(dst) |
363 | PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed) | 372 | PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed) |
373 | .set reorder /* DADDI_WAR */ | ||
374 | ADD dst, dst, 4*NBYTES | ||
364 | bne len, rem, 1b | 375 | bne len, rem, 1b |
365 | ADD dst, dst, 4*NBYTES | 376 | .set noreorder |
366 | 377 | ||
367 | cleanup_src_unaligned: | 378 | .Lcleanup_src_unaligned: |
368 | beqz len, done | 379 | beqz len, .Ldone |
369 | and rem, len, NBYTES-1 # rem = len % NBYTES | 380 | and rem, len, NBYTES-1 # rem = len % NBYTES |
370 | beq rem, len, copy_bytes | 381 | beq rem, len, .Lcopy_bytes |
371 | nop | 382 | nop |
372 | 1: | 383 | 1: |
373 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 384 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
374 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 385 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) |
375 | ADD src, src, NBYTES | 386 | ADD src, src, NBYTES |
376 | SUB len, len, NBYTES | 387 | SUB len, len, NBYTES |
377 | STORE t0, 0(dst) | 388 | STORE t0, 0(dst) |
389 | .set reorder /* DADDI_WAR */ | ||
390 | ADD dst, dst, NBYTES | ||
378 | bne len, rem, 1b | 391 | bne len, rem, 1b |
379 | ADD dst, dst, NBYTES | 392 | .set noreorder |
380 | 393 | ||
381 | copy_bytes_checklen: | 394 | .Lcopy_bytes_checklen: |
382 | beqz len, done | 395 | beqz len, .Ldone |
383 | nop | 396 | nop |
384 | copy_bytes: | 397 | .Lcopy_bytes: |
385 | /* 0 < len < NBYTES */ | 398 | /* 0 < len < NBYTES */ |
386 | #define COPY_BYTE(N) \ | 399 | #define COPY_BYTE(N) \ |
387 | EXC( lb t0, N(src), l_exc); \ | 400 | EXC( lb t0, N(src), .Ll_exc); \ |
388 | SUB len, len, 1; \ | 401 | SUB len, len, 1; \ |
389 | beqz len, done; \ | 402 | beqz len, .Ldone; \ |
390 | sb t0, N(dst) | 403 | sb t0, N(dst) |
391 | 404 | ||
392 | COPY_BYTE(0) | 405 | COPY_BYTE(0) |
@@ -397,16 +410,16 @@ EXC( lb t0, N(src), l_exc); \ | |||
397 | COPY_BYTE(4) | 410 | COPY_BYTE(4) |
398 | COPY_BYTE(5) | 411 | COPY_BYTE(5) |
399 | #endif | 412 | #endif |
400 | EXC( lb t0, NBYTES-2(src), l_exc) | 413 | EXC( lb t0, NBYTES-2(src), .Ll_exc) |
401 | SUB len, len, 1 | 414 | SUB len, len, 1 |
402 | jr ra | 415 | jr ra |
403 | sb t0, NBYTES-2(dst) | 416 | sb t0, NBYTES-2(dst) |
404 | done: | 417 | .Ldone: |
405 | jr ra | 418 | jr ra |
406 | nop | 419 | nop |
407 | END(__copy_user_inatomic) | 420 | END(__copy_user_inatomic) |
408 | 421 | ||
409 | l_exc_copy: | 422 | .Ll_exc_copy: |
410 | /* | 423 | /* |
411 | * Copy bytes from src until faulting load address (or until a | 424 | * Copy bytes from src until faulting load address (or until a |
412 | * lb faults) | 425 | * lb faults) |
@@ -421,12 +434,14 @@ l_exc_copy: | |||
421 | nop | 434 | nop |
422 | LOAD t0, THREAD_BUADDR(t0) | 435 | LOAD t0, THREAD_BUADDR(t0) |
423 | 1: | 436 | 1: |
424 | EXC( lb t1, 0(src), l_exc) | 437 | EXC( lb t1, 0(src), .Ll_exc) |
425 | ADD src, src, 1 | 438 | ADD src, src, 1 |
426 | sb t1, 0(dst) # can't fault -- we're copy_from_user | 439 | sb t1, 0(dst) # can't fault -- we're copy_from_user |
440 | .set reorder /* DADDI_WAR */ | ||
441 | ADD dst, dst, 1 | ||
427 | bne src, t0, 1b | 442 | bne src, t0, 1b |
428 | ADD dst, dst, 1 | 443 | .set noreorder |
429 | l_exc: | 444 | .Ll_exc: |
430 | LOAD t0, TI_TASK($28) | 445 | LOAD t0, TI_TASK($28) |
431 | nop | 446 | nop |
432 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address | 447 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address |
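The tail of this file shows the fault-recovery scheme shared by all these routines: when a user-space load faults, the handler fetches the first bad address from the thread struct (THREAD_BUADDR) and replays the copy bytewise up to it; the kernel-side sb cannot fault, so only the source side needs EXC protection. Condensed from the code above (LOAD, ADD and EXC are this file's macros; the interleaved nop and delay-slot scheduling are dropped for brevity):

.Ll_exc_copy:
	LOAD	t0, TI_TASK($28)	# current task_struct
	LOAD	t0, THREAD_BUADDR(t0)	# first address the load faulted on
1:	EXC(	lb t1, 0(src), .Ll_exc)	# byte loads up to the fault point
	ADD	src, src, 1
	sb	t1, 0(dst)		# can't fault: dst is kernel memory
	ADD	dst, dst, 1
	bne	src, t0, 1b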
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index a526c62cb76..c06cccf60be 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -9,6 +9,7 @@ | |||
9 | * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc. | 9 | * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc. |
10 | * Copyright (C) 2002 Broadcom, Inc. | 10 | * Copyright (C) 2002 Broadcom, Inc. |
11 | * memcpy/copy_user author: Mark Vandevoorde | 11 | * memcpy/copy_user author: Mark Vandevoorde |
12 | * Copyright (C) 2007 Maciej W. Rozycki | ||
12 | * | 13 | * |
13 | * Mnemonic names for arguments to memcpy/__copy_user | 14 | * Mnemonic names for arguments to memcpy/__copy_user |
14 | */ | 15 | */ |
@@ -175,7 +176,11 @@ | |||
175 | 176 | ||
176 | .text | 177 | .text |
177 | .set noreorder | 178 | .set noreorder |
179 | #ifndef CONFIG_CPU_DADDI_WORKAROUNDS | ||
178 | .set noat | 180 | .set noat |
181 | #else | ||
182 | .set at=v1 | ||
183 | #endif | ||
179 | 184 | ||
180 | /* | 185 | /* |
181 | * A combined memcpy/__copy_user | 186 | * A combined memcpy/__copy_user |
@@ -186,7 +191,7 @@ | |||
186 | .align 5 | 191 | .align 5 |
187 | LEAF(memcpy) /* a0=dst a1=src a2=len */ | 192 | LEAF(memcpy) /* a0=dst a1=src a2=len */ |
188 | move v0, dst /* return value */ | 193 | move v0, dst /* return value */ |
189 | __memcpy: | 194 | .L__memcpy: |
190 | FEXPORT(__copy_user) | 195 | FEXPORT(__copy_user) |
191 | /* | 196 | /* |
192 | * Note: dst & src may be unaligned, len may be 0 | 197 | * Note: dst & src may be unaligned, len may be 0 |
@@ -194,6 +199,7 @@ FEXPORT(__copy_user) | |||
194 | */ | 199 | */ |
195 | #define rem t8 | 200 | #define rem t8 |
196 | 201 | ||
202 | R10KCBARRIER(0(ra)) | ||
197 | /* | 203 | /* |
198 | * The "issue break"s below are very approximate. | 204 | * The "issue break"s below are very approximate. |
199 | * Issue delays for dcache fills will perturb the schedule, as will | 205 | * Issue delays for dcache fills will perturb the schedule, as will |
@@ -207,44 +213,45 @@ FEXPORT(__copy_user) | |||
207 | and t1, dst, ADDRMASK | 213 | and t1, dst, ADDRMASK |
208 | PREF( 0, 1*32(src) ) | 214 | PREF( 0, 1*32(src) ) |
209 | PREF( 1, 1*32(dst) ) | 215 | PREF( 1, 1*32(dst) ) |
210 | bnez t2, copy_bytes_checklen | 216 | bnez t2, .Lcopy_bytes_checklen |
211 | and t0, src, ADDRMASK | 217 | and t0, src, ADDRMASK |
212 | PREF( 0, 2*32(src) ) | 218 | PREF( 0, 2*32(src) ) |
213 | PREF( 1, 2*32(dst) ) | 219 | PREF( 1, 2*32(dst) ) |
214 | bnez t1, dst_unaligned | 220 | bnez t1, .Ldst_unaligned |
215 | nop | 221 | nop |
216 | bnez t0, src_unaligned_dst_aligned | 222 | bnez t0, .Lsrc_unaligned_dst_aligned |
217 | /* | 223 | /* |
218 | * use delay slot for fall-through | 224 | * use delay slot for fall-through |
219 | * src and dst are aligned; need to compute rem | 225 | * src and dst are aligned; need to compute rem |
220 | */ | 226 | */ |
221 | both_aligned: | 227 | .Lboth_aligned: |
222 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter | 228 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter |
223 | beqz t0, cleanup_both_aligned # len < 8*NBYTES | 229 | beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES |
224 | and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES) | 230 | and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES) |
225 | PREF( 0, 3*32(src) ) | 231 | PREF( 0, 3*32(src) ) |
226 | PREF( 1, 3*32(dst) ) | 232 | PREF( 1, 3*32(dst) ) |
227 | .align 4 | 233 | .align 4 |
228 | 1: | 234 | 1: |
229 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 235 | R10KCBARRIER(0(ra)) |
230 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 236 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
231 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 237 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
232 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 238 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
239 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) | ||
233 | SUB len, len, 8*NBYTES | 240 | SUB len, len, 8*NBYTES |
234 | EXC( LOAD t4, UNIT(4)(src), l_exc_copy) | 241 | EXC( LOAD t4, UNIT(4)(src), .Ll_exc_copy) |
235 | EXC( LOAD t7, UNIT(5)(src), l_exc_copy) | 242 | EXC( LOAD t7, UNIT(5)(src), .Ll_exc_copy) |
236 | EXC( STORE t0, UNIT(0)(dst), s_exc_p8u) | 243 | EXC( STORE t0, UNIT(0)(dst), .Ls_exc_p8u) |
237 | EXC( STORE t1, UNIT(1)(dst), s_exc_p7u) | 244 | EXC( STORE t1, UNIT(1)(dst), .Ls_exc_p7u) |
238 | EXC( LOAD t0, UNIT(6)(src), l_exc_copy) | 245 | EXC( LOAD t0, UNIT(6)(src), .Ll_exc_copy) |
239 | EXC( LOAD t1, UNIT(7)(src), l_exc_copy) | 246 | EXC( LOAD t1, UNIT(7)(src), .Ll_exc_copy) |
240 | ADD src, src, 8*NBYTES | 247 | ADD src, src, 8*NBYTES |
241 | ADD dst, dst, 8*NBYTES | 248 | ADD dst, dst, 8*NBYTES |
242 | EXC( STORE t2, UNIT(-6)(dst), s_exc_p6u) | 249 | EXC( STORE t2, UNIT(-6)(dst), .Ls_exc_p6u) |
243 | EXC( STORE t3, UNIT(-5)(dst), s_exc_p5u) | 250 | EXC( STORE t3, UNIT(-5)(dst), .Ls_exc_p5u) |
244 | EXC( STORE t4, UNIT(-4)(dst), s_exc_p4u) | 251 | EXC( STORE t4, UNIT(-4)(dst), .Ls_exc_p4u) |
245 | EXC( STORE t7, UNIT(-3)(dst), s_exc_p3u) | 252 | EXC( STORE t7, UNIT(-3)(dst), .Ls_exc_p3u) |
246 | EXC( STORE t0, UNIT(-2)(dst), s_exc_p2u) | 253 | EXC( STORE t0, UNIT(-2)(dst), .Ls_exc_p2u) |
247 | EXC( STORE t1, UNIT(-1)(dst), s_exc_p1u) | 254 | EXC( STORE t1, UNIT(-1)(dst), .Ls_exc_p1u) |
248 | PREF( 0, 8*32(src) ) | 255 | PREF( 0, 8*32(src) ) |
249 | PREF( 1, 8*32(dst) ) | 256 | PREF( 1, 8*32(dst) ) |
250 | bne len, rem, 1b | 257 | bne len, rem, 1b |
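R10KCBARRIER(0(ra)) is new in these memcpy.S hunks. Its definition is not part of this diff (it lives in <asm/asm.h>); presumably it emits a cache barrier on R10000-class CPUs, which can execute stores speculatively, so that a speculative store cannot slip past a point where the user copy may still fault, and it expands to nothing on other configurations. A hypothetical shape, with the config gate and opcode both assumed:

/* Assumed sketch only; consult <asm/asm.h> for the real macro. */
#ifdef CONFIG_CPU_R10000_WORKAROUNDS	/* gate name assumed */
# define R10KCBARRIER(addr)	cache	Cache_Barrier, addr;
#else
# define R10KCBARRIER(addr)
#endif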
@@ -253,39 +260,45 @@ EXC( STORE t1, UNIT(-1)(dst), s_exc_p1u) | |||
253 | /* | 260 | /* |
254 | * len == rem == the number of bytes left to copy < 8*NBYTES | 261 | * len == rem == the number of bytes left to copy < 8*NBYTES |
255 | */ | 262 | */ |
256 | cleanup_both_aligned: | 263 | .Lcleanup_both_aligned: |
257 | beqz len, done | 264 | beqz len, .Ldone |
258 | sltu t0, len, 4*NBYTES | 265 | sltu t0, len, 4*NBYTES |
259 | bnez t0, less_than_4units | 266 | bnez t0, .Lless_than_4units |
260 | and rem, len, (NBYTES-1) # rem = len % NBYTES | 267 | and rem, len, (NBYTES-1) # rem = len % NBYTES |
261 | /* | 268 | /* |
262 | * len >= 4*NBYTES | 269 | * len >= 4*NBYTES |
263 | */ | 270 | */ |
264 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 271 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
265 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 272 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
266 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 273 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
267 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 274 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) |
268 | SUB len, len, 4*NBYTES | 275 | SUB len, len, 4*NBYTES |
269 | ADD src, src, 4*NBYTES | 276 | ADD src, src, 4*NBYTES |
270 | EXC( STORE t0, UNIT(0)(dst), s_exc_p4u) | 277 | R10KCBARRIER(0(ra)) |
271 | EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) | 278 | EXC( STORE t0, UNIT(0)(dst), .Ls_exc_p4u) |
272 | EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) | 279 | EXC( STORE t1, UNIT(1)(dst), .Ls_exc_p3u) |
273 | EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) | 280 | EXC( STORE t2, UNIT(2)(dst), .Ls_exc_p2u) |
274 | beqz len, done | 281 | EXC( STORE t3, UNIT(3)(dst), .Ls_exc_p1u) |
275 | ADD dst, dst, 4*NBYTES | 282 | .set reorder /* DADDI_WAR */ |
276 | less_than_4units: | 283 | ADD dst, dst, 4*NBYTES |
284 | beqz len, .Ldone | ||
285 | .set noreorder | ||
286 | .Lless_than_4units: | ||
277 | /* | 287 | /* |
278 | * rem = len % NBYTES | 288 | * rem = len % NBYTES |
279 | */ | 289 | */ |
280 | beq rem, len, copy_bytes | 290 | beq rem, len, .Lcopy_bytes |
281 | nop | 291 | nop |
282 | 1: | 292 | 1: |
283 | EXC( LOAD t0, 0(src), l_exc) | 293 | R10KCBARRIER(0(ra)) |
294 | EXC( LOAD t0, 0(src), .Ll_exc) | ||
284 | ADD src, src, NBYTES | 295 | ADD src, src, NBYTES |
285 | SUB len, len, NBYTES | 296 | SUB len, len, NBYTES |
286 | EXC( STORE t0, 0(dst), s_exc_p1u) | 297 | EXC( STORE t0, 0(dst), .Ls_exc_p1u) |
298 | .set reorder /* DADDI_WAR */ | ||
299 | ADD dst, dst, NBYTES | ||
287 | bne rem, len, 1b | 300 | bne rem, len, 1b |
288 | ADD dst, dst, NBYTES | 301 | .set noreorder |
289 | 302 | ||
290 | /* | 303 | /* |
291 | * src and dst are aligned, need to copy rem bytes (rem < NBYTES) | 304 | * src and dst are aligned, need to copy rem bytes (rem < NBYTES) |
@@ -299,17 +312,17 @@ EXC( STORE t0, 0(dst), s_exc_p1u) | |||
299 | * more instruction-level parallelism. | 312 | * more instruction-level parallelism. |
300 | */ | 313 | */ |
301 | #define bits t2 | 314 | #define bits t2 |
302 | beqz len, done | 315 | beqz len, .Ldone |
303 | ADD t1, dst, len # t1 is just past last byte of dst | 316 | ADD t1, dst, len # t1 is just past last byte of dst |
304 | li bits, 8*NBYTES | 317 | li bits, 8*NBYTES |
305 | SLL rem, len, 3 # rem = number of bits to keep | 318 | SLL rem, len, 3 # rem = number of bits to keep |
306 | EXC( LOAD t0, 0(src), l_exc) | 319 | EXC( LOAD t0, 0(src), .Ll_exc) |
307 | SUB bits, bits, rem # bits = number of bits to discard | 320 | SUB bits, bits, rem # bits = number of bits to discard |
308 | SHIFT_DISCARD t0, t0, bits | 321 | SHIFT_DISCARD t0, t0, bits |
309 | EXC( STREST t0, -1(t1), s_exc) | 322 | EXC( STREST t0, -1(t1), .Ls_exc) |
310 | jr ra | 323 | jr ra |
311 | move len, zero | 324 | move len, zero |
312 | dst_unaligned: | 325 | .Ldst_unaligned: |
313 | /* | 326 | /* |
314 | * dst is unaligned | 327 | * dst is unaligned |
315 | * t0 = src & ADDRMASK | 328 | * t0 = src & ADDRMASK |
@@ -320,22 +333,23 @@ dst_unaligned: | |||
320 | * Set match = (src and dst have same alignment) | 333 | * Set match = (src and dst have same alignment) |
321 | */ | 334 | */ |
322 | #define match rem | 335 | #define match rem |
323 | EXC( LDFIRST t3, FIRST(0)(src), l_exc) | 336 | EXC( LDFIRST t3, FIRST(0)(src), .Ll_exc) |
324 | ADD t2, zero, NBYTES | 337 | ADD t2, zero, NBYTES |
325 | EXC( LDREST t3, REST(0)(src), l_exc_copy) | 338 | EXC( LDREST t3, REST(0)(src), .Ll_exc_copy) |
326 | SUB t2, t2, t1 # t2 = number of bytes copied | 339 | SUB t2, t2, t1 # t2 = number of bytes copied |
327 | xor match, t0, t1 | 340 | xor match, t0, t1 |
328 | EXC( STFIRST t3, FIRST(0)(dst), s_exc) | 341 | R10KCBARRIER(0(ra)) |
329 | beq len, t2, done | 342 | EXC( STFIRST t3, FIRST(0)(dst), .Ls_exc) |
343 | beq len, t2, .Ldone | ||
330 | SUB len, len, t2 | 344 | SUB len, len, t2 |
331 | ADD dst, dst, t2 | 345 | ADD dst, dst, t2 |
332 | beqz match, both_aligned | 346 | beqz match, .Lboth_aligned |
333 | ADD src, src, t2 | 347 | ADD src, src, t2 |
334 | 348 | ||
335 | src_unaligned_dst_aligned: | 349 | .Lsrc_unaligned_dst_aligned: |
336 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter | 350 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter |
337 | PREF( 0, 3*32(src) ) | 351 | PREF( 0, 3*32(src) ) |
338 | beqz t0, cleanup_src_unaligned | 352 | beqz t0, .Lcleanup_src_unaligned |
339 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES | 353 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES |
340 | PREF( 1, 3*32(dst) ) | 354 | PREF( 1, 3*32(dst) ) |
341 | 1: | 355 | 1: |
@@ -345,52 +359,59 @@ src_unaligned_dst_aligned: | |||
345 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses | 359 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses |
346 | * are to the same unit (unless src is aligned, but it's not). | 360 | * are to the same unit (unless src is aligned, but it's not). |
347 | */ | 361 | */ |
348 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 362 | R10KCBARRIER(0(ra)) |
349 | EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy) | 363 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
364 | EXC( LDFIRST t1, FIRST(1)(src), .Ll_exc_copy) | ||
350 | SUB len, len, 4*NBYTES | 365 | SUB len, len, 4*NBYTES |
351 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 366 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) |
352 | EXC( LDREST t1, REST(1)(src), l_exc_copy) | 367 | EXC( LDREST t1, REST(1)(src), .Ll_exc_copy) |
353 | EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy) | 368 | EXC( LDFIRST t2, FIRST(2)(src), .Ll_exc_copy) |
354 | EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy) | 369 | EXC( LDFIRST t3, FIRST(3)(src), .Ll_exc_copy) |
355 | EXC( LDREST t2, REST(2)(src), l_exc_copy) | 370 | EXC( LDREST t2, REST(2)(src), .Ll_exc_copy) |
356 | EXC( LDREST t3, REST(3)(src), l_exc_copy) | 371 | EXC( LDREST t3, REST(3)(src), .Ll_exc_copy) |
357 | PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed) | 372 | PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed) |
358 | ADD src, src, 4*NBYTES | 373 | ADD src, src, 4*NBYTES |
359 | #ifdef CONFIG_CPU_SB1 | 374 | #ifdef CONFIG_CPU_SB1 |
360 | nop # improves slotting | 375 | nop # improves slotting |
361 | #endif | 376 | #endif |
362 | EXC( STORE t0, UNIT(0)(dst), s_exc_p4u) | 377 | EXC( STORE t0, UNIT(0)(dst), .Ls_exc_p4u) |
363 | EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) | 378 | EXC( STORE t1, UNIT(1)(dst), .Ls_exc_p3u) |
364 | EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) | 379 | EXC( STORE t2, UNIT(2)(dst), .Ls_exc_p2u) |
365 | EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) | 380 | EXC( STORE t3, UNIT(3)(dst), .Ls_exc_p1u) |
366 | PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed) | 381 | PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed) |
382 | .set reorder /* DADDI_WAR */ | ||
383 | ADD dst, dst, 4*NBYTES | ||
367 | bne len, rem, 1b | 384 | bne len, rem, 1b |
368 | ADD dst, dst, 4*NBYTES | 385 | .set noreorder |
369 | 386 | ||
370 | cleanup_src_unaligned: | 387 | .Lcleanup_src_unaligned: |
371 | beqz len, done | 388 | beqz len, .Ldone |
372 | and rem, len, NBYTES-1 # rem = len % NBYTES | 389 | and rem, len, NBYTES-1 # rem = len % NBYTES |
373 | beq rem, len, copy_bytes | 390 | beq rem, len, .Lcopy_bytes |
374 | nop | 391 | nop |
375 | 1: | 392 | 1: |
376 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 393 | R10KCBARRIER(0(ra)) |
377 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 394 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
395 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) | ||
378 | ADD src, src, NBYTES | 396 | ADD src, src, NBYTES |
379 | SUB len, len, NBYTES | 397 | SUB len, len, NBYTES |
380 | EXC( STORE t0, 0(dst), s_exc_p1u) | 398 | EXC( STORE t0, 0(dst), .Ls_exc_p1u) |
399 | .set reorder /* DADDI_WAR */ | ||
400 | ADD dst, dst, NBYTES | ||
381 | bne len, rem, 1b | 401 | bne len, rem, 1b |
382 | ADD dst, dst, NBYTES | 402 | .set noreorder |
383 | 403 | ||
384 | copy_bytes_checklen: | 404 | .Lcopy_bytes_checklen: |
385 | beqz len, done | 405 | beqz len, .Ldone |
386 | nop | 406 | nop |
387 | copy_bytes: | 407 | .Lcopy_bytes: |
388 | /* 0 < len < NBYTES */ | 408 | /* 0 < len < NBYTES */ |
409 | R10KCBARRIER(0(ra)) | ||
389 | #define COPY_BYTE(N) \ | 410 | #define COPY_BYTE(N) \ |
390 | EXC( lb t0, N(src), l_exc); \ | 411 | EXC( lb t0, N(src), .Ll_exc); \ |
391 | SUB len, len, 1; \ | 412 | SUB len, len, 1; \ |
392 | beqz len, done; \ | 413 | beqz len, .Ldone; \ |
393 | EXC( sb t0, N(dst), s_exc_p1) | 414 | EXC( sb t0, N(dst), .Ls_exc_p1) |
394 | 415 | ||
395 | COPY_BYTE(0) | 416 | COPY_BYTE(0) |
396 | COPY_BYTE(1) | 417 | COPY_BYTE(1) |
@@ -400,16 +421,16 @@ EXC( sb t0, N(dst), s_exc_p1) | |||
400 | COPY_BYTE(4) | 421 | COPY_BYTE(4) |
401 | COPY_BYTE(5) | 422 | COPY_BYTE(5) |
402 | #endif | 423 | #endif |
403 | EXC( lb t0, NBYTES-2(src), l_exc) | 424 | EXC( lb t0, NBYTES-2(src), .Ll_exc) |
404 | SUB len, len, 1 | 425 | SUB len, len, 1 |
405 | jr ra | 426 | jr ra |
406 | EXC( sb t0, NBYTES-2(dst), s_exc_p1) | 427 | EXC( sb t0, NBYTES-2(dst), .Ls_exc_p1) |
407 | done: | 428 | .Ldone: |
408 | jr ra | 429 | jr ra |
409 | nop | 430 | nop |
410 | END(memcpy) | 431 | END(memcpy) |
411 | 432 | ||
412 | l_exc_copy: | 433 | .Ll_exc_copy: |
413 | /* | 434 | /* |
414 | * Copy bytes from src until faulting load address (or until a | 435 | * Copy bytes from src until faulting load address (or until a |
415 | * lb faults) | 436 | * lb faults) |
@@ -424,12 +445,14 @@ l_exc_copy: | |||
424 | nop | 445 | nop |
425 | LOAD t0, THREAD_BUADDR(t0) | 446 | LOAD t0, THREAD_BUADDR(t0) |
426 | 1: | 447 | 1: |
427 | EXC( lb t1, 0(src), l_exc) | 448 | EXC( lb t1, 0(src), .Ll_exc) |
428 | ADD src, src, 1 | 449 | ADD src, src, 1 |
429 | sb t1, 0(dst) # can't fault -- we're copy_from_user | 450 | sb t1, 0(dst) # can't fault -- we're copy_from_user |
451 | .set reorder /* DADDI_WAR */ | ||
452 | ADD dst, dst, 1 | ||
430 | bne src, t0, 1b | 453 | bne src, t0, 1b |
431 | ADD dst, dst, 1 | 454 | .set noreorder |
432 | l_exc: | 455 | .Ll_exc: |
433 | LOAD t0, TI_TASK($28) | 456 | LOAD t0, TI_TASK($28) |
434 | nop | 457 | nop |
435 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address | 458 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address |
@@ -446,20 +469,33 @@ l_exc: | |||
446 | * Clear len bytes starting at dst. Can't call __bzero because it | 469 | * Clear len bytes starting at dst. Can't call __bzero because it |
447 | * might modify len. An inefficient loop for these rare times... | 470 | * might modify len. An inefficient loop for these rare times... |
448 | */ | 471 | */ |
449 | beqz len, done | 472 | .set reorder /* DADDI_WAR */ |
450 | SUB src, len, 1 | 473 | SUB src, len, 1 |
474 | beqz len, .Ldone | ||
475 | .set noreorder | ||
451 | 1: sb zero, 0(dst) | 476 | 1: sb zero, 0(dst) |
452 | ADD dst, dst, 1 | 477 | ADD dst, dst, 1 |
478 | #ifndef CONFIG_CPU_DADDI_WORKAROUNDS | ||
453 | bnez src, 1b | 479 | bnez src, 1b |
454 | SUB src, src, 1 | 480 | SUB src, src, 1 |
481 | #else | ||
482 | .set push | ||
483 | .set noat | ||
484 | li v1, 1 | ||
485 | bnez src, 1b | ||
486 | SUB src, src, v1 | ||
487 | .set pop | ||
488 | #endif | ||
455 | jr ra | 489 | jr ra |
456 | nop | 490 | nop |
457 | 491 | ||
458 | 492 | ||
459 | #define SEXC(n) \ | 493 | #define SEXC(n) \ |
460 | s_exc_p ## n ## u: \ | 494 | .set reorder; /* DADDI_WAR */ \ |
461 | jr ra; \ | 495 | .Ls_exc_p ## n ## u: \ |
462 | ADD len, len, n*NBYTES | 496 | ADD len, len, n*NBYTES; \ |
497 | jr ra; \ | ||
498 | .set noreorder | ||
463 | 499 | ||
464 | SEXC(8) | 500 | SEXC(8) |
465 | SEXC(7) | 501 | SEXC(7) |
@@ -470,10 +506,12 @@ SEXC(3) | |||
470 | SEXC(2) | 506 | SEXC(2) |
471 | SEXC(1) | 507 | SEXC(1) |
472 | 508 | ||
473 | s_exc_p1: | 509 | .Ls_exc_p1: |
510 | .set reorder /* DADDI_WAR */ | ||
511 | ADD len, len, 1 | ||
474 | jr ra | 512 | jr ra |
475 | ADD len, len, 1 | 513 | .set noreorder |
476 | s_exc: | 514 | .Ls_exc: |
477 | jr ra | 515 | jr ra |
478 | nop | 516 | nop |
479 | 517 | ||
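[Note on the fixup labels above: they implement copy_from_user's contract on a fault. The fault handler records the offending address in the task's THREAD_BUADDR slot; .Ll_exc_copy replays single-byte loads up to exactly that address, and .Ll_exc then clears whatever part of the kernel destination was never written (the "inefficient loop for these rare times" in the hunk above). In outline, with $28 holding the thread_info pointer:

	LOAD	t0, TI_TASK($28)	# current task
	LOAD	t0, THREAD_BUADDR(t0)	# just past the last good src address
	# bytes actually copied = t0 - original src; the remaining len
	# bytes at dst are then cleared byte by byte
]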
@@ -484,38 +522,44 @@ LEAF(memmove) | |||
484 | sltu t0, a1, t0 # dst + len <= src -> memcpy | 522 | sltu t0, a1, t0 # dst + len <= src -> memcpy |
485 | sltu t1, a0, t1 # dst >= src + len -> memcpy | 523 | sltu t1, a0, t1 # dst >= src + len -> memcpy |
486 | and t0, t1 | 524 | and t0, t1 |
487 | beqz t0, __memcpy | 525 | beqz t0, .L__memcpy |
488 | move v0, a0 /* return value */ | 526 | move v0, a0 /* return value */ |
489 | beqz a2, r_out | 527 | beqz a2, .Lr_out |
490 | END(memmove) | 528 | END(memmove) |
491 | 529 | ||
492 | /* fall through to __rmemcpy */ | 530 | /* fall through to __rmemcpy */ |
493 | LEAF(__rmemcpy) /* a0=dst a1=src a2=len */ | 531 | LEAF(__rmemcpy) /* a0=dst a1=src a2=len */ |
494 | sltu t0, a1, a0 | 532 | sltu t0, a1, a0 |
495 | beqz t0, r_end_bytes_up # src >= dst | 533 | beqz t0, .Lr_end_bytes_up # src >= dst |
496 | nop | 534 | nop |
497 | ADD a0, a2 # dst = dst + len | 535 | ADD a0, a2 # dst = dst + len |
498 | ADD a1, a2 # src = src + len | 536 | ADD a1, a2 # src = src + len |
499 | 537 | ||
500 | r_end_bytes: | 538 | .Lr_end_bytes: |
539 | R10KCBARRIER(0(ra)) | ||
501 | lb t0, -1(a1) | 540 | lb t0, -1(a1) |
502 | SUB a2, a2, 0x1 | 541 | SUB a2, a2, 0x1 |
503 | sb t0, -1(a0) | 542 | sb t0, -1(a0) |
504 | SUB a1, a1, 0x1 | 543 | SUB a1, a1, 0x1 |
505 | bnez a2, r_end_bytes | 544 | .set reorder /* DADDI_WAR */ |
506 | SUB a0, a0, 0x1 | 545 | SUB a0, a0, 0x1 |
546 | bnez a2, .Lr_end_bytes | ||
547 | .set noreorder | ||
507 | 548 | ||
508 | r_out: | 549 | .Lr_out: |
509 | jr ra | 550 | jr ra |
510 | move a2, zero | 551 | move a2, zero |
511 | 552 | ||
512 | r_end_bytes_up: | 553 | .Lr_end_bytes_up: |
554 | R10KCBARRIER(0(ra)) | ||
513 | lb t0, (a1) | 555 | lb t0, (a1) |
514 | SUB a2, a2, 0x1 | 556 | SUB a2, a2, 0x1 |
515 | sb t0, (a0) | 557 | sb t0, (a0) |
516 | ADD a1, a1, 0x1 | 558 | ADD a1, a1, 0x1 |
517 | bnez a2, r_end_bytes_up | 559 | .set reorder /* DADDI_WAR */ |
518 | ADD a0, a0, 0x1 | 560 | ADD a0, a0, 0x1 |
561 | bnez a2, .Lr_end_bytes_up | ||
562 | .set noreorder | ||
519 | 563 | ||
520 | jr ra | 564 | jr ra |
521 | move a2, zero | 565 | move a2, zero |
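[Note on the R10KCBARRIER(0(ra)) invocations added before stores throughout this file: R10000-family processors execute speculatively, and on boards where speculation can reach noncoherent memory (the SGI IP28 being the notable case) a cache barrier must be issued ahead of stores in code like this. A sketch of what the macro presumably expands to, conditional on the platform (the real definition lives in <asm/asm.h>):

	#ifdef CONFIG_SGI_IP28
	/* inhibit speculative stores to noncoherent memory */
	#define R10KCBARRIER(addr)	cache	Cache_Barrier, addr;
	#else
	#define R10KCBARRIER(addr)
	#endif
]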
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index 3f8b8b3d0b2..77dc3b20110 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S | |||
@@ -5,6 +5,7 @@ | |||
5 | * | 5 | * |
6 | * Copyright (C) 1998, 1999, 2000 by Ralf Baechle | 6 | * Copyright (C) 1998, 1999, 2000 by Ralf Baechle |
7 | * Copyright (C) 1999, 2000 Silicon Graphics, Inc. | 7 | * Copyright (C) 1999, 2000 Silicon Graphics, Inc. |
8 | * Copyright (C) 2007 Maciej W. Rozycki | ||
8 | */ | 9 | */ |
9 | #include <asm/asm.h> | 10 | #include <asm/asm.h> |
10 | #include <asm/asm-offsets.h> | 11 | #include <asm/asm-offsets.h> |
@@ -71,34 +72,45 @@ LEAF(memset) | |||
71 | 72 | ||
72 | FEXPORT(__bzero) | 73 | FEXPORT(__bzero) |
73 | sltiu t0, a2, LONGSIZE /* very small region? */ | 74 | sltiu t0, a2, LONGSIZE /* very small region? */ |
74 | bnez t0, small_memset | 75 | bnez t0, .Lsmall_memset |
75 | andi t0, a0, LONGMASK /* aligned? */ | 76 | andi t0, a0, LONGMASK /* aligned? */ |
76 | 77 | ||
78 | #ifndef CONFIG_CPU_DADDI_WORKAROUNDS | ||
77 | beqz t0, 1f | 79 | beqz t0, 1f |
78 | PTR_SUBU t0, LONGSIZE /* alignment in bytes */ | 80 | PTR_SUBU t0, LONGSIZE /* alignment in bytes */ |
81 | #else | ||
82 | .set noat | ||
83 | li AT, LONGSIZE | ||
84 | beqz t0, 1f | ||
85 | PTR_SUBU t0, AT /* alignment in bytes */ | ||
86 | .set at | ||
87 | #endif | ||
79 | 88 | ||
89 | R10KCBARRIER(0(ra)) | ||
80 | #ifdef __MIPSEB__ | 90 | #ifdef __MIPSEB__ |
81 | EX(LONG_S_L, a1, (a0), first_fixup) /* make word/dword aligned */ | 91 | EX(LONG_S_L, a1, (a0), .Lfirst_fixup) /* make word/dword aligned */ |
82 | #endif | 92 | #endif |
83 | #ifdef __MIPSEL__ | 93 | #ifdef __MIPSEL__ |
84 | EX(LONG_S_R, a1, (a0), first_fixup) /* make word/dword aligned */ | 94 | EX(LONG_S_R, a1, (a0), .Lfirst_fixup) /* make word/dword aligned */ |
85 | #endif | 95 | #endif |
86 | PTR_SUBU a0, t0 /* long align ptr */ | 96 | PTR_SUBU a0, t0 /* long align ptr */ |
87 | PTR_ADDU a2, t0 /* correct size */ | 97 | PTR_ADDU a2, t0 /* correct size */ |
88 | 98 | ||
89 | 1: ori t1, a2, 0x3f /* # of full blocks */ | 99 | 1: ori t1, a2, 0x3f /* # of full blocks */ |
90 | xori t1, 0x3f | 100 | xori t1, 0x3f |
91 | beqz t1, memset_partial /* no block to fill */ | 101 | beqz t1, .Lmemset_partial /* no block to fill */ |
92 | andi t0, a2, 0x40-LONGSIZE | 102 | andi t0, a2, 0x40-LONGSIZE |
93 | 103 | ||
94 | PTR_ADDU t1, a0 /* end address */ | 104 | PTR_ADDU t1, a0 /* end address */ |
95 | .set reorder | 105 | .set reorder |
96 | 1: PTR_ADDIU a0, 64 | 106 | 1: PTR_ADDIU a0, 64 |
97 | f_fill64 a0, -64, a1, fwd_fixup | 107 | R10KCBARRIER(0(ra)) |
108 | f_fill64 a0, -64, a1, .Lfwd_fixup | ||
98 | bne t1, a0, 1b | 109 | bne t1, a0, 1b |
99 | .set noreorder | 110 | .set noreorder |
100 | 111 | ||
101 | memset_partial: | 112 | .Lmemset_partial: |
113 | R10KCBARRIER(0(ra)) | ||
102 | PTR_LA t1, 2f /* where to start */ | 114 | PTR_LA t1, 2f /* where to start */ |
103 | #if LONGSIZE == 4 | 115 | #if LONGSIZE == 4 |
104 | PTR_SUBU t1, t0 | 116 | PTR_SUBU t1, t0 |
@@ -106,7 +118,7 @@ memset_partial: | |||
106 | .set noat | 118 | .set noat |
107 | LONG_SRL AT, t0, 1 | 119 | LONG_SRL AT, t0, 1 |
108 | PTR_SUBU t1, AT | 120 | PTR_SUBU t1, AT |
109 | .set noat | 121 | .set at |
110 | #endif | 122 | #endif |
111 | jr t1 | 123 | jr t1 |
112 | PTR_ADDU a0, t0 /* dest ptr */ | 124 | PTR_ADDU a0, t0 /* dest ptr */ |
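[For context, .Lmemset_partial dispatches with a computed jump, Duff's-device style: t0 holds the bytes still needing a partial fill, and the code jumps that far back from label 2 into the unrolled f_fill64 store run so exactly the right number of long stores execute. Each 4-byte store instruction covers LONGSIZE bytes of destination, which is why the 64-bit branch above halves the offset with LONG_SRL before subtracting. In outline (32-bit case shown):

	PTR_LA	t1, 2f			/* just past the unrolled stores */
	PTR_SUBU t1, t0			/* back up one store per long left */
	jr	t1			/* enter the run midway */
	 PTR_ADDU a0, t0		/* delay slot: advance dest pointer */
]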
@@ -114,26 +126,28 @@ memset_partial: | |||
114 | .set push | 126 | .set push |
115 | .set noreorder | 127 | .set noreorder |
116 | .set nomacro | 128 | .set nomacro |
117 | f_fill64 a0, -64, a1, partial_fixup /* ... but first do longs ... */ | 129 | f_fill64 a0, -64, a1, .Lpartial_fixup /* ... but first do longs ... */ |
118 | 2: .set pop | 130 | 2: .set pop |
119 | andi a2, LONGMASK /* At most one long to go */ | 131 | andi a2, LONGMASK /* At most one long to go */ |
120 | 132 | ||
121 | beqz a2, 1f | 133 | beqz a2, 1f |
122 | PTR_ADDU a0, a2 /* What's left */ | 134 | PTR_ADDU a0, a2 /* What's left */ |
135 | R10KCBARRIER(0(ra)) | ||
123 | #ifdef __MIPSEB__ | 136 | #ifdef __MIPSEB__ |
124 | EX(LONG_S_R, a1, -1(a0), last_fixup) | 137 | EX(LONG_S_R, a1, -1(a0), .Llast_fixup) |
125 | #endif | 138 | #endif |
126 | #ifdef __MIPSEL__ | 139 | #ifdef __MIPSEL__ |
127 | EX(LONG_S_L, a1, -1(a0), last_fixup) | 140 | EX(LONG_S_L, a1, -1(a0), .Llast_fixup) |
128 | #endif | 141 | #endif |
129 | 1: jr ra | 142 | 1: jr ra |
130 | move a2, zero | 143 | move a2, zero |
131 | 144 | ||
132 | small_memset: | 145 | .Lsmall_memset: |
133 | beqz a2, 2f | 146 | beqz a2, 2f |
134 | PTR_ADDU t1, a0, a2 | 147 | PTR_ADDU t1, a0, a2 |
135 | 148 | ||
136 | 1: PTR_ADDIU a0, 1 /* fill bytewise */ | 149 | 1: PTR_ADDIU a0, 1 /* fill bytewise */ |
150 | R10KCBARRIER(0(ra)) | ||
137 | bne t1, a0, 1b | 151 | bne t1, a0, 1b |
138 | sb a1, -1(a0) | 152 | sb a1, -1(a0) |
139 | 153 | ||
@@ -141,11 +155,11 @@ small_memset: | |||
141 | move a2, zero | 155 | move a2, zero |
142 | END(memset) | 156 | END(memset) |
143 | 157 | ||
144 | first_fixup: | 158 | .Lfirst_fixup: |
145 | jr ra | 159 | jr ra |
146 | nop | 160 | nop |
147 | 161 | ||
148 | fwd_fixup: | 162 | .Lfwd_fixup: |
149 | PTR_L t0, TI_TASK($28) | 163 | PTR_L t0, TI_TASK($28) |
150 | LONG_L t0, THREAD_BUADDR(t0) | 164 | LONG_L t0, THREAD_BUADDR(t0) |
151 | andi a2, 0x3f | 165 | andi a2, 0x3f |
@@ -153,7 +167,7 @@ fwd_fixup: | |||
153 | jr ra | 167 | jr ra |
154 | LONG_SUBU a2, t0 | 168 | LONG_SUBU a2, t0 |
155 | 169 | ||
156 | partial_fixup: | 170 | .Lpartial_fixup: |
157 | PTR_L t0, TI_TASK($28) | 171 | PTR_L t0, TI_TASK($28) |
158 | LONG_L t0, THREAD_BUADDR(t0) | 172 | LONG_L t0, THREAD_BUADDR(t0) |
159 | andi a2, LONGMASK | 173 | andi a2, LONGMASK |
@@ -161,6 +175,6 @@ partial_fixup: | |||
161 | jr ra | 175 | jr ra |
162 | LONG_SUBU a2, t0 | 176 | LONG_SUBU a2, t0 |
163 | 177 | ||
164 | last_fixup: | 178 | .Llast_fixup: |
165 | jr ra | 179 | jr ra |
166 | andi v1, a2, LONGMASK | 180 | andi v1, a2, LONGMASK |
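[Beyond the label renames, two substantive memset.S fixes ride along here. The second ".set noat" in the LONGSIZE == 8 path was evidently a typo that left noat mode in effect past the sequence; it now correctly restores ".set at". And under CONFIG_CPU_DADDI_WORKAROUNDS the immediate subtraction at the top is routed through a register, so the assembler never has to emit a daddiu with an immediate:

	.set	noat
	li	AT, LONGSIZE		/* constant into the assembler temporary */
	beqz	t0, 1f
	 PTR_SUBU t0, AT		/* register form, no daddiu immediate */
	.set	at
]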
diff --git a/arch/mips/lib/strlen_user.S b/arch/mips/lib/strlen_user.S index eca558d83a3..fdbb970f670 100644 --- a/arch/mips/lib/strlen_user.S +++ b/arch/mips/lib/strlen_user.S | |||
@@ -24,16 +24,16 @@ | |||
24 | LEAF(__strlen_user_asm) | 24 | LEAF(__strlen_user_asm) |
25 | LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? | 25 | LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? |
26 | and v0, a0 | 26 | and v0, a0 |
27 | bnez v0, fault | 27 | bnez v0, .Lfault |
28 | 28 | ||
29 | FEXPORT(__strlen_user_nocheck_asm) | 29 | FEXPORT(__strlen_user_nocheck_asm) |
30 | move v0, a0 | 30 | move v0, a0 |
31 | 1: EX(lb, t0, (v0), fault) | 31 | 1: EX(lb, t0, (v0), .Lfault) |
32 | PTR_ADDIU v0, 1 | 32 | PTR_ADDIU v0, 1 |
33 | bnez t0, 1b | 33 | bnez t0, 1b |
34 | PTR_SUBU v0, a0 | 34 | PTR_SUBU v0, a0 |
35 | jr ra | 35 | jr ra |
36 | END(__strlen_user_asm) | 36 | END(__strlen_user_asm) |
37 | 37 | ||
38 | fault: move v0, zero | 38 | .Lfault: move v0, zero |
39 | jr ra | 39 | jr ra |
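[Most of this patch is this mechanical rename: giving branch targets a .L prefix makes them assembler-local, so they stay out of the object's symbol table, and backtraces, kallsyms, and profilers attribute the instructions to the enclosing exported function rather than to a stray symbol such as "fault". Compare:

	fault:	move	v0, zero	# global: lands in the symbol table
	.Lfault: move	v0, zero	# local: invisible outside this file
]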
diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S index d16c76fbfac..7201b2ff08c 100644 --- a/arch/mips/lib/strncpy_user.S +++ b/arch/mips/lib/strncpy_user.S | |||
@@ -30,29 +30,30 @@ | |||
30 | LEAF(__strncpy_from_user_asm) | 30 | LEAF(__strncpy_from_user_asm) |
31 | LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? | 31 | LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? |
32 | and v0, a1 | 32 | and v0, a1 |
33 | bnez v0, fault | 33 | bnez v0, .Lfault |
34 | 34 | ||
35 | FEXPORT(__strncpy_from_user_nocheck_asm) | 35 | FEXPORT(__strncpy_from_user_nocheck_asm) |
36 | move v0, zero | 36 | move v0, zero |
37 | move v1, a1 | 37 | move v1, a1 |
38 | .set noreorder | 38 | .set noreorder |
39 | 1: EX(lbu, t0, (v1), fault) | 39 | 1: EX(lbu, t0, (v1), .Lfault) |
40 | PTR_ADDIU v1, 1 | 40 | PTR_ADDIU v1, 1 |
41 | R10KCBARRIER(0(ra)) | ||
41 | beqz t0, 2f | 42 | beqz t0, 2f |
42 | sb t0, (a0) | 43 | sb t0, (a0) |
43 | PTR_ADDIU v0, 1 | 44 | PTR_ADDIU v0, 1 |
44 | bne v0, a2, 1b | ||
45 | PTR_ADDIU a0, 1 | ||
46 | .set reorder | 45 | .set reorder |
46 | PTR_ADDIU a0, 1 | ||
47 | bne v0, a2, 1b | ||
47 | 2: PTR_ADDU t0, a1, v0 | 48 | 2: PTR_ADDU t0, a1, v0 |
48 | xor t0, a1 | 49 | xor t0, a1 |
49 | bltz t0, fault | 50 | bltz t0, .Lfault |
50 | jr ra # return n | 51 | jr ra # return n |
51 | END(__strncpy_from_user_asm) | 52 | END(__strncpy_from_user_asm) |
52 | 53 | ||
53 | fault: li v0, -EFAULT | 54 | .Lfault: li v0, -EFAULT |
54 | jr ra | 55 | jr ra |
55 | 56 | ||
56 | .section __ex_table,"a" | 57 | .section __ex_table,"a" |
57 | PTR 1b, fault | 58 | PTR 1b, .Lfault |
58 | .previous | 59 | .previous |
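[Note that the exception-table entry at the bottom has to track the rename as well. The EX() macro and this explicit __ex_table section implement the same mechanism: each entry pairs the address of an instruction that may fault on a user pointer with a fixup address, and the page-fault handler searches the table so execution resumes at the fixup instead of oopsing. Illustratively, EX(lbu, t0, (v1), .Lfault) expands to roughly:

	1:	lbu	t0, (v1)		# may fault on a bad user pointer
		.section __ex_table, "a"
		PTR	1b, .Lfault		# fault at 1b -> resume at .Lfault
		.previous
]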
diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S index c0ea15194a0..c768e300061 100644 --- a/arch/mips/lib/strnlen_user.S +++ b/arch/mips/lib/strnlen_user.S | |||
@@ -28,18 +28,19 @@ | |||
28 | LEAF(__strnlen_user_asm) | 28 | LEAF(__strnlen_user_asm) |
29 | LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? | 29 | LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? |
30 | and v0, a0 | 30 | and v0, a0 |
31 | bnez v0, fault | 31 | bnez v0, .Lfault |
32 | 32 | ||
33 | FEXPORT(__strnlen_user_nocheck_asm) | 33 | FEXPORT(__strnlen_user_nocheck_asm) |
34 | move v0, a0 | 34 | move v0, a0 |
35 | PTR_ADDU a1, a0 # stop pointer | 35 | PTR_ADDU a1, a0 # stop pointer |
36 | 1: beq v0, a1, 1f # limit reached? | 36 | 1: beq v0, a1, 1f # limit reached? |
37 | EX(lb, t0, (v0), fault) | 37 | EX(lb, t0, (v0), .Lfault) |
38 | PTR_ADDU v0, 1 | 38 | PTR_ADDU v0, 1 |
39 | bnez t0, 1b | 39 | bnez t0, 1b |
40 | 1: PTR_SUBU v0, a0 | 40 | 1: PTR_SUBU v0, a0 |
41 | jr ra | 41 | jr ra |
42 | END(__strnlen_user_asm) | 42 | END(__strnlen_user_asm) |
43 | 43 | ||
44 | fault: move v0, zero | 44 | .Lfault: |
45 | move v0, zero | ||
45 | jr ra | 46 | jr ra |
diff --git a/arch/mips/lib/uncached.c b/arch/mips/lib/uncached.c index 58d14f4d934..27b012d4341 100644 --- a/arch/mips/lib/uncached.c +++ b/arch/mips/lib/uncached.c | |||
@@ -46,9 +46,9 @@ unsigned long __init run_uncached(void *func) | |||
46 | if (sp >= (long)CKSEG0 && sp < (long)CKSEG2) | 46 | if (sp >= (long)CKSEG0 && sp < (long)CKSEG2) |
47 | usp = CKSEG1ADDR(sp); | 47 | usp = CKSEG1ADDR(sp); |
48 | #ifdef CONFIG_64BIT | 48 | #ifdef CONFIG_64BIT |
49 | else if ((long long)sp >= (long long)PHYS_TO_XKPHYS(0LL, 0) && | 49 | else if ((long long)sp >= (long long)PHYS_TO_XKPHYS(0, 0) && |
50 | (long long)sp < (long long)PHYS_TO_XKPHYS(8LL, 0)) | 50 | (long long)sp < (long long)PHYS_TO_XKPHYS(8, 0)) |
51 | usp = PHYS_TO_XKPHYS((long long)K_CALG_UNCACHED, | 51 | usp = PHYS_TO_XKPHYS(K_CALG_UNCACHED, |
52 | XKPHYS_TO_PHYS((long long)sp)); | 52 | XKPHYS_TO_PHYS((long long)sp)); |
53 | #endif | 53 | #endif |
54 | else { | 54 | else { |
@@ -58,9 +58,9 @@ unsigned long __init run_uncached(void *func) | |||
58 | if (lfunc >= (long)CKSEG0 && lfunc < (long)CKSEG2) | 58 | if (lfunc >= (long)CKSEG0 && lfunc < (long)CKSEG2) |
59 | ufunc = CKSEG1ADDR(lfunc); | 59 | ufunc = CKSEG1ADDR(lfunc); |
60 | #ifdef CONFIG_64BIT | 60 | #ifdef CONFIG_64BIT |
61 | else if ((long long)lfunc >= (long long)PHYS_TO_XKPHYS(0LL, 0) && | 61 | else if ((long long)lfunc >= (long long)PHYS_TO_XKPHYS(0, 0) && |
62 | (long long)lfunc < (long long)PHYS_TO_XKPHYS(8LL, 0)) | 62 | (long long)lfunc < (long long)PHYS_TO_XKPHYS(8, 0)) |
63 | ufunc = PHYS_TO_XKPHYS((long long)K_CALG_UNCACHED, | 63 | ufunc = PHYS_TO_XKPHYS(K_CALG_UNCACHED, |
64 | XKPHYS_TO_PHYS((long long)lfunc)); | 64 | XKPHYS_TO_PHYS((long long)lfunc)); |
65 | #endif | 65 | #endif |
66 | else { | 66 | else { |
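[The uncached.c hunks are the flip side of a companion change to <asm/addrspace.h>: PHYS_TO_XKPHYS() now widens its cache-attribute argument itself, so callers no longer need LL-suffixed literals or explicit casts. A hedged sketch of the macro's shape before and after (XKPHYS keeps the cache-coherency attribute in bits 61:59; the _OLD/_NEW names are illustrative, not the kernel's):

	/* before: callers had to pass 64-bit-clean arguments */
	#define PHYS_TO_XKPHYS_OLD(cm, a) \
		(0x8000000000000000ULL | ((cm) << 59) | (a))

	/* after: the macro promotes the attribute itself, so plain
	   PHYS_TO_XKPHYS(8, 0) is safe even with int arguments */
	#define PHYS_TO_XKPHYS_NEW(cm, a) \
		(0x8000000000000000ULL | ((unsigned long long)(cm) << 59) | (a))
]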