diff options
-rw-r--r-- | arch/mips/lib/csum_partial.S | 214 | ||||
-rw-r--r-- | arch/mips/lib/memcpy-inatomic.S | 116 | ||||
-rw-r--r-- | arch/mips/lib/memcpy.S | 182 | ||||
-rw-r--r-- | arch/mips/lib/memset.S | 28 | ||||
-rw-r--r-- | arch/mips/lib/strlen_user.S | 6 | ||||
-rw-r--r-- | arch/mips/lib/strncpy_user.S | 10 | ||||
-rw-r--r-- | arch/mips/lib/strnlen_user.S | 7 |
7 files changed, 282 insertions, 281 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index 957a82484e3e..8d7784122c14 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S | |||
@@ -96,13 +96,13 @@ LEAF(csum_partial) | |||
96 | move t7, zero | 96 | move t7, zero |
97 | 97 | ||
98 | sltiu t8, a1, 0x8 | 98 | sltiu t8, a1, 0x8 |
99 | bnez t8, small_csumcpy /* < 8 bytes to copy */ | 99 | bnez t8, .Lsmall_csumcpy /* < 8 bytes to copy */ |
100 | move t2, a1 | 100 | move t2, a1 |
101 | 101 | ||
102 | andi t7, src, 0x1 /* odd buffer? */ | 102 | andi t7, src, 0x1 /* odd buffer? */ |
103 | 103 | ||
104 | hword_align: | 104 | .Lhword_align: |
105 | beqz t7, word_align | 105 | beqz t7, .Lword_align |
106 | andi t8, src, 0x2 | 106 | andi t8, src, 0x2 |
107 | 107 | ||
108 | lbu t0, (src) | 108 | lbu t0, (src) |
@@ -114,8 +114,8 @@ hword_align: | |||
114 | PTR_ADDU src, src, 0x1 | 114 | PTR_ADDU src, src, 0x1 |
115 | andi t8, src, 0x2 | 115 | andi t8, src, 0x2 |
116 | 116 | ||
117 | word_align: | 117 | .Lword_align: |
118 | beqz t8, dword_align | 118 | beqz t8, .Ldword_align |
119 | sltiu t8, a1, 56 | 119 | sltiu t8, a1, 56 |
120 | 120 | ||
121 | lhu t0, (src) | 121 | lhu t0, (src) |
@@ -124,12 +124,12 @@ word_align: | |||
124 | sltiu t8, a1, 56 | 124 | sltiu t8, a1, 56 |
125 | PTR_ADDU src, src, 0x2 | 125 | PTR_ADDU src, src, 0x2 |
126 | 126 | ||
127 | dword_align: | 127 | .Ldword_align: |
128 | bnez t8, do_end_words | 128 | bnez t8, .Ldo_end_words |
129 | move t8, a1 | 129 | move t8, a1 |
130 | 130 | ||
131 | andi t8, src, 0x4 | 131 | andi t8, src, 0x4 |
132 | beqz t8, qword_align | 132 | beqz t8, .Lqword_align |
133 | andi t8, src, 0x8 | 133 | andi t8, src, 0x8 |
134 | 134 | ||
135 | lw t0, 0x00(src) | 135 | lw t0, 0x00(src) |
@@ -138,8 +138,8 @@ dword_align: | |||
138 | PTR_ADDU src, src, 0x4 | 138 | PTR_ADDU src, src, 0x4 |
139 | andi t8, src, 0x8 | 139 | andi t8, src, 0x8 |
140 | 140 | ||
141 | qword_align: | 141 | .Lqword_align: |
142 | beqz t8, oword_align | 142 | beqz t8, .Loword_align |
143 | andi t8, src, 0x10 | 143 | andi t8, src, 0x10 |
144 | 144 | ||
145 | #ifdef USE_DOUBLE | 145 | #ifdef USE_DOUBLE |
@@ -156,8 +156,8 @@ qword_align: | |||
156 | PTR_ADDU src, src, 0x8 | 156 | PTR_ADDU src, src, 0x8 |
157 | andi t8, src, 0x10 | 157 | andi t8, src, 0x10 |
158 | 158 | ||
159 | oword_align: | 159 | .Loword_align: |
160 | beqz t8, begin_movement | 160 | beqz t8, .Lbegin_movement |
161 | LONG_SRL t8, a1, 0x7 | 161 | LONG_SRL t8, a1, 0x7 |
162 | 162 | ||
163 | #ifdef USE_DOUBLE | 163 | #ifdef USE_DOUBLE |
@@ -172,11 +172,11 @@ oword_align: | |||
172 | PTR_ADDU src, src, 0x10 | 172 | PTR_ADDU src, src, 0x10 |
173 | LONG_SRL t8, a1, 0x7 | 173 | LONG_SRL t8, a1, 0x7 |
174 | 174 | ||
175 | begin_movement: | 175 | .Lbegin_movement: |
176 | beqz t8, 1f | 176 | beqz t8, 1f |
177 | andi t2, a1, 0x40 | 177 | andi t2, a1, 0x40 |
178 | 178 | ||
179 | move_128bytes: | 179 | .Lmove_128bytes: |
180 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) | 180 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) |
181 | CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) | 181 | CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) |
182 | CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4) | 182 | CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4) |
@@ -184,43 +184,43 @@ move_128bytes: | |||
184 | LONG_SUBU t8, t8, 0x01 | 184 | LONG_SUBU t8, t8, 0x01 |
185 | .set reorder /* DADDI_WAR */ | 185 | .set reorder /* DADDI_WAR */ |
186 | PTR_ADDU src, src, 0x80 | 186 | PTR_ADDU src, src, 0x80 |
187 | bnez t8, move_128bytes | 187 | bnez t8, .Lmove_128bytes |
188 | .set noreorder | 188 | .set noreorder |
189 | 189 | ||
190 | 1: | 190 | 1: |
191 | beqz t2, 1f | 191 | beqz t2, 1f |
192 | andi t2, a1, 0x20 | 192 | andi t2, a1, 0x20 |
193 | 193 | ||
194 | move_64bytes: | 194 | .Lmove_64bytes: |
195 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) | 195 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) |
196 | CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) | 196 | CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) |
197 | PTR_ADDU src, src, 0x40 | 197 | PTR_ADDU src, src, 0x40 |
198 | 198 | ||
199 | 1: | 199 | 1: |
200 | beqz t2, do_end_words | 200 | beqz t2, .Ldo_end_words |
201 | andi t8, a1, 0x1c | 201 | andi t8, a1, 0x1c |
202 | 202 | ||
203 | move_32bytes: | 203 | .Lmove_32bytes: |
204 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) | 204 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) |
205 | andi t8, a1, 0x1c | 205 | andi t8, a1, 0x1c |
206 | PTR_ADDU src, src, 0x20 | 206 | PTR_ADDU src, src, 0x20 |
207 | 207 | ||
208 | do_end_words: | 208 | .Ldo_end_words: |
209 | beqz t8, small_csumcpy | 209 | beqz t8, .Lsmall_csumcpy |
210 | andi t2, a1, 0x3 | 210 | andi t2, a1, 0x3 |
211 | LONG_SRL t8, t8, 0x2 | 211 | LONG_SRL t8, t8, 0x2 |
212 | 212 | ||
213 | end_words: | 213 | .Lend_words: |
214 | lw t0, (src) | 214 | lw t0, (src) |
215 | LONG_SUBU t8, t8, 0x1 | 215 | LONG_SUBU t8, t8, 0x1 |
216 | ADDC(sum, t0) | 216 | ADDC(sum, t0) |
217 | .set reorder /* DADDI_WAR */ | 217 | .set reorder /* DADDI_WAR */ |
218 | PTR_ADDU src, src, 0x4 | 218 | PTR_ADDU src, src, 0x4 |
219 | bnez t8, end_words | 219 | bnez t8, .Lend_words |
220 | .set noreorder | 220 | .set noreorder |
221 | 221 | ||
222 | /* unknown src alignment and < 8 bytes to go */ | 222 | /* unknown src alignment and < 8 bytes to go */ |
223 | small_csumcpy: | 223 | .Lsmall_csumcpy: |
224 | move a1, t2 | 224 | move a1, t2 |
225 | 225 | ||
226 | andi t0, a1, 4 | 226 | andi t0, a1, 4 |
@@ -413,48 +413,48 @@ FEXPORT(csum_partial_copy_nocheck) | |||
413 | */ | 413 | */ |
414 | sltu t2, len, NBYTES | 414 | sltu t2, len, NBYTES |
415 | and t1, dst, ADDRMASK | 415 | and t1, dst, ADDRMASK |
416 | bnez t2, copy_bytes_checklen | 416 | bnez t2, .Lcopy_bytes_checklen |
417 | and t0, src, ADDRMASK | 417 | and t0, src, ADDRMASK |
418 | andi odd, dst, 0x1 /* odd buffer? */ | 418 | andi odd, dst, 0x1 /* odd buffer? */ |
419 | bnez t1, dst_unaligned | 419 | bnez t1, .Ldst_unaligned |
420 | nop | 420 | nop |
421 | bnez t0, src_unaligned_dst_aligned | 421 | bnez t0, .Lsrc_unaligned_dst_aligned |
422 | /* | 422 | /* |
423 | * use delay slot for fall-through | 423 | * use delay slot for fall-through |
424 | * src and dst are aligned; need to compute rem | 424 | * src and dst are aligned; need to compute rem |
425 | */ | 425 | */ |
426 | both_aligned: | 426 | .Lboth_aligned: |
427 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter | 427 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter |
428 | beqz t0, cleanup_both_aligned # len < 8*NBYTES | 428 | beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES |
429 | nop | 429 | nop |
430 | SUB len, 8*NBYTES # subtract here for bgez loop | 430 | SUB len, 8*NBYTES # subtract here for bgez loop |
431 | .align 4 | 431 | .align 4 |
432 | 1: | 432 | 1: |
433 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 433 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
434 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 434 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
435 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 435 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
436 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 436 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) |
437 | EXC( LOAD t4, UNIT(4)(src), l_exc_copy) | 437 | EXC( LOAD t4, UNIT(4)(src), .Ll_exc_copy) |
438 | EXC( LOAD t5, UNIT(5)(src), l_exc_copy) | 438 | EXC( LOAD t5, UNIT(5)(src), .Ll_exc_copy) |
439 | EXC( LOAD t6, UNIT(6)(src), l_exc_copy) | 439 | EXC( LOAD t6, UNIT(6)(src), .Ll_exc_copy) |
440 | EXC( LOAD t7, UNIT(7)(src), l_exc_copy) | 440 | EXC( LOAD t7, UNIT(7)(src), .Ll_exc_copy) |
441 | SUB len, len, 8*NBYTES | 441 | SUB len, len, 8*NBYTES |
442 | ADD src, src, 8*NBYTES | 442 | ADD src, src, 8*NBYTES |
443 | EXC( STORE t0, UNIT(0)(dst), s_exc) | 443 | EXC( STORE t0, UNIT(0)(dst), .Ls_exc) |
444 | ADDC(sum, t0) | 444 | ADDC(sum, t0) |
445 | EXC( STORE t1, UNIT(1)(dst), s_exc) | 445 | EXC( STORE t1, UNIT(1)(dst), .Ls_exc) |
446 | ADDC(sum, t1) | 446 | ADDC(sum, t1) |
447 | EXC( STORE t2, UNIT(2)(dst), s_exc) | 447 | EXC( STORE t2, UNIT(2)(dst), .Ls_exc) |
448 | ADDC(sum, t2) | 448 | ADDC(sum, t2) |
449 | EXC( STORE t3, UNIT(3)(dst), s_exc) | 449 | EXC( STORE t3, UNIT(3)(dst), .Ls_exc) |
450 | ADDC(sum, t3) | 450 | ADDC(sum, t3) |
451 | EXC( STORE t4, UNIT(4)(dst), s_exc) | 451 | EXC( STORE t4, UNIT(4)(dst), .Ls_exc) |
452 | ADDC(sum, t4) | 452 | ADDC(sum, t4) |
453 | EXC( STORE t5, UNIT(5)(dst), s_exc) | 453 | EXC( STORE t5, UNIT(5)(dst), .Ls_exc) |
454 | ADDC(sum, t5) | 454 | ADDC(sum, t5) |
455 | EXC( STORE t6, UNIT(6)(dst), s_exc) | 455 | EXC( STORE t6, UNIT(6)(dst), .Ls_exc) |
456 | ADDC(sum, t6) | 456 | ADDC(sum, t6) |
457 | EXC( STORE t7, UNIT(7)(dst), s_exc) | 457 | EXC( STORE t7, UNIT(7)(dst), .Ls_exc) |
458 | ADDC(sum, t7) | 458 | ADDC(sum, t7) |
459 | .set reorder /* DADDI_WAR */ | 459 | .set reorder /* DADDI_WAR */ |
460 | ADD dst, dst, 8*NBYTES | 460 | ADD dst, dst, 8*NBYTES |
@@ -465,44 +465,44 @@ EXC( STORE t7, UNIT(7)(dst), s_exc) | |||
465 | /* | 465 | /* |
466 | * len == the number of bytes left to copy < 8*NBYTES | 466 | * len == the number of bytes left to copy < 8*NBYTES |
467 | */ | 467 | */ |
468 | cleanup_both_aligned: | 468 | .Lcleanup_both_aligned: |
469 | #define rem t7 | 469 | #define rem t7 |
470 | beqz len, done | 470 | beqz len, .Ldone |
471 | sltu t0, len, 4*NBYTES | 471 | sltu t0, len, 4*NBYTES |
472 | bnez t0, less_than_4units | 472 | bnez t0, .Lless_than_4units |
473 | and rem, len, (NBYTES-1) # rem = len % NBYTES | 473 | and rem, len, (NBYTES-1) # rem = len % NBYTES |
474 | /* | 474 | /* |
475 | * len >= 4*NBYTES | 475 | * len >= 4*NBYTES |
476 | */ | 476 | */ |
477 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 477 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
478 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 478 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
479 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 479 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
480 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 480 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) |
481 | SUB len, len, 4*NBYTES | 481 | SUB len, len, 4*NBYTES |
482 | ADD src, src, 4*NBYTES | 482 | ADD src, src, 4*NBYTES |
483 | EXC( STORE t0, UNIT(0)(dst), s_exc) | 483 | EXC( STORE t0, UNIT(0)(dst), .Ls_exc) |
484 | ADDC(sum, t0) | 484 | ADDC(sum, t0) |
485 | EXC( STORE t1, UNIT(1)(dst), s_exc) | 485 | EXC( STORE t1, UNIT(1)(dst), .Ls_exc) |
486 | ADDC(sum, t1) | 486 | ADDC(sum, t1) |
487 | EXC( STORE t2, UNIT(2)(dst), s_exc) | 487 | EXC( STORE t2, UNIT(2)(dst), .Ls_exc) |
488 | ADDC(sum, t2) | 488 | ADDC(sum, t2) |
489 | EXC( STORE t3, UNIT(3)(dst), s_exc) | 489 | EXC( STORE t3, UNIT(3)(dst), .Ls_exc) |
490 | ADDC(sum, t3) | 490 | ADDC(sum, t3) |
491 | .set reorder /* DADDI_WAR */ | 491 | .set reorder /* DADDI_WAR */ |
492 | ADD dst, dst, 4*NBYTES | 492 | ADD dst, dst, 4*NBYTES |
493 | beqz len, done | 493 | beqz len, .Ldone |
494 | .set noreorder | 494 | .set noreorder |
495 | less_than_4units: | 495 | .Lless_than_4units: |
496 | /* | 496 | /* |
497 | * rem = len % NBYTES | 497 | * rem = len % NBYTES |
498 | */ | 498 | */ |
499 | beq rem, len, copy_bytes | 499 | beq rem, len, .Lcopy_bytes |
500 | nop | 500 | nop |
501 | 1: | 501 | 1: |
502 | EXC( LOAD t0, 0(src), l_exc) | 502 | EXC( LOAD t0, 0(src), .Ll_exc) |
503 | ADD src, src, NBYTES | 503 | ADD src, src, NBYTES |
504 | SUB len, len, NBYTES | 504 | SUB len, len, NBYTES |
505 | EXC( STORE t0, 0(dst), s_exc) | 505 | EXC( STORE t0, 0(dst), .Ls_exc) |
506 | ADDC(sum, t0) | 506 | ADDC(sum, t0) |
507 | .set reorder /* DADDI_WAR */ | 507 | .set reorder /* DADDI_WAR */ |
508 | ADD dst, dst, NBYTES | 508 | ADD dst, dst, NBYTES |
@@ -521,20 +521,20 @@ EXC( STORE t0, 0(dst), s_exc) | |||
521 | * more instruction-level parallelism. | 521 | * more instruction-level parallelism. |
522 | */ | 522 | */ |
523 | #define bits t2 | 523 | #define bits t2 |
524 | beqz len, done | 524 | beqz len, .Ldone |
525 | ADD t1, dst, len # t1 is just past last byte of dst | 525 | ADD t1, dst, len # t1 is just past last byte of dst |
526 | li bits, 8*NBYTES | 526 | li bits, 8*NBYTES |
527 | SLL rem, len, 3 # rem = number of bits to keep | 527 | SLL rem, len, 3 # rem = number of bits to keep |
528 | EXC( LOAD t0, 0(src), l_exc) | 528 | EXC( LOAD t0, 0(src), .Ll_exc) |
529 | SUB bits, bits, rem # bits = number of bits to discard | 529 | SUB bits, bits, rem # bits = number of bits to discard |
530 | SHIFT_DISCARD t0, t0, bits | 530 | SHIFT_DISCARD t0, t0, bits |
531 | EXC( STREST t0, -1(t1), s_exc) | 531 | EXC( STREST t0, -1(t1), .Ls_exc) |
532 | SHIFT_DISCARD_REVERT t0, t0, bits | 532 | SHIFT_DISCARD_REVERT t0, t0, bits |
533 | .set reorder | 533 | .set reorder |
534 | ADDC(sum, t0) | 534 | ADDC(sum, t0) |
535 | b done | 535 | b .Ldone |
536 | .set noreorder | 536 | .set noreorder |
537 | dst_unaligned: | 537 | .Ldst_unaligned: |
538 | /* | 538 | /* |
539 | * dst is unaligned | 539 | * dst is unaligned |
540 | * t0 = src & ADDRMASK | 540 | * t0 = src & ADDRMASK |
@@ -545,25 +545,25 @@ dst_unaligned: | |||
545 | * Set match = (src and dst have same alignment) | 545 | * Set match = (src and dst have same alignment) |
546 | */ | 546 | */ |
547 | #define match rem | 547 | #define match rem |
548 | EXC( LDFIRST t3, FIRST(0)(src), l_exc) | 548 | EXC( LDFIRST t3, FIRST(0)(src), .Ll_exc) |
549 | ADD t2, zero, NBYTES | 549 | ADD t2, zero, NBYTES |
550 | EXC( LDREST t3, REST(0)(src), l_exc_copy) | 550 | EXC( LDREST t3, REST(0)(src), .Ll_exc_copy) |
551 | SUB t2, t2, t1 # t2 = number of bytes copied | 551 | SUB t2, t2, t1 # t2 = number of bytes copied |
552 | xor match, t0, t1 | 552 | xor match, t0, t1 |
553 | EXC( STFIRST t3, FIRST(0)(dst), s_exc) | 553 | EXC( STFIRST t3, FIRST(0)(dst), .Ls_exc) |
554 | SLL t4, t1, 3 # t4 = number of bits to discard | 554 | SLL t4, t1, 3 # t4 = number of bits to discard |
555 | SHIFT_DISCARD t3, t3, t4 | 555 | SHIFT_DISCARD t3, t3, t4 |
556 | /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */ | 556 | /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */ |
557 | ADDC(sum, t3) | 557 | ADDC(sum, t3) |
558 | beq len, t2, done | 558 | beq len, t2, .Ldone |
559 | SUB len, len, t2 | 559 | SUB len, len, t2 |
560 | ADD dst, dst, t2 | 560 | ADD dst, dst, t2 |
561 | beqz match, both_aligned | 561 | beqz match, .Lboth_aligned |
562 | ADD src, src, t2 | 562 | ADD src, src, t2 |
563 | 563 | ||
564 | src_unaligned_dst_aligned: | 564 | .Lsrc_unaligned_dst_aligned: |
565 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter | 565 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter |
566 | beqz t0, cleanup_src_unaligned | 566 | beqz t0, .Lcleanup_src_unaligned |
567 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES | 567 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES |
568 | 1: | 568 | 1: |
569 | /* | 569 | /* |
@@ -572,53 +572,53 @@ src_unaligned_dst_aligned: | |||
572 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses | 572 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses |
573 | * are to the same unit (unless src is aligned, but it's not). | 573 | * are to the same unit (unless src is aligned, but it's not). |
574 | */ | 574 | */ |
575 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 575 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
576 | EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy) | 576 | EXC( LDFIRST t1, FIRST(1)(src), .Ll_exc_copy) |
577 | SUB len, len, 4*NBYTES | 577 | SUB len, len, 4*NBYTES |
578 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 578 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) |
579 | EXC( LDREST t1, REST(1)(src), l_exc_copy) | 579 | EXC( LDREST t1, REST(1)(src), .Ll_exc_copy) |
580 | EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy) | 580 | EXC( LDFIRST t2, FIRST(2)(src), .Ll_exc_copy) |
581 | EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy) | 581 | EXC( LDFIRST t3, FIRST(3)(src), .Ll_exc_copy) |
582 | EXC( LDREST t2, REST(2)(src), l_exc_copy) | 582 | EXC( LDREST t2, REST(2)(src), .Ll_exc_copy) |
583 | EXC( LDREST t3, REST(3)(src), l_exc_copy) | 583 | EXC( LDREST t3, REST(3)(src), .Ll_exc_copy) |
584 | ADD src, src, 4*NBYTES | 584 | ADD src, src, 4*NBYTES |
585 | #ifdef CONFIG_CPU_SB1 | 585 | #ifdef CONFIG_CPU_SB1 |
586 | nop # improves slotting | 586 | nop # improves slotting |
587 | #endif | 587 | #endif |
588 | EXC( STORE t0, UNIT(0)(dst), s_exc) | 588 | EXC( STORE t0, UNIT(0)(dst), .Ls_exc) |
589 | ADDC(sum, t0) | 589 | ADDC(sum, t0) |
590 | EXC( STORE t1, UNIT(1)(dst), s_exc) | 590 | EXC( STORE t1, UNIT(1)(dst), .Ls_exc) |
591 | ADDC(sum, t1) | 591 | ADDC(sum, t1) |
592 | EXC( STORE t2, UNIT(2)(dst), s_exc) | 592 | EXC( STORE t2, UNIT(2)(dst), .Ls_exc) |
593 | ADDC(sum, t2) | 593 | ADDC(sum, t2) |
594 | EXC( STORE t3, UNIT(3)(dst), s_exc) | 594 | EXC( STORE t3, UNIT(3)(dst), .Ls_exc) |
595 | ADDC(sum, t3) | 595 | ADDC(sum, t3) |
596 | .set reorder /* DADDI_WAR */ | 596 | .set reorder /* DADDI_WAR */ |
597 | ADD dst, dst, 4*NBYTES | 597 | ADD dst, dst, 4*NBYTES |
598 | bne len, rem, 1b | 598 | bne len, rem, 1b |
599 | .set noreorder | 599 | .set noreorder |
600 | 600 | ||
601 | cleanup_src_unaligned: | 601 | .Lcleanup_src_unaligned: |
602 | beqz len, done | 602 | beqz len, .Ldone |
603 | and rem, len, NBYTES-1 # rem = len % NBYTES | 603 | and rem, len, NBYTES-1 # rem = len % NBYTES |
604 | beq rem, len, copy_bytes | 604 | beq rem, len, .Lcopy_bytes |
605 | nop | 605 | nop |
606 | 1: | 606 | 1: |
607 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 607 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
608 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 608 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) |
609 | ADD src, src, NBYTES | 609 | ADD src, src, NBYTES |
610 | SUB len, len, NBYTES | 610 | SUB len, len, NBYTES |
611 | EXC( STORE t0, 0(dst), s_exc) | 611 | EXC( STORE t0, 0(dst), .Ls_exc) |
612 | ADDC(sum, t0) | 612 | ADDC(sum, t0) |
613 | .set reorder /* DADDI_WAR */ | 613 | .set reorder /* DADDI_WAR */ |
614 | ADD dst, dst, NBYTES | 614 | ADD dst, dst, NBYTES |
615 | bne len, rem, 1b | 615 | bne len, rem, 1b |
616 | .set noreorder | 616 | .set noreorder |
617 | 617 | ||
618 | copy_bytes_checklen: | 618 | .Lcopy_bytes_checklen: |
619 | beqz len, done | 619 | beqz len, .Ldone |
620 | nop | 620 | nop |
621 | copy_bytes: | 621 | .Lcopy_bytes: |
622 | /* 0 < len < NBYTES */ | 622 | /* 0 < len < NBYTES */ |
623 | #ifdef CONFIG_CPU_LITTLE_ENDIAN | 623 | #ifdef CONFIG_CPU_LITTLE_ENDIAN |
624 | #define SHIFT_START 0 | 624 | #define SHIFT_START 0 |
@@ -629,14 +629,14 @@ copy_bytes: | |||
629 | #endif | 629 | #endif |
630 | move t2, zero # partial word | 630 | move t2, zero # partial word |
631 | li t3, SHIFT_START # shift | 631 | li t3, SHIFT_START # shift |
632 | /* use l_exc_copy here to return correct sum on fault */ | 632 | /* use .Ll_exc_copy here to return correct sum on fault */ |
633 | #define COPY_BYTE(N) \ | 633 | #define COPY_BYTE(N) \ |
634 | EXC( lbu t0, N(src), l_exc_copy); \ | 634 | EXC( lbu t0, N(src), .Ll_exc_copy); \ |
635 | SUB len, len, 1; \ | 635 | SUB len, len, 1; \ |
636 | EXC( sb t0, N(dst), s_exc); \ | 636 | EXC( sb t0, N(dst), .Ls_exc); \ |
637 | SLLV t0, t0, t3; \ | 637 | SLLV t0, t0, t3; \ |
638 | addu t3, SHIFT_INC; \ | 638 | addu t3, SHIFT_INC; \ |
639 | beqz len, copy_bytes_done; \ | 639 | beqz len, .Lcopy_bytes_done; \ |
640 | or t2, t0 | 640 | or t2, t0 |
641 | 641 | ||
642 | COPY_BYTE(0) | 642 | COPY_BYTE(0) |
@@ -647,14 +647,14 @@ EXC( sb t0, N(dst), s_exc); \ | |||
647 | COPY_BYTE(4) | 647 | COPY_BYTE(4) |
648 | COPY_BYTE(5) | 648 | COPY_BYTE(5) |
649 | #endif | 649 | #endif |
650 | EXC( lbu t0, NBYTES-2(src), l_exc_copy) | 650 | EXC( lbu t0, NBYTES-2(src), .Ll_exc_copy) |
651 | SUB len, len, 1 | 651 | SUB len, len, 1 |
652 | EXC( sb t0, NBYTES-2(dst), s_exc) | 652 | EXC( sb t0, NBYTES-2(dst), .Ls_exc) |
653 | SLLV t0, t0, t3 | 653 | SLLV t0, t0, t3 |
654 | or t2, t0 | 654 | or t2, t0 |
655 | copy_bytes_done: | 655 | .Lcopy_bytes_done: |
656 | ADDC(sum, t2) | 656 | ADDC(sum, t2) |
657 | done: | 657 | .Ldone: |
658 | /* fold checksum */ | 658 | /* fold checksum */ |
659 | .set push | 659 | .set push |
660 | .set noat | 660 | .set noat |
@@ -685,7 +685,7 @@ done: | |||
685 | jr ra | 685 | jr ra |
686 | .set noreorder | 686 | .set noreorder |
687 | 687 | ||
688 | l_exc_copy: | 688 | .Ll_exc_copy: |
689 | /* | 689 | /* |
690 | * Copy bytes from src until faulting load address (or until a | 690 | * Copy bytes from src until faulting load address (or until a |
691 | * lb faults) | 691 | * lb faults) |
@@ -700,7 +700,7 @@ l_exc_copy: | |||
700 | li t2, SHIFT_START | 700 | li t2, SHIFT_START |
701 | LOAD t0, THREAD_BUADDR(t0) | 701 | LOAD t0, THREAD_BUADDR(t0) |
702 | 1: | 702 | 1: |
703 | EXC( lbu t1, 0(src), l_exc) | 703 | EXC( lbu t1, 0(src), .Ll_exc) |
704 | ADD src, src, 1 | 704 | ADD src, src, 1 |
705 | sb t1, 0(dst) # can't fault -- we're copy_from_user | 705 | sb t1, 0(dst) # can't fault -- we're copy_from_user |
706 | SLLV t1, t1, t2 | 706 | SLLV t1, t1, t2 |
@@ -710,7 +710,7 @@ EXC( lbu t1, 0(src), l_exc) | |||
710 | ADD dst, dst, 1 | 710 | ADD dst, dst, 1 |
711 | bne src, t0, 1b | 711 | bne src, t0, 1b |
712 | .set noreorder | 712 | .set noreorder |
713 | l_exc: | 713 | .Ll_exc: |
714 | LOAD t0, TI_TASK($28) | 714 | LOAD t0, TI_TASK($28) |
715 | nop | 715 | nop |
716 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address | 716 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address |
@@ -729,7 +729,7 @@ l_exc: | |||
729 | */ | 729 | */ |
730 | .set reorder /* DADDI_WAR */ | 730 | .set reorder /* DADDI_WAR */ |
731 | SUB src, len, 1 | 731 | SUB src, len, 1 |
732 | beqz len, done | 732 | beqz len, .Ldone |
733 | .set noreorder | 733 | .set noreorder |
734 | 1: sb zero, 0(dst) | 734 | 1: sb zero, 0(dst) |
735 | ADD dst, dst, 1 | 735 | ADD dst, dst, 1 |
@@ -744,10 +744,10 @@ l_exc: | |||
744 | SUB src, src, v1 | 744 | SUB src, src, v1 |
745 | #endif | 745 | #endif |
746 | li v1, -EFAULT | 746 | li v1, -EFAULT |
747 | b done | 747 | b .Ldone |
748 | sw v1, (errptr) | 748 | sw v1, (errptr) |
749 | 749 | ||
750 | s_exc: | 750 | .Ls_exc: |
751 | li v0, -1 /* invalid checksum */ | 751 | li v0, -1 /* invalid checksum */ |
752 | li v1, -EFAULT | 752 | li v1, -EFAULT |
753 | jr ra | 753 | jr ra |
diff --git a/arch/mips/lib/memcpy-inatomic.S b/arch/mips/lib/memcpy-inatomic.S index d1b08f5d6860..736d0fb56a94 100644 --- a/arch/mips/lib/memcpy-inatomic.S +++ b/arch/mips/lib/memcpy-inatomic.S | |||
@@ -209,36 +209,36 @@ LEAF(__copy_user_inatomic) | |||
209 | and t1, dst, ADDRMASK | 209 | and t1, dst, ADDRMASK |
210 | PREF( 0, 1*32(src) ) | 210 | PREF( 0, 1*32(src) ) |
211 | PREF( 1, 1*32(dst) ) | 211 | PREF( 1, 1*32(dst) ) |
212 | bnez t2, copy_bytes_checklen | 212 | bnez t2, .Lcopy_bytes_checklen |
213 | and t0, src, ADDRMASK | 213 | and t0, src, ADDRMASK |
214 | PREF( 0, 2*32(src) ) | 214 | PREF( 0, 2*32(src) ) |
215 | PREF( 1, 2*32(dst) ) | 215 | PREF( 1, 2*32(dst) ) |
216 | bnez t1, dst_unaligned | 216 | bnez t1, .Ldst_unaligned |
217 | nop | 217 | nop |
218 | bnez t0, src_unaligned_dst_aligned | 218 | bnez t0, .Lsrc_unaligned_dst_aligned |
219 | /* | 219 | /* |
220 | * use delay slot for fall-through | 220 | * use delay slot for fall-through |
221 | * src and dst are aligned; need to compute rem | 221 | * src and dst are aligned; need to compute rem |
222 | */ | 222 | */ |
223 | both_aligned: | 223 | .Lboth_aligned: |
224 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter | 224 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter |
225 | beqz t0, cleanup_both_aligned # len < 8*NBYTES | 225 | beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES |
226 | and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES) | 226 | and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES) |
227 | PREF( 0, 3*32(src) ) | 227 | PREF( 0, 3*32(src) ) |
228 | PREF( 1, 3*32(dst) ) | 228 | PREF( 1, 3*32(dst) ) |
229 | .align 4 | 229 | .align 4 |
230 | 1: | 230 | 1: |
231 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 231 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
232 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 232 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
233 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 233 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
234 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 234 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) |
235 | SUB len, len, 8*NBYTES | 235 | SUB len, len, 8*NBYTES |
236 | EXC( LOAD t4, UNIT(4)(src), l_exc_copy) | 236 | EXC( LOAD t4, UNIT(4)(src), .Ll_exc_copy) |
237 | EXC( LOAD t7, UNIT(5)(src), l_exc_copy) | 237 | EXC( LOAD t7, UNIT(5)(src), .Ll_exc_copy) |
238 | STORE t0, UNIT(0)(dst) | 238 | STORE t0, UNIT(0)(dst) |
239 | STORE t1, UNIT(1)(dst) | 239 | STORE t1, UNIT(1)(dst) |
240 | EXC( LOAD t0, UNIT(6)(src), l_exc_copy) | 240 | EXC( LOAD t0, UNIT(6)(src), .Ll_exc_copy) |
241 | EXC( LOAD t1, UNIT(7)(src), l_exc_copy) | 241 | EXC( LOAD t1, UNIT(7)(src), .Ll_exc_copy) |
242 | ADD src, src, 8*NBYTES | 242 | ADD src, src, 8*NBYTES |
243 | ADD dst, dst, 8*NBYTES | 243 | ADD dst, dst, 8*NBYTES |
244 | STORE t2, UNIT(-6)(dst) | 244 | STORE t2, UNIT(-6)(dst) |
@@ -255,18 +255,18 @@ EXC( LOAD t1, UNIT(7)(src), l_exc_copy) | |||
255 | /* | 255 | /* |
256 | * len == rem == the number of bytes left to copy < 8*NBYTES | 256 | * len == rem == the number of bytes left to copy < 8*NBYTES |
257 | */ | 257 | */ |
258 | cleanup_both_aligned: | 258 | .Lcleanup_both_aligned: |
259 | beqz len, done | 259 | beqz len, .Ldone |
260 | sltu t0, len, 4*NBYTES | 260 | sltu t0, len, 4*NBYTES |
261 | bnez t0, less_than_4units | 261 | bnez t0, .Lless_than_4units |
262 | and rem, len, (NBYTES-1) # rem = len % NBYTES | 262 | and rem, len, (NBYTES-1) # rem = len % NBYTES |
263 | /* | 263 | /* |
264 | * len >= 4*NBYTES | 264 | * len >= 4*NBYTES |
265 | */ | 265 | */ |
266 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 266 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
267 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 267 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
268 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 268 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
269 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 269 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) |
270 | SUB len, len, 4*NBYTES | 270 | SUB len, len, 4*NBYTES |
271 | ADD src, src, 4*NBYTES | 271 | ADD src, src, 4*NBYTES |
272 | STORE t0, UNIT(0)(dst) | 272 | STORE t0, UNIT(0)(dst) |
@@ -275,16 +275,16 @@ EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | |||
275 | STORE t3, UNIT(3)(dst) | 275 | STORE t3, UNIT(3)(dst) |
276 | .set reorder /* DADDI_WAR */ | 276 | .set reorder /* DADDI_WAR */ |
277 | ADD dst, dst, 4*NBYTES | 277 | ADD dst, dst, 4*NBYTES |
278 | beqz len, done | 278 | beqz len, .Ldone |
279 | .set noreorder | 279 | .set noreorder |
280 | less_than_4units: | 280 | .Lless_than_4units: |
281 | /* | 281 | /* |
282 | * rem = len % NBYTES | 282 | * rem = len % NBYTES |
283 | */ | 283 | */ |
284 | beq rem, len, copy_bytes | 284 | beq rem, len, .Lcopy_bytes |
285 | nop | 285 | nop |
286 | 1: | 286 | 1: |
287 | EXC( LOAD t0, 0(src), l_exc) | 287 | EXC( LOAD t0, 0(src), .Ll_exc) |
288 | ADD src, src, NBYTES | 288 | ADD src, src, NBYTES |
289 | SUB len, len, NBYTES | 289 | SUB len, len, NBYTES |
290 | STORE t0, 0(dst) | 290 | STORE t0, 0(dst) |
@@ -305,17 +305,17 @@ EXC( LOAD t0, 0(src), l_exc) | |||
305 | * more instruction-level parallelism. | 305 | * more instruction-level parallelism. |
306 | */ | 306 | */ |
307 | #define bits t2 | 307 | #define bits t2 |
308 | beqz len, done | 308 | beqz len, .Ldone |
309 | ADD t1, dst, len # t1 is just past last byte of dst | 309 | ADD t1, dst, len # t1 is just past last byte of dst |
310 | li bits, 8*NBYTES | 310 | li bits, 8*NBYTES |
311 | SLL rem, len, 3 # rem = number of bits to keep | 311 | SLL rem, len, 3 # rem = number of bits to keep |
312 | EXC( LOAD t0, 0(src), l_exc) | 312 | EXC( LOAD t0, 0(src), .Ll_exc) |
313 | SUB bits, bits, rem # bits = number of bits to discard | 313 | SUB bits, bits, rem # bits = number of bits to discard |
314 | SHIFT_DISCARD t0, t0, bits | 314 | SHIFT_DISCARD t0, t0, bits |
315 | STREST t0, -1(t1) | 315 | STREST t0, -1(t1) |
316 | jr ra | 316 | jr ra |
317 | move len, zero | 317 | move len, zero |
318 | dst_unaligned: | 318 | .Ldst_unaligned: |
319 | /* | 319 | /* |
320 | * dst is unaligned | 320 | * dst is unaligned |
321 | * t0 = src & ADDRMASK | 321 | * t0 = src & ADDRMASK |
@@ -326,22 +326,22 @@ dst_unaligned: | |||
326 | * Set match = (src and dst have same alignment) | 326 | * Set match = (src and dst have same alignment) |
327 | */ | 327 | */ |
328 | #define match rem | 328 | #define match rem |
329 | EXC( LDFIRST t3, FIRST(0)(src), l_exc) | 329 | EXC( LDFIRST t3, FIRST(0)(src), .Ll_exc) |
330 | ADD t2, zero, NBYTES | 330 | ADD t2, zero, NBYTES |
331 | EXC( LDREST t3, REST(0)(src), l_exc_copy) | 331 | EXC( LDREST t3, REST(0)(src), .Ll_exc_copy) |
332 | SUB t2, t2, t1 # t2 = number of bytes copied | 332 | SUB t2, t2, t1 # t2 = number of bytes copied |
333 | xor match, t0, t1 | 333 | xor match, t0, t1 |
334 | STFIRST t3, FIRST(0)(dst) | 334 | STFIRST t3, FIRST(0)(dst) |
335 | beq len, t2, done | 335 | beq len, t2, .Ldone |
336 | SUB len, len, t2 | 336 | SUB len, len, t2 |
337 | ADD dst, dst, t2 | 337 | ADD dst, dst, t2 |
338 | beqz match, both_aligned | 338 | beqz match, .Lboth_aligned |
339 | ADD src, src, t2 | 339 | ADD src, src, t2 |
340 | 340 | ||
341 | src_unaligned_dst_aligned: | 341 | .Lsrc_unaligned_dst_aligned: |
342 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter | 342 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter |
343 | PREF( 0, 3*32(src) ) | 343 | PREF( 0, 3*32(src) ) |
344 | beqz t0, cleanup_src_unaligned | 344 | beqz t0, .Lcleanup_src_unaligned |
345 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES | 345 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES |
346 | PREF( 1, 3*32(dst) ) | 346 | PREF( 1, 3*32(dst) ) |
347 | 1: | 347 | 1: |
@@ -351,15 +351,15 @@ src_unaligned_dst_aligned: | |||
351 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses | 351 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses |
352 | * are to the same unit (unless src is aligned, but it's not). | 352 | * are to the same unit (unless src is aligned, but it's not). |
353 | */ | 353 | */ |
354 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 354 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
355 | EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy) | 355 | EXC( LDFIRST t1, FIRST(1)(src), .Ll_exc_copy) |
356 | SUB len, len, 4*NBYTES | 356 | SUB len, len, 4*NBYTES |
357 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 357 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) |
358 | EXC( LDREST t1, REST(1)(src), l_exc_copy) | 358 | EXC( LDREST t1, REST(1)(src), .Ll_exc_copy) |
359 | EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy) | 359 | EXC( LDFIRST t2, FIRST(2)(src), .Ll_exc_copy) |
360 | EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy) | 360 | EXC( LDFIRST t3, FIRST(3)(src), .Ll_exc_copy) |
361 | EXC( LDREST t2, REST(2)(src), l_exc_copy) | 361 | EXC( LDREST t2, REST(2)(src), .Ll_exc_copy) |
362 | EXC( LDREST t3, REST(3)(src), l_exc_copy) | 362 | EXC( LDREST t3, REST(3)(src), .Ll_exc_copy) |
363 | PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed) | 363 | PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed) |
364 | ADD src, src, 4*NBYTES | 364 | ADD src, src, 4*NBYTES |
365 | #ifdef CONFIG_CPU_SB1 | 365 | #ifdef CONFIG_CPU_SB1 |
@@ -375,14 +375,14 @@ EXC( LDREST t3, REST(3)(src), l_exc_copy) | |||
375 | bne len, rem, 1b | 375 | bne len, rem, 1b |
376 | .set noreorder | 376 | .set noreorder |
377 | 377 | ||
378 | cleanup_src_unaligned: | 378 | .Lcleanup_src_unaligned: |
379 | beqz len, done | 379 | beqz len, .Ldone |
380 | and rem, len, NBYTES-1 # rem = len % NBYTES | 380 | and rem, len, NBYTES-1 # rem = len % NBYTES |
381 | beq rem, len, copy_bytes | 381 | beq rem, len, .Lcopy_bytes |
382 | nop | 382 | nop |
383 | 1: | 383 | 1: |
384 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 384 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
385 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 385 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) |
386 | ADD src, src, NBYTES | 386 | ADD src, src, NBYTES |
387 | SUB len, len, NBYTES | 387 | SUB len, len, NBYTES |
388 | STORE t0, 0(dst) | 388 | STORE t0, 0(dst) |
@@ -391,15 +391,15 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy) | |||
391 | bne len, rem, 1b | 391 | bne len, rem, 1b |
392 | .set noreorder | 392 | .set noreorder |
393 | 393 | ||
394 | copy_bytes_checklen: | 394 | .Lcopy_bytes_checklen: |
395 | beqz len, done | 395 | beqz len, .Ldone |
396 | nop | 396 | nop |
397 | copy_bytes: | 397 | .Lcopy_bytes: |
398 | /* 0 < len < NBYTES */ | 398 | /* 0 < len < NBYTES */ |
399 | #define COPY_BYTE(N) \ | 399 | #define COPY_BYTE(N) \ |
400 | EXC( lb t0, N(src), l_exc); \ | 400 | EXC( lb t0, N(src), .Ll_exc); \ |
401 | SUB len, len, 1; \ | 401 | SUB len, len, 1; \ |
402 | beqz len, done; \ | 402 | beqz len, .Ldone; \ |
403 | sb t0, N(dst) | 403 | sb t0, N(dst) |
404 | 404 | ||
405 | COPY_BYTE(0) | 405 | COPY_BYTE(0) |
@@ -410,16 +410,16 @@ EXC( lb t0, N(src), l_exc); \ | |||
410 | COPY_BYTE(4) | 410 | COPY_BYTE(4) |
411 | COPY_BYTE(5) | 411 | COPY_BYTE(5) |
412 | #endif | 412 | #endif |
413 | EXC( lb t0, NBYTES-2(src), l_exc) | 413 | EXC( lb t0, NBYTES-2(src), .Ll_exc) |
414 | SUB len, len, 1 | 414 | SUB len, len, 1 |
415 | jr ra | 415 | jr ra |
416 | sb t0, NBYTES-2(dst) | 416 | sb t0, NBYTES-2(dst) |
417 | done: | 417 | .Ldone: |
418 | jr ra | 418 | jr ra |
419 | nop | 419 | nop |
420 | END(__copy_user_inatomic) | 420 | END(__copy_user_inatomic) |
421 | 421 | ||
422 | l_exc_copy: | 422 | .Ll_exc_copy: |
423 | /* | 423 | /* |
424 | * Copy bytes from src until faulting load address (or until a | 424 | * Copy bytes from src until faulting load address (or until a |
425 | * lb faults) | 425 | * lb faults) |
@@ -434,14 +434,14 @@ l_exc_copy: | |||
434 | nop | 434 | nop |
435 | LOAD t0, THREAD_BUADDR(t0) | 435 | LOAD t0, THREAD_BUADDR(t0) |
436 | 1: | 436 | 1: |
437 | EXC( lb t1, 0(src), l_exc) | 437 | EXC( lb t1, 0(src), .Ll_exc) |
438 | ADD src, src, 1 | 438 | ADD src, src, 1 |
439 | sb t1, 0(dst) # can't fault -- we're copy_from_user | 439 | sb t1, 0(dst) # can't fault -- we're copy_from_user |
440 | .set reorder /* DADDI_WAR */ | 440 | .set reorder /* DADDI_WAR */ |
441 | ADD dst, dst, 1 | 441 | ADD dst, dst, 1 |
442 | bne src, t0, 1b | 442 | bne src, t0, 1b |
443 | .set noreorder | 443 | .set noreorder |
444 | l_exc: | 444 | .Ll_exc: |
445 | LOAD t0, TI_TASK($28) | 445 | LOAD t0, TI_TASK($28) |
446 | nop | 446 | nop |
447 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address | 447 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address |
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index 01e450b1ebc9..c06cccf60bec 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S | |||
@@ -191,7 +191,7 @@ | |||
191 | .align 5 | 191 | .align 5 |
192 | LEAF(memcpy) /* a0=dst a1=src a2=len */ | 192 | LEAF(memcpy) /* a0=dst a1=src a2=len */ |
193 | move v0, dst /* return value */ | 193 | move v0, dst /* return value */ |
194 | __memcpy: | 194 | .L__memcpy: |
195 | FEXPORT(__copy_user) | 195 | FEXPORT(__copy_user) |
196 | /* | 196 | /* |
197 | * Note: dst & src may be unaligned, len may be 0 | 197 | * Note: dst & src may be unaligned, len may be 0 |
@@ -213,45 +213,45 @@ FEXPORT(__copy_user) | |||
213 | and t1, dst, ADDRMASK | 213 | and t1, dst, ADDRMASK |
214 | PREF( 0, 1*32(src) ) | 214 | PREF( 0, 1*32(src) ) |
215 | PREF( 1, 1*32(dst) ) | 215 | PREF( 1, 1*32(dst) ) |
216 | bnez t2, copy_bytes_checklen | 216 | bnez t2, .Lcopy_bytes_checklen |
217 | and t0, src, ADDRMASK | 217 | and t0, src, ADDRMASK |
218 | PREF( 0, 2*32(src) ) | 218 | PREF( 0, 2*32(src) ) |
219 | PREF( 1, 2*32(dst) ) | 219 | PREF( 1, 2*32(dst) ) |
220 | bnez t1, dst_unaligned | 220 | bnez t1, .Ldst_unaligned |
221 | nop | 221 | nop |
222 | bnez t0, src_unaligned_dst_aligned | 222 | bnez t0, .Lsrc_unaligned_dst_aligned |
223 | /* | 223 | /* |
224 | * use delay slot for fall-through | 224 | * use delay slot for fall-through |
225 | * src and dst are aligned; need to compute rem | 225 | * src and dst are aligned; need to compute rem |
226 | */ | 226 | */ |
227 | both_aligned: | 227 | .Lboth_aligned: |
228 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter | 228 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter |
229 | beqz t0, cleanup_both_aligned # len < 8*NBYTES | 229 | beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES |
230 | and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES) | 230 | and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES) |
231 | PREF( 0, 3*32(src) ) | 231 | PREF( 0, 3*32(src) ) |
232 | PREF( 1, 3*32(dst) ) | 232 | PREF( 1, 3*32(dst) ) |
233 | .align 4 | 233 | .align 4 |
234 | 1: | 234 | 1: |
235 | R10KCBARRIER(0(ra)) | 235 | R10KCBARRIER(0(ra)) |
236 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 236 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
237 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 237 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
238 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 238 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
239 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 239 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) |
240 | SUB len, len, 8*NBYTES | 240 | SUB len, len, 8*NBYTES |
241 | EXC( LOAD t4, UNIT(4)(src), l_exc_copy) | 241 | EXC( LOAD t4, UNIT(4)(src), .Ll_exc_copy) |
242 | EXC( LOAD t7, UNIT(5)(src), l_exc_copy) | 242 | EXC( LOAD t7, UNIT(5)(src), .Ll_exc_copy) |
243 | EXC( STORE t0, UNIT(0)(dst), s_exc_p8u) | 243 | EXC( STORE t0, UNIT(0)(dst), .Ls_exc_p8u) |
244 | EXC( STORE t1, UNIT(1)(dst), s_exc_p7u) | 244 | EXC( STORE t1, UNIT(1)(dst), .Ls_exc_p7u) |
245 | EXC( LOAD t0, UNIT(6)(src), l_exc_copy) | 245 | EXC( LOAD t0, UNIT(6)(src), .Ll_exc_copy) |
246 | EXC( LOAD t1, UNIT(7)(src), l_exc_copy) | 246 | EXC( LOAD t1, UNIT(7)(src), .Ll_exc_copy) |
247 | ADD src, src, 8*NBYTES | 247 | ADD src, src, 8*NBYTES |
248 | ADD dst, dst, 8*NBYTES | 248 | ADD dst, dst, 8*NBYTES |
249 | EXC( STORE t2, UNIT(-6)(dst), s_exc_p6u) | 249 | EXC( STORE t2, UNIT(-6)(dst), .Ls_exc_p6u) |
250 | EXC( STORE t3, UNIT(-5)(dst), s_exc_p5u) | 250 | EXC( STORE t3, UNIT(-5)(dst), .Ls_exc_p5u) |
251 | EXC( STORE t4, UNIT(-4)(dst), s_exc_p4u) | 251 | EXC( STORE t4, UNIT(-4)(dst), .Ls_exc_p4u) |
252 | EXC( STORE t7, UNIT(-3)(dst), s_exc_p3u) | 252 | EXC( STORE t7, UNIT(-3)(dst), .Ls_exc_p3u) |
253 | EXC( STORE t0, UNIT(-2)(dst), s_exc_p2u) | 253 | EXC( STORE t0, UNIT(-2)(dst), .Ls_exc_p2u) |
254 | EXC( STORE t1, UNIT(-1)(dst), s_exc_p1u) | 254 | EXC( STORE t1, UNIT(-1)(dst), .Ls_exc_p1u) |
255 | PREF( 0, 8*32(src) ) | 255 | PREF( 0, 8*32(src) ) |
256 | PREF( 1, 8*32(dst) ) | 256 | PREF( 1, 8*32(dst) ) |
257 | bne len, rem, 1b | 257 | bne len, rem, 1b |
@@ -260,41 +260,41 @@ EXC( STORE t1, UNIT(-1)(dst), s_exc_p1u) | |||
260 | /* | 260 | /* |
261 | * len == rem == the number of bytes left to copy < 8*NBYTES | 261 | * len == rem == the number of bytes left to copy < 8*NBYTES |
262 | */ | 262 | */ |
263 | cleanup_both_aligned: | 263 | .Lcleanup_both_aligned: |
264 | beqz len, done | 264 | beqz len, .Ldone |
265 | sltu t0, len, 4*NBYTES | 265 | sltu t0, len, 4*NBYTES |
266 | bnez t0, less_than_4units | 266 | bnez t0, .Lless_than_4units |
267 | and rem, len, (NBYTES-1) # rem = len % NBYTES | 267 | and rem, len, (NBYTES-1) # rem = len % NBYTES |
268 | /* | 268 | /* |
269 | * len >= 4*NBYTES | 269 | * len >= 4*NBYTES |
270 | */ | 270 | */ |
271 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 271 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
272 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 272 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
273 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 273 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
274 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 274 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) |
275 | SUB len, len, 4*NBYTES | 275 | SUB len, len, 4*NBYTES |
276 | ADD src, src, 4*NBYTES | 276 | ADD src, src, 4*NBYTES |
277 | R10KCBARRIER(0(ra)) | 277 | R10KCBARRIER(0(ra)) |
278 | EXC( STORE t0, UNIT(0)(dst), s_exc_p4u) | 278 | EXC( STORE t0, UNIT(0)(dst), .Ls_exc_p4u) |
279 | EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) | 279 | EXC( STORE t1, UNIT(1)(dst), .Ls_exc_p3u) |
280 | EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) | 280 | EXC( STORE t2, UNIT(2)(dst), .Ls_exc_p2u) |
281 | EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) | 281 | EXC( STORE t3, UNIT(3)(dst), .Ls_exc_p1u) |
282 | .set reorder /* DADDI_WAR */ | 282 | .set reorder /* DADDI_WAR */ |
283 | ADD dst, dst, 4*NBYTES | 283 | ADD dst, dst, 4*NBYTES |
284 | beqz len, done | 284 | beqz len, .Ldone |
285 | .set noreorder | 285 | .set noreorder |
286 | less_than_4units: | 286 | .Lless_than_4units: |
287 | /* | 287 | /* |
288 | * rem = len % NBYTES | 288 | * rem = len % NBYTES |
289 | */ | 289 | */ |
290 | beq rem, len, copy_bytes | 290 | beq rem, len, .Lcopy_bytes |
291 | nop | 291 | nop |
292 | 1: | 292 | 1: |
293 | R10KCBARRIER(0(ra)) | 293 | R10KCBARRIER(0(ra)) |
294 | EXC( LOAD t0, 0(src), l_exc) | 294 | EXC( LOAD t0, 0(src), .Ll_exc) |
295 | ADD src, src, NBYTES | 295 | ADD src, src, NBYTES |
296 | SUB len, len, NBYTES | 296 | SUB len, len, NBYTES |
297 | EXC( STORE t0, 0(dst), s_exc_p1u) | 297 | EXC( STORE t0, 0(dst), .Ls_exc_p1u) |
298 | .set reorder /* DADDI_WAR */ | 298 | .set reorder /* DADDI_WAR */ |
299 | ADD dst, dst, NBYTES | 299 | ADD dst, dst, NBYTES |
300 | bne rem, len, 1b | 300 | bne rem, len, 1b |
@@ -312,17 +312,17 @@ EXC( STORE t0, 0(dst), s_exc_p1u) | |||
312 | * more instruction-level parallelism. | 312 | * more instruction-level parallelism. |
313 | */ | 313 | */ |
314 | #define bits t2 | 314 | #define bits t2 |
315 | beqz len, done | 315 | beqz len, .Ldone |
316 | ADD t1, dst, len # t1 is just past last byte of dst | 316 | ADD t1, dst, len # t1 is just past last byte of dst |
317 | li bits, 8*NBYTES | 317 | li bits, 8*NBYTES |
318 | SLL rem, len, 3 # rem = number of bits to keep | 318 | SLL rem, len, 3 # rem = number of bits to keep |
319 | EXC( LOAD t0, 0(src), l_exc) | 319 | EXC( LOAD t0, 0(src), .Ll_exc) |
320 | SUB bits, bits, rem # bits = number of bits to discard | 320 | SUB bits, bits, rem # bits = number of bits to discard |
321 | SHIFT_DISCARD t0, t0, bits | 321 | SHIFT_DISCARD t0, t0, bits |
322 | EXC( STREST t0, -1(t1), s_exc) | 322 | EXC( STREST t0, -1(t1), .Ls_exc) |
323 | jr ra | 323 | jr ra |
324 | move len, zero | 324 | move len, zero |
325 | dst_unaligned: | 325 | .Ldst_unaligned: |
326 | /* | 326 | /* |
327 | * dst is unaligned | 327 | * dst is unaligned |
328 | * t0 = src & ADDRMASK | 328 | * t0 = src & ADDRMASK |
@@ -333,23 +333,23 @@ dst_unaligned: | |||
333 | * Set match = (src and dst have same alignment) | 333 | * Set match = (src and dst have same alignment) |
334 | */ | 334 | */ |
335 | #define match rem | 335 | #define match rem |
336 | EXC( LDFIRST t3, FIRST(0)(src), l_exc) | 336 | EXC( LDFIRST t3, FIRST(0)(src), .Ll_exc) |
337 | ADD t2, zero, NBYTES | 337 | ADD t2, zero, NBYTES |
338 | EXC( LDREST t3, REST(0)(src), l_exc_copy) | 338 | EXC( LDREST t3, REST(0)(src), .Ll_exc_copy) |
339 | SUB t2, t2, t1 # t2 = number of bytes copied | 339 | SUB t2, t2, t1 # t2 = number of bytes copied |
340 | xor match, t0, t1 | 340 | xor match, t0, t1 |
341 | R10KCBARRIER(0(ra)) | 341 | R10KCBARRIER(0(ra)) |
342 | EXC( STFIRST t3, FIRST(0)(dst), s_exc) | 342 | EXC( STFIRST t3, FIRST(0)(dst), .Ls_exc) |
343 | beq len, t2, done | 343 | beq len, t2, .Ldone |
344 | SUB len, len, t2 | 344 | SUB len, len, t2 |
345 | ADD dst, dst, t2 | 345 | ADD dst, dst, t2 |
346 | beqz match, both_aligned | 346 | beqz match, .Lboth_aligned |
347 | ADD src, src, t2 | 347 | ADD src, src, t2 |
348 | 348 | ||
349 | src_unaligned_dst_aligned: | 349 | .Lsrc_unaligned_dst_aligned: |
350 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter | 350 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter |
351 | PREF( 0, 3*32(src) ) | 351 | PREF( 0, 3*32(src) ) |
352 | beqz t0, cleanup_src_unaligned | 352 | beqz t0, .Lcleanup_src_unaligned |
353 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES | 353 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES |
354 | PREF( 1, 3*32(dst) ) | 354 | PREF( 1, 3*32(dst) ) |
355 | 1: | 355 | 1: |
@@ -360,58 +360,58 @@ src_unaligned_dst_aligned: | |||
360 | * are to the same unit (unless src is aligned, but it's not). | 360 | * are to the same unit (unless src is aligned, but it's not). |
361 | */ | 361 | */ |
362 | R10KCBARRIER(0(ra)) | 362 | R10KCBARRIER(0(ra)) |
363 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 363 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
364 | EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy) | 364 | EXC( LDFIRST t1, FIRST(1)(src), .Ll_exc_copy) |
365 | SUB len, len, 4*NBYTES | 365 | SUB len, len, 4*NBYTES |
366 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 366 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) |
367 | EXC( LDREST t1, REST(1)(src), l_exc_copy) | 367 | EXC( LDREST t1, REST(1)(src), .Ll_exc_copy) |
368 | EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy) | 368 | EXC( LDFIRST t2, FIRST(2)(src), .Ll_exc_copy) |
369 | EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy) | 369 | EXC( LDFIRST t3, FIRST(3)(src), .Ll_exc_copy) |
370 | EXC( LDREST t2, REST(2)(src), l_exc_copy) | 370 | EXC( LDREST t2, REST(2)(src), .Ll_exc_copy) |
371 | EXC( LDREST t3, REST(3)(src), l_exc_copy) | 371 | EXC( LDREST t3, REST(3)(src), .Ll_exc_copy) |
372 | PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed) | 372 | PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed) |
373 | ADD src, src, 4*NBYTES | 373 | ADD src, src, 4*NBYTES |
374 | #ifdef CONFIG_CPU_SB1 | 374 | #ifdef CONFIG_CPU_SB1 |
375 | nop # improves slotting | 375 | nop # improves slotting |
376 | #endif | 376 | #endif |
377 | EXC( STORE t0, UNIT(0)(dst), s_exc_p4u) | 377 | EXC( STORE t0, UNIT(0)(dst), .Ls_exc_p4u) |
378 | EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) | 378 | EXC( STORE t1, UNIT(1)(dst), .Ls_exc_p3u) |
379 | EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) | 379 | EXC( STORE t2, UNIT(2)(dst), .Ls_exc_p2u) |
380 | EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) | 380 | EXC( STORE t3, UNIT(3)(dst), .Ls_exc_p1u) |
381 | PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed) | 381 | PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed) |
382 | .set reorder /* DADDI_WAR */ | 382 | .set reorder /* DADDI_WAR */ |
383 | ADD dst, dst, 4*NBYTES | 383 | ADD dst, dst, 4*NBYTES |
384 | bne len, rem, 1b | 384 | bne len, rem, 1b |
385 | .set noreorder | 385 | .set noreorder |
386 | 386 | ||
387 | cleanup_src_unaligned: | 387 | .Lcleanup_src_unaligned: |
388 | beqz len, done | 388 | beqz len, .Ldone |
389 | and rem, len, NBYTES-1 # rem = len % NBYTES | 389 | and rem, len, NBYTES-1 # rem = len % NBYTES |
390 | beq rem, len, copy_bytes | 390 | beq rem, len, .Lcopy_bytes |
391 | nop | 391 | nop |
392 | 1: | 392 | 1: |
393 | R10KCBARRIER(0(ra)) | 393 | R10KCBARRIER(0(ra)) |
394 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 394 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
395 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 395 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) |
396 | ADD src, src, NBYTES | 396 | ADD src, src, NBYTES |
397 | SUB len, len, NBYTES | 397 | SUB len, len, NBYTES |
398 | EXC( STORE t0, 0(dst), s_exc_p1u) | 398 | EXC( STORE t0, 0(dst), .Ls_exc_p1u) |
399 | .set reorder /* DADDI_WAR */ | 399 | .set reorder /* DADDI_WAR */ |
400 | ADD dst, dst, NBYTES | 400 | ADD dst, dst, NBYTES |
401 | bne len, rem, 1b | 401 | bne len, rem, 1b |
402 | .set noreorder | 402 | .set noreorder |
403 | 403 | ||
404 | copy_bytes_checklen: | 404 | .Lcopy_bytes_checklen: |
405 | beqz len, done | 405 | beqz len, .Ldone |
406 | nop | 406 | nop |
407 | copy_bytes: | 407 | .Lcopy_bytes: |
408 | /* 0 < len < NBYTES */ | 408 | /* 0 < len < NBYTES */ |
409 | R10KCBARRIER(0(ra)) | 409 | R10KCBARRIER(0(ra)) |
410 | #define COPY_BYTE(N) \ | 410 | #define COPY_BYTE(N) \ |
411 | EXC( lb t0, N(src), l_exc); \ | 411 | EXC( lb t0, N(src), .Ll_exc); \ |
412 | SUB len, len, 1; \ | 412 | SUB len, len, 1; \ |
413 | beqz len, done; \ | 413 | beqz len, .Ldone; \ |
414 | EXC( sb t0, N(dst), s_exc_p1) | 414 | EXC( sb t0, N(dst), .Ls_exc_p1) |
415 | 415 | ||
416 | COPY_BYTE(0) | 416 | COPY_BYTE(0) |
417 | COPY_BYTE(1) | 417 | COPY_BYTE(1) |
@@ -421,16 +421,16 @@ EXC( sb t0, N(dst), s_exc_p1) | |||
421 | COPY_BYTE(4) | 421 | COPY_BYTE(4) |
422 | COPY_BYTE(5) | 422 | COPY_BYTE(5) |
423 | #endif | 423 | #endif |
424 | EXC( lb t0, NBYTES-2(src), l_exc) | 424 | EXC( lb t0, NBYTES-2(src), .Ll_exc) |
425 | SUB len, len, 1 | 425 | SUB len, len, 1 |
426 | jr ra | 426 | jr ra |
427 | EXC( sb t0, NBYTES-2(dst), s_exc_p1) | 427 | EXC( sb t0, NBYTES-2(dst), .Ls_exc_p1) |
428 | done: | 428 | .Ldone: |
429 | jr ra | 429 | jr ra |
430 | nop | 430 | nop |
431 | END(memcpy) | 431 | END(memcpy) |
432 | 432 | ||
433 | l_exc_copy: | 433 | .Ll_exc_copy: |
434 | /* | 434 | /* |
435 | * Copy bytes from src until faulting load address (or until a | 435 | * Copy bytes from src until faulting load address (or until a |
436 | * lb faults) | 436 | * lb faults) |
@@ -445,14 +445,14 @@ l_exc_copy: | |||
445 | nop | 445 | nop |
446 | LOAD t0, THREAD_BUADDR(t0) | 446 | LOAD t0, THREAD_BUADDR(t0) |
447 | 1: | 447 | 1: |
448 | EXC( lb t1, 0(src), l_exc) | 448 | EXC( lb t1, 0(src), .Ll_exc) |
449 | ADD src, src, 1 | 449 | ADD src, src, 1 |
450 | sb t1, 0(dst) # can't fault -- we're copy_from_user | 450 | sb t1, 0(dst) # can't fault -- we're copy_from_user |
451 | .set reorder /* DADDI_WAR */ | 451 | .set reorder /* DADDI_WAR */ |
452 | ADD dst, dst, 1 | 452 | ADD dst, dst, 1 |
453 | bne src, t0, 1b | 453 | bne src, t0, 1b |
454 | .set noreorder | 454 | .set noreorder |
455 | l_exc: | 455 | .Ll_exc: |
456 | LOAD t0, TI_TASK($28) | 456 | LOAD t0, TI_TASK($28) |
457 | nop | 457 | nop |
458 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address | 458 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address |
@@ -471,7 +471,7 @@ l_exc: | |||
471 | */ | 471 | */ |
472 | .set reorder /* DADDI_WAR */ | 472 | .set reorder /* DADDI_WAR */ |
473 | SUB src, len, 1 | 473 | SUB src, len, 1 |
474 | beqz len, done | 474 | beqz len, .Ldone |
475 | .set noreorder | 475 | .set noreorder |
476 | 1: sb zero, 0(dst) | 476 | 1: sb zero, 0(dst) |
477 | ADD dst, dst, 1 | 477 | ADD dst, dst, 1 |
@@ -492,7 +492,7 @@ l_exc: | |||
492 | 492 | ||
493 | #define SEXC(n) \ | 493 | #define SEXC(n) \ |
494 | .set reorder; /* DADDI_WAR */ \ | 494 | .set reorder; /* DADDI_WAR */ \ |
495 | s_exc_p ## n ## u: \ | 495 | .Ls_exc_p ## n ## u: \ |
496 | ADD len, len, n*NBYTES; \ | 496 | ADD len, len, n*NBYTES; \ |
497 | jr ra; \ | 497 | jr ra; \ |
498 | .set noreorder | 498 | .set noreorder |
@@ -506,12 +506,12 @@ SEXC(3) | |||
506 | SEXC(2) | 506 | SEXC(2) |
507 | SEXC(1) | 507 | SEXC(1) |
508 | 508 | ||
509 | s_exc_p1: | 509 | .Ls_exc_p1: |
510 | .set reorder /* DADDI_WAR */ | 510 | .set reorder /* DADDI_WAR */ |
511 | ADD len, len, 1 | 511 | ADD len, len, 1 |
512 | jr ra | 512 | jr ra |
513 | .set noreorder | 513 | .set noreorder |
514 | s_exc: | 514 | .Ls_exc: |
515 | jr ra | 515 | jr ra |
516 | nop | 516 | nop |
517 | 517 | ||
@@ -522,20 +522,20 @@ LEAF(memmove) | |||
522 | sltu t0, a1, t0 # dst + len <= src -> memcpy | 522 | sltu t0, a1, t0 # dst + len <= src -> memcpy |
523 | sltu t1, a0, t1 # dst >= src + len -> memcpy | 523 | sltu t1, a0, t1 # dst >= src + len -> memcpy |
524 | and t0, t1 | 524 | and t0, t1 |
525 | beqz t0, __memcpy | 525 | beqz t0, .L__memcpy |
526 | move v0, a0 /* return value */ | 526 | move v0, a0 /* return value */ |
527 | beqz a2, r_out | 527 | beqz a2, .Lr_out |
528 | END(memmove) | 528 | END(memmove) |
529 | 529 | ||
530 | /* fall through to __rmemcpy */ | 530 | /* fall through to __rmemcpy */ |
531 | LEAF(__rmemcpy) /* a0=dst a1=src a2=len */ | 531 | LEAF(__rmemcpy) /* a0=dst a1=src a2=len */ |
532 | sltu t0, a1, a0 | 532 | sltu t0, a1, a0 |
533 | beqz t0, r_end_bytes_up # src >= dst | 533 | beqz t0, .Lr_end_bytes_up # src >= dst |
534 | nop | 534 | nop |
535 | ADD a0, a2 # dst = dst + len | 535 | ADD a0, a2 # dst = dst + len |
536 | ADD a1, a2 # src = src + len | 536 | ADD a1, a2 # src = src + len |
537 | 537 | ||
538 | r_end_bytes: | 538 | .Lr_end_bytes: |
539 | R10KCBARRIER(0(ra)) | 539 | R10KCBARRIER(0(ra)) |
540 | lb t0, -1(a1) | 540 | lb t0, -1(a1) |
541 | SUB a2, a2, 0x1 | 541 | SUB a2, a2, 0x1 |
@@ -543,14 +543,14 @@ r_end_bytes: | |||
543 | SUB a1, a1, 0x1 | 543 | SUB a1, a1, 0x1 |
544 | .set reorder /* DADDI_WAR */ | 544 | .set reorder /* DADDI_WAR */ |
545 | SUB a0, a0, 0x1 | 545 | SUB a0, a0, 0x1 |
546 | bnez a2, r_end_bytes | 546 | bnez a2, .Lr_end_bytes |
547 | .set noreorder | 547 | .set noreorder |
548 | 548 | ||
549 | r_out: | 549 | .Lr_out: |
550 | jr ra | 550 | jr ra |
551 | move a2, zero | 551 | move a2, zero |
552 | 552 | ||
553 | r_end_bytes_up: | 553 | .Lr_end_bytes_up: |
554 | R10KCBARRIER(0(ra)) | 554 | R10KCBARRIER(0(ra)) |
555 | lb t0, (a1) | 555 | lb t0, (a1) |
556 | SUB a2, a2, 0x1 | 556 | SUB a2, a2, 0x1 |
@@ -558,7 +558,7 @@ r_end_bytes_up: | |||
558 | ADD a1, a1, 0x1 | 558 | ADD a1, a1, 0x1 |
559 | .set reorder /* DADDI_WAR */ | 559 | .set reorder /* DADDI_WAR */ |
560 | ADD a0, a0, 0x1 | 560 | ADD a0, a0, 0x1 |
561 | bnez a2, r_end_bytes_up | 561 | bnez a2, .Lr_end_bytes_up |
562 | .set noreorder | 562 | .set noreorder |
563 | 563 | ||
564 | jr ra | 564 | jr ra |
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index c018a4721693..77dc3b20110a 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S | |||
@@ -72,7 +72,7 @@ LEAF(memset) | |||
72 | 72 | ||
73 | FEXPORT(__bzero) | 73 | FEXPORT(__bzero) |
74 | sltiu t0, a2, LONGSIZE /* very small region? */ | 74 | sltiu t0, a2, LONGSIZE /* very small region? */ |
75 | bnez t0, small_memset | 75 | bnez t0, .Lsmall_memset |
76 | andi t0, a0, LONGMASK /* aligned? */ | 76 | andi t0, a0, LONGMASK /* aligned? */ |
77 | 77 | ||
78 | #ifndef CONFIG_CPU_DADDI_WORKAROUNDS | 78 | #ifndef CONFIG_CPU_DADDI_WORKAROUNDS |
@@ -88,28 +88,28 @@ FEXPORT(__bzero) | |||
88 | 88 | ||
89 | R10KCBARRIER(0(ra)) | 89 | R10KCBARRIER(0(ra)) |
90 | #ifdef __MIPSEB__ | 90 | #ifdef __MIPSEB__ |
91 | EX(LONG_S_L, a1, (a0), first_fixup) /* make word/dword aligned */ | 91 | EX(LONG_S_L, a1, (a0), .Lfirst_fixup) /* make word/dword aligned */ |
92 | #endif | 92 | #endif |
93 | #ifdef __MIPSEL__ | 93 | #ifdef __MIPSEL__ |
94 | EX(LONG_S_R, a1, (a0), first_fixup) /* make word/dword aligned */ | 94 | EX(LONG_S_R, a1, (a0), .Lfirst_fixup) /* make word/dword aligned */ |
95 | #endif | 95 | #endif |
96 | PTR_SUBU a0, t0 /* long align ptr */ | 96 | PTR_SUBU a0, t0 /* long align ptr */ |
97 | PTR_ADDU a2, t0 /* correct size */ | 97 | PTR_ADDU a2, t0 /* correct size */ |
98 | 98 | ||
99 | 1: ori t1, a2, 0x3f /* # of full blocks */ | 99 | 1: ori t1, a2, 0x3f /* # of full blocks */ |
100 | xori t1, 0x3f | 100 | xori t1, 0x3f |
101 | beqz t1, memset_partial /* no block to fill */ | 101 | beqz t1, .Lmemset_partial /* no block to fill */ |
102 | andi t0, a2, 0x40-LONGSIZE | 102 | andi t0, a2, 0x40-LONGSIZE |
103 | 103 | ||
104 | PTR_ADDU t1, a0 /* end address */ | 104 | PTR_ADDU t1, a0 /* end address */ |
105 | .set reorder | 105 | .set reorder |
106 | 1: PTR_ADDIU a0, 64 | 106 | 1: PTR_ADDIU a0, 64 |
107 | R10KCBARRIER(0(ra)) | 107 | R10KCBARRIER(0(ra)) |
108 | f_fill64 a0, -64, a1, fwd_fixup | 108 | f_fill64 a0, -64, a1, .Lfwd_fixup |
109 | bne t1, a0, 1b | 109 | bne t1, a0, 1b |
110 | .set noreorder | 110 | .set noreorder |
111 | 111 | ||
112 | memset_partial: | 112 | .Lmemset_partial: |
113 | R10KCBARRIER(0(ra)) | 113 | R10KCBARRIER(0(ra)) |
114 | PTR_LA t1, 2f /* where to start */ | 114 | PTR_LA t1, 2f /* where to start */ |
115 | #if LONGSIZE == 4 | 115 | #if LONGSIZE == 4 |
@@ -126,7 +126,7 @@ memset_partial: | |||
126 | .set push | 126 | .set push |
127 | .set noreorder | 127 | .set noreorder |
128 | .set nomacro | 128 | .set nomacro |
129 | f_fill64 a0, -64, a1, partial_fixup /* ... but first do longs ... */ | 129 | f_fill64 a0, -64, a1, .Lpartial_fixup /* ... but first do longs ... */ |
130 | 2: .set pop | 130 | 2: .set pop |
131 | andi a2, LONGMASK /* At most one long to go */ | 131 | andi a2, LONGMASK /* At most one long to go */ |
132 | 132 | ||
@@ -134,15 +134,15 @@ memset_partial: | |||
134 | PTR_ADDU a0, a2 /* What's left */ | 134 | PTR_ADDU a0, a2 /* What's left */ |
135 | R10KCBARRIER(0(ra)) | 135 | R10KCBARRIER(0(ra)) |
136 | #ifdef __MIPSEB__ | 136 | #ifdef __MIPSEB__ |
137 | EX(LONG_S_R, a1, -1(a0), last_fixup) | 137 | EX(LONG_S_R, a1, -1(a0), .Llast_fixup) |
138 | #endif | 138 | #endif |
139 | #ifdef __MIPSEL__ | 139 | #ifdef __MIPSEL__ |
140 | EX(LONG_S_L, a1, -1(a0), last_fixup) | 140 | EX(LONG_S_L, a1, -1(a0), .Llast_fixup) |
141 | #endif | 141 | #endif |
142 | 1: jr ra | 142 | 1: jr ra |
143 | move a2, zero | 143 | move a2, zero |
144 | 144 | ||
145 | small_memset: | 145 | .Lsmall_memset: |
146 | beqz a2, 2f | 146 | beqz a2, 2f |
147 | PTR_ADDU t1, a0, a2 | 147 | PTR_ADDU t1, a0, a2 |
148 | 148 | ||
@@ -155,11 +155,11 @@ small_memset: | |||
155 | move a2, zero | 155 | move a2, zero |
156 | END(memset) | 156 | END(memset) |
157 | 157 | ||
158 | first_fixup: | 158 | .Lfirst_fixup: |
159 | jr ra | 159 | jr ra |
160 | nop | 160 | nop |
161 | 161 | ||
162 | fwd_fixup: | 162 | .Lfwd_fixup: |
163 | PTR_L t0, TI_TASK($28) | 163 | PTR_L t0, TI_TASK($28) |
164 | LONG_L t0, THREAD_BUADDR(t0) | 164 | LONG_L t0, THREAD_BUADDR(t0) |
165 | andi a2, 0x3f | 165 | andi a2, 0x3f |
@@ -167,7 +167,7 @@ fwd_fixup: | |||
167 | jr ra | 167 | jr ra |
168 | LONG_SUBU a2, t0 | 168 | LONG_SUBU a2, t0 |
169 | 169 | ||
170 | partial_fixup: | 170 | .Lpartial_fixup: |
171 | PTR_L t0, TI_TASK($28) | 171 | PTR_L t0, TI_TASK($28) |
172 | LONG_L t0, THREAD_BUADDR(t0) | 172 | LONG_L t0, THREAD_BUADDR(t0) |
173 | andi a2, LONGMASK | 173 | andi a2, LONGMASK |
@@ -175,6 +175,6 @@ partial_fixup: | |||
175 | jr ra | 175 | jr ra |
176 | LONG_SUBU a2, t0 | 176 | LONG_SUBU a2, t0 |
177 | 177 | ||
178 | last_fixup: | 178 | .Llast_fixup: |
179 | jr ra | 179 | jr ra |
180 | andi v1, a2, LONGMASK | 180 | andi v1, a2, LONGMASK |
diff --git a/arch/mips/lib/strlen_user.S b/arch/mips/lib/strlen_user.S index eca558d83a37..fdbb970f670d 100644 --- a/arch/mips/lib/strlen_user.S +++ b/arch/mips/lib/strlen_user.S | |||
@@ -24,16 +24,16 @@ | |||
24 | LEAF(__strlen_user_asm) | 24 | LEAF(__strlen_user_asm) |
25 | LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? | 25 | LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? |
26 | and v0, a0 | 26 | and v0, a0 |
27 | bnez v0, fault | 27 | bnez v0, .Lfault |
28 | 28 | ||
29 | FEXPORT(__strlen_user_nocheck_asm) | 29 | FEXPORT(__strlen_user_nocheck_asm) |
30 | move v0, a0 | 30 | move v0, a0 |
31 | 1: EX(lb, t0, (v0), fault) | 31 | 1: EX(lb, t0, (v0), .Lfault) |
32 | PTR_ADDIU v0, 1 | 32 | PTR_ADDIU v0, 1 |
33 | bnez t0, 1b | 33 | bnez t0, 1b |
34 | PTR_SUBU v0, a0 | 34 | PTR_SUBU v0, a0 |
35 | jr ra | 35 | jr ra |
36 | END(__strlen_user_asm) | 36 | END(__strlen_user_asm) |
37 | 37 | ||
38 | fault: move v0, zero | 38 | .Lfault: move v0, zero |
39 | jr ra | 39 | jr ra |
diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S index 8a63f72b81d3..7201b2ff08c8 100644 --- a/arch/mips/lib/strncpy_user.S +++ b/arch/mips/lib/strncpy_user.S | |||
@@ -30,13 +30,13 @@ | |||
30 | LEAF(__strncpy_from_user_asm) | 30 | LEAF(__strncpy_from_user_asm) |
31 | LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? | 31 | LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? |
32 | and v0, a1 | 32 | and v0, a1 |
33 | bnez v0, fault | 33 | bnez v0, .Lfault |
34 | 34 | ||
35 | FEXPORT(__strncpy_from_user_nocheck_asm) | 35 | FEXPORT(__strncpy_from_user_nocheck_asm) |
36 | move v0, zero | 36 | move v0, zero |
37 | move v1, a1 | 37 | move v1, a1 |
38 | .set noreorder | 38 | .set noreorder |
39 | 1: EX(lbu, t0, (v1), fault) | 39 | 1: EX(lbu, t0, (v1), .Lfault) |
40 | PTR_ADDIU v1, 1 | 40 | PTR_ADDIU v1, 1 |
41 | R10KCBARRIER(0(ra)) | 41 | R10KCBARRIER(0(ra)) |
42 | beqz t0, 2f | 42 | beqz t0, 2f |
@@ -47,13 +47,13 @@ FEXPORT(__strncpy_from_user_nocheck_asm) | |||
47 | bne v0, a2, 1b | 47 | bne v0, a2, 1b |
48 | 2: PTR_ADDU t0, a1, v0 | 48 | 2: PTR_ADDU t0, a1, v0 |
49 | xor t0, a1 | 49 | xor t0, a1 |
50 | bltz t0, fault | 50 | bltz t0, .Lfault |
51 | jr ra # return n | 51 | jr ra # return n |
52 | END(__strncpy_from_user_asm) | 52 | END(__strncpy_from_user_asm) |
53 | 53 | ||
54 | fault: li v0, -EFAULT | 54 | .Lfault: li v0, -EFAULT |
55 | jr ra | 55 | jr ra |
56 | 56 | ||
57 | .section __ex_table,"a" | 57 | .section __ex_table,"a" |
58 | PTR 1b, fault | 58 | PTR 1b, .Lfault |
59 | .previous | 59 | .previous |
diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S index c0ea15194a0e..c768e3000616 100644 --- a/arch/mips/lib/strnlen_user.S +++ b/arch/mips/lib/strnlen_user.S | |||
@@ -28,18 +28,19 @@ | |||
28 | LEAF(__strnlen_user_asm) | 28 | LEAF(__strnlen_user_asm) |
29 | LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? | 29 | LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? |
30 | and v0, a0 | 30 | and v0, a0 |
31 | bnez v0, fault | 31 | bnez v0, .Lfault |
32 | 32 | ||
33 | FEXPORT(__strnlen_user_nocheck_asm) | 33 | FEXPORT(__strnlen_user_nocheck_asm) |
34 | move v0, a0 | 34 | move v0, a0 |
35 | PTR_ADDU a1, a0 # stop pointer | 35 | PTR_ADDU a1, a0 # stop pointer |
36 | 1: beq v0, a1, 1f # limit reached? | 36 | 1: beq v0, a1, 1f # limit reached? |
37 | EX(lb, t0, (v0), fault) | 37 | EX(lb, t0, (v0), .Lfault) |
38 | PTR_ADDU v0, 1 | 38 | PTR_ADDU v0, 1 |
39 | bnez t0, 1b | 39 | bnez t0, 1b |
40 | 1: PTR_SUBU v0, a0 | 40 | 1: PTR_SUBU v0, a0 |
41 | jr ra | 41 | jr ra |
42 | END(__strnlen_user_asm) | 42 | END(__strnlen_user_asm) |
43 | 43 | ||
44 | fault: move v0, zero | 44 | .Lfault: |
45 | move v0, zero | ||
45 | jr ra | 46 | jr ra |