author    | Ralf Baechle <ralf@linux-mips.org> | 2008-01-29 05:14:59 -0500
committer | Ralf Baechle <ralf@linux-mips.org> | 2008-01-29 05:14:59 -0500
commit    | c5ec1983e45d25446a023e98207e30ab1bf2311a (patch)
tree      | 68124f5166dc284d7d1c6280f4642a6719a6c554 /arch/mips/lib
parent    | f21d8508080f05a8b05c99fce57cd76ac43666fa (diff)
[MIPS] Eliminate local symbols from the symbol table.
These symbols show up in oprofile output, stacktraces and similar places,
but only make the output harder to read. Many identical symbol names such
as "both_aligned" were also used in multiple source files, making it
impossible to tell which file was actually meant. So let's get rid of them.
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
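Every hunk below applies the same mechanical change: a branch-target label that is private to one routine gains a ".L" prefix (for example "both_aligned" becomes ".Lboth_aligned"), and every branch, macro argument and exception-table reference to it is updated to match. On ELF targets GNU as treats ".L"-prefixed names as assembler-local labels and does not emit them into the object file's symbol table, so oprofile and stack dumps attribute those instructions to the enclosing global symbol (csum_partial, memcpy, memset, ...) instead of to an ambiguous local name. A minimal standalone sketch of the effect (not taken from the patch; the "delay" routine and ".Lloop" label are made-up names used only for illustration):

	.text
	.set	noreorder
	.globl	delay
delay:					# global symbol: always kept in the symbol table
	move	$t0, $zero
.Lloop:					# ".L" prefix: assembler-local, never emitted, so
	addiu	$t0, $t0, 1		# profilers charge these instructions to "delay"
	bne	$t0, $a0, .Lloop	# loop until $t0 == $a0
	 nop				# branch delay slot
	jr	$ra
	 nop				# before this patch the label would have been a plain
					# "loop:", which gas keeps as a LOCAL ELF symbol

Assembling the sketch once with ".Lloop" and once with a plain "loop:" label and comparing "readelf -s" output of the two objects shows the difference: only the plain label turns up as a LOCAL symbol.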
Diffstat (limited to 'arch/mips/lib')
-rw-r--r-- | arch/mips/lib/csum_partial.S    | 214
-rw-r--r-- | arch/mips/lib/memcpy-inatomic.S | 116
-rw-r--r-- | arch/mips/lib/memcpy.S          | 182
-rw-r--r-- | arch/mips/lib/memset.S          |  28
-rw-r--r-- | arch/mips/lib/strlen_user.S     |   6
-rw-r--r-- | arch/mips/lib/strncpy_user.S    |  10
-rw-r--r-- | arch/mips/lib/strnlen_user.S    |   7
7 files changed, 282 insertions, 281 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 957a82484e3e..8d7784122c14 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -96,13 +96,13 @@ LEAF(csum_partial) | |||
96 | move t7, zero | 96 | move t7, zero |
97 | 97 | ||
98 | sltiu t8, a1, 0x8 | 98 | sltiu t8, a1, 0x8 |
99 | bnez t8, small_csumcpy /* < 8 bytes to copy */ | 99 | bnez t8, .Lsmall_csumcpy /* < 8 bytes to copy */ |
100 | move t2, a1 | 100 | move t2, a1 |
101 | 101 | ||
102 | andi t7, src, 0x1 /* odd buffer? */ | 102 | andi t7, src, 0x1 /* odd buffer? */ |
103 | 103 | ||
104 | hword_align: | 104 | .Lhword_align: |
105 | beqz t7, word_align | 105 | beqz t7, .Lword_align |
106 | andi t8, src, 0x2 | 106 | andi t8, src, 0x2 |
107 | 107 | ||
108 | lbu t0, (src) | 108 | lbu t0, (src) |
@@ -114,8 +114,8 @@ hword_align: | |||
114 | PTR_ADDU src, src, 0x1 | 114 | PTR_ADDU src, src, 0x1 |
115 | andi t8, src, 0x2 | 115 | andi t8, src, 0x2 |
116 | 116 | ||
117 | word_align: | 117 | .Lword_align: |
118 | beqz t8, dword_align | 118 | beqz t8, .Ldword_align |
119 | sltiu t8, a1, 56 | 119 | sltiu t8, a1, 56 |
120 | 120 | ||
121 | lhu t0, (src) | 121 | lhu t0, (src) |
@@ -124,12 +124,12 @@ word_align: | |||
124 | sltiu t8, a1, 56 | 124 | sltiu t8, a1, 56 |
125 | PTR_ADDU src, src, 0x2 | 125 | PTR_ADDU src, src, 0x2 |
126 | 126 | ||
127 | dword_align: | 127 | .Ldword_align: |
128 | bnez t8, do_end_words | 128 | bnez t8, .Ldo_end_words |
129 | move t8, a1 | 129 | move t8, a1 |
130 | 130 | ||
131 | andi t8, src, 0x4 | 131 | andi t8, src, 0x4 |
132 | beqz t8, qword_align | 132 | beqz t8, .Lqword_align |
133 | andi t8, src, 0x8 | 133 | andi t8, src, 0x8 |
134 | 134 | ||
135 | lw t0, 0x00(src) | 135 | lw t0, 0x00(src) |
@@ -138,8 +138,8 @@ dword_align: | |||
138 | PTR_ADDU src, src, 0x4 | 138 | PTR_ADDU src, src, 0x4 |
139 | andi t8, src, 0x8 | 139 | andi t8, src, 0x8 |
140 | 140 | ||
141 | qword_align: | 141 | .Lqword_align: |
142 | beqz t8, oword_align | 142 | beqz t8, .Loword_align |
143 | andi t8, src, 0x10 | 143 | andi t8, src, 0x10 |
144 | 144 | ||
145 | #ifdef USE_DOUBLE | 145 | #ifdef USE_DOUBLE |
@@ -156,8 +156,8 @@ qword_align: | |||
156 | PTR_ADDU src, src, 0x8 | 156 | PTR_ADDU src, src, 0x8 |
157 | andi t8, src, 0x10 | 157 | andi t8, src, 0x10 |
158 | 158 | ||
159 | oword_align: | 159 | .Loword_align: |
160 | beqz t8, begin_movement | 160 | beqz t8, .Lbegin_movement |
161 | LONG_SRL t8, a1, 0x7 | 161 | LONG_SRL t8, a1, 0x7 |
162 | 162 | ||
163 | #ifdef USE_DOUBLE | 163 | #ifdef USE_DOUBLE |
@@ -172,11 +172,11 @@ oword_align: | |||
172 | PTR_ADDU src, src, 0x10 | 172 | PTR_ADDU src, src, 0x10 |
173 | LONG_SRL t8, a1, 0x7 | 173 | LONG_SRL t8, a1, 0x7 |
174 | 174 | ||
175 | begin_movement: | 175 | .Lbegin_movement: |
176 | beqz t8, 1f | 176 | beqz t8, 1f |
177 | andi t2, a1, 0x40 | 177 | andi t2, a1, 0x40 |
178 | 178 | ||
179 | move_128bytes: | 179 | .Lmove_128bytes: |
180 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) | 180 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) |
181 | CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) | 181 | CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) |
182 | CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4) | 182 | CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4) |
@@ -184,43 +184,43 @@ move_128bytes: | |||
184 | LONG_SUBU t8, t8, 0x01 | 184 | LONG_SUBU t8, t8, 0x01 |
185 | .set reorder /* DADDI_WAR */ | 185 | .set reorder /* DADDI_WAR */ |
186 | PTR_ADDU src, src, 0x80 | 186 | PTR_ADDU src, src, 0x80 |
187 | bnez t8, move_128bytes | 187 | bnez t8, .Lmove_128bytes |
188 | .set noreorder | 188 | .set noreorder |
189 | 189 | ||
190 | 1: | 190 | 1: |
191 | beqz t2, 1f | 191 | beqz t2, 1f |
192 | andi t2, a1, 0x20 | 192 | andi t2, a1, 0x20 |
193 | 193 | ||
194 | move_64bytes: | 194 | .Lmove_64bytes: |
195 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) | 195 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) |
196 | CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) | 196 | CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) |
197 | PTR_ADDU src, src, 0x40 | 197 | PTR_ADDU src, src, 0x40 |
198 | 198 | ||
199 | 1: | 199 | 1: |
200 | beqz t2, do_end_words | 200 | beqz t2, .Ldo_end_words |
201 | andi t8, a1, 0x1c | 201 | andi t8, a1, 0x1c |
202 | 202 | ||
203 | move_32bytes: | 203 | .Lmove_32bytes: |
204 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) | 204 | CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) |
205 | andi t8, a1, 0x1c | 205 | andi t8, a1, 0x1c |
206 | PTR_ADDU src, src, 0x20 | 206 | PTR_ADDU src, src, 0x20 |
207 | 207 | ||
208 | do_end_words: | 208 | .Ldo_end_words: |
209 | beqz t8, small_csumcpy | 209 | beqz t8, .Lsmall_csumcpy |
210 | andi t2, a1, 0x3 | 210 | andi t2, a1, 0x3 |
211 | LONG_SRL t8, t8, 0x2 | 211 | LONG_SRL t8, t8, 0x2 |
212 | 212 | ||
213 | end_words: | 213 | .Lend_words: |
214 | lw t0, (src) | 214 | lw t0, (src) |
215 | LONG_SUBU t8, t8, 0x1 | 215 | LONG_SUBU t8, t8, 0x1 |
216 | ADDC(sum, t0) | 216 | ADDC(sum, t0) |
217 | .set reorder /* DADDI_WAR */ | 217 | .set reorder /* DADDI_WAR */ |
218 | PTR_ADDU src, src, 0x4 | 218 | PTR_ADDU src, src, 0x4 |
219 | bnez t8, end_words | 219 | bnez t8, .Lend_words |
220 | .set noreorder | 220 | .set noreorder |
221 | 221 | ||
222 | /* unknown src alignment and < 8 bytes to go */ | 222 | /* unknown src alignment and < 8 bytes to go */ |
223 | small_csumcpy: | 223 | .Lsmall_csumcpy: |
224 | move a1, t2 | 224 | move a1, t2 |
225 | 225 | ||
226 | andi t0, a1, 4 | 226 | andi t0, a1, 4 |
@@ -413,48 +413,48 @@ FEXPORT(csum_partial_copy_nocheck) | |||
413 | */ | 413 | */ |
414 | sltu t2, len, NBYTES | 414 | sltu t2, len, NBYTES |
415 | and t1, dst, ADDRMASK | 415 | and t1, dst, ADDRMASK |
416 | bnez t2, copy_bytes_checklen | 416 | bnez t2, .Lcopy_bytes_checklen |
417 | and t0, src, ADDRMASK | 417 | and t0, src, ADDRMASK |
418 | andi odd, dst, 0x1 /* odd buffer? */ | 418 | andi odd, dst, 0x1 /* odd buffer? */ |
419 | bnez t1, dst_unaligned | 419 | bnez t1, .Ldst_unaligned |
420 | nop | 420 | nop |
421 | bnez t0, src_unaligned_dst_aligned | 421 | bnez t0, .Lsrc_unaligned_dst_aligned |
422 | /* | 422 | /* |
423 | * use delay slot for fall-through | 423 | * use delay slot for fall-through |
424 | * src and dst are aligned; need to compute rem | 424 | * src and dst are aligned; need to compute rem |
425 | */ | 425 | */ |
426 | both_aligned: | 426 | .Lboth_aligned: |
427 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter | 427 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter |
428 | beqz t0, cleanup_both_aligned # len < 8*NBYTES | 428 | beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES |
429 | nop | 429 | nop |
430 | SUB len, 8*NBYTES # subtract here for bgez loop | 430 | SUB len, 8*NBYTES # subtract here for bgez loop |
431 | .align 4 | 431 | .align 4 |
432 | 1: | 432 | 1: |
433 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 433 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
434 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 434 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
435 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 435 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
436 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 436 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) |
437 | EXC( LOAD t4, UNIT(4)(src), l_exc_copy) | 437 | EXC( LOAD t4, UNIT(4)(src), .Ll_exc_copy) |
438 | EXC( LOAD t5, UNIT(5)(src), l_exc_copy) | 438 | EXC( LOAD t5, UNIT(5)(src), .Ll_exc_copy) |
439 | EXC( LOAD t6, UNIT(6)(src), l_exc_copy) | 439 | EXC( LOAD t6, UNIT(6)(src), .Ll_exc_copy) |
440 | EXC( LOAD t7, UNIT(7)(src), l_exc_copy) | 440 | EXC( LOAD t7, UNIT(7)(src), .Ll_exc_copy) |
441 | SUB len, len, 8*NBYTES | 441 | SUB len, len, 8*NBYTES |
442 | ADD src, src, 8*NBYTES | 442 | ADD src, src, 8*NBYTES |
443 | EXC( STORE t0, UNIT(0)(dst), s_exc) | 443 | EXC( STORE t0, UNIT(0)(dst), .Ls_exc) |
444 | ADDC(sum, t0) | 444 | ADDC(sum, t0) |
445 | EXC( STORE t1, UNIT(1)(dst), s_exc) | 445 | EXC( STORE t1, UNIT(1)(dst), .Ls_exc) |
446 | ADDC(sum, t1) | 446 | ADDC(sum, t1) |
447 | EXC( STORE t2, UNIT(2)(dst), s_exc) | 447 | EXC( STORE t2, UNIT(2)(dst), .Ls_exc) |
448 | ADDC(sum, t2) | 448 | ADDC(sum, t2) |
449 | EXC( STORE t3, UNIT(3)(dst), s_exc) | 449 | EXC( STORE t3, UNIT(3)(dst), .Ls_exc) |
450 | ADDC(sum, t3) | 450 | ADDC(sum, t3) |
451 | EXC( STORE t4, UNIT(4)(dst), s_exc) | 451 | EXC( STORE t4, UNIT(4)(dst), .Ls_exc) |
452 | ADDC(sum, t4) | 452 | ADDC(sum, t4) |
453 | EXC( STORE t5, UNIT(5)(dst), s_exc) | 453 | EXC( STORE t5, UNIT(5)(dst), .Ls_exc) |
454 | ADDC(sum, t5) | 454 | ADDC(sum, t5) |
455 | EXC( STORE t6, UNIT(6)(dst), s_exc) | 455 | EXC( STORE t6, UNIT(6)(dst), .Ls_exc) |
456 | ADDC(sum, t6) | 456 | ADDC(sum, t6) |
457 | EXC( STORE t7, UNIT(7)(dst), s_exc) | 457 | EXC( STORE t7, UNIT(7)(dst), .Ls_exc) |
458 | ADDC(sum, t7) | 458 | ADDC(sum, t7) |
459 | .set reorder /* DADDI_WAR */ | 459 | .set reorder /* DADDI_WAR */ |
460 | ADD dst, dst, 8*NBYTES | 460 | ADD dst, dst, 8*NBYTES |
@@ -465,44 +465,44 @@ EXC( STORE t7, UNIT(7)(dst), s_exc) | |||
465 | /* | 465 | /* |
466 | * len == the number of bytes left to copy < 8*NBYTES | 466 | * len == the number of bytes left to copy < 8*NBYTES |
467 | */ | 467 | */ |
468 | cleanup_both_aligned: | 468 | .Lcleanup_both_aligned: |
469 | #define rem t7 | 469 | #define rem t7 |
470 | beqz len, done | 470 | beqz len, .Ldone |
471 | sltu t0, len, 4*NBYTES | 471 | sltu t0, len, 4*NBYTES |
472 | bnez t0, less_than_4units | 472 | bnez t0, .Lless_than_4units |
473 | and rem, len, (NBYTES-1) # rem = len % NBYTES | 473 | and rem, len, (NBYTES-1) # rem = len % NBYTES |
474 | /* | 474 | /* |
475 | * len >= 4*NBYTES | 475 | * len >= 4*NBYTES |
476 | */ | 476 | */ |
477 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 477 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
478 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 478 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
479 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 479 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
480 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 480 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) |
481 | SUB len, len, 4*NBYTES | 481 | SUB len, len, 4*NBYTES |
482 | ADD src, src, 4*NBYTES | 482 | ADD src, src, 4*NBYTES |
483 | EXC( STORE t0, UNIT(0)(dst), s_exc) | 483 | EXC( STORE t0, UNIT(0)(dst), .Ls_exc) |
484 | ADDC(sum, t0) | 484 | ADDC(sum, t0) |
485 | EXC( STORE t1, UNIT(1)(dst), s_exc) | 485 | EXC( STORE t1, UNIT(1)(dst), .Ls_exc) |
486 | ADDC(sum, t1) | 486 | ADDC(sum, t1) |
487 | EXC( STORE t2, UNIT(2)(dst), s_exc) | 487 | EXC( STORE t2, UNIT(2)(dst), .Ls_exc) |
488 | ADDC(sum, t2) | 488 | ADDC(sum, t2) |
489 | EXC( STORE t3, UNIT(3)(dst), s_exc) | 489 | EXC( STORE t3, UNIT(3)(dst), .Ls_exc) |
490 | ADDC(sum, t3) | 490 | ADDC(sum, t3) |
491 | .set reorder /* DADDI_WAR */ | 491 | .set reorder /* DADDI_WAR */ |
492 | ADD dst, dst, 4*NBYTES | 492 | ADD dst, dst, 4*NBYTES |
493 | beqz len, done | 493 | beqz len, .Ldone |
494 | .set noreorder | 494 | .set noreorder |
495 | less_than_4units: | 495 | .Lless_than_4units: |
496 | /* | 496 | /* |
497 | * rem = len % NBYTES | 497 | * rem = len % NBYTES |
498 | */ | 498 | */ |
499 | beq rem, len, copy_bytes | 499 | beq rem, len, .Lcopy_bytes |
500 | nop | 500 | nop |
501 | 1: | 501 | 1: |
502 | EXC( LOAD t0, 0(src), l_exc) | 502 | EXC( LOAD t0, 0(src), .Ll_exc) |
503 | ADD src, src, NBYTES | 503 | ADD src, src, NBYTES |
504 | SUB len, len, NBYTES | 504 | SUB len, len, NBYTES |
505 | EXC( STORE t0, 0(dst), s_exc) | 505 | EXC( STORE t0, 0(dst), .Ls_exc) |
506 | ADDC(sum, t0) | 506 | ADDC(sum, t0) |
507 | .set reorder /* DADDI_WAR */ | 507 | .set reorder /* DADDI_WAR */ |
508 | ADD dst, dst, NBYTES | 508 | ADD dst, dst, NBYTES |
@@ -521,20 +521,20 @@ EXC( STORE t0, 0(dst), s_exc) | |||
521 | * more instruction-level parallelism. | 521 | * more instruction-level parallelism. |
522 | */ | 522 | */ |
523 | #define bits t2 | 523 | #define bits t2 |
524 | beqz len, done | 524 | beqz len, .Ldone |
525 | ADD t1, dst, len # t1 is just past last byte of dst | 525 | ADD t1, dst, len # t1 is just past last byte of dst |
526 | li bits, 8*NBYTES | 526 | li bits, 8*NBYTES |
527 | SLL rem, len, 3 # rem = number of bits to keep | 527 | SLL rem, len, 3 # rem = number of bits to keep |
528 | EXC( LOAD t0, 0(src), l_exc) | 528 | EXC( LOAD t0, 0(src), .Ll_exc) |
529 | SUB bits, bits, rem # bits = number of bits to discard | 529 | SUB bits, bits, rem # bits = number of bits to discard |
530 | SHIFT_DISCARD t0, t0, bits | 530 | SHIFT_DISCARD t0, t0, bits |
531 | EXC( STREST t0, -1(t1), s_exc) | 531 | EXC( STREST t0, -1(t1), .Ls_exc) |
532 | SHIFT_DISCARD_REVERT t0, t0, bits | 532 | SHIFT_DISCARD_REVERT t0, t0, bits |
533 | .set reorder | 533 | .set reorder |
534 | ADDC(sum, t0) | 534 | ADDC(sum, t0) |
535 | b done | 535 | b .Ldone |
536 | .set noreorder | 536 | .set noreorder |
537 | dst_unaligned: | 537 | .Ldst_unaligned: |
538 | /* | 538 | /* |
539 | * dst is unaligned | 539 | * dst is unaligned |
540 | * t0 = src & ADDRMASK | 540 | * t0 = src & ADDRMASK |
@@ -545,25 +545,25 @@ dst_unaligned: | |||
545 | * Set match = (src and dst have same alignment) | 545 | * Set match = (src and dst have same alignment) |
546 | */ | 546 | */ |
547 | #define match rem | 547 | #define match rem |
548 | EXC( LDFIRST t3, FIRST(0)(src), l_exc) | 548 | EXC( LDFIRST t3, FIRST(0)(src), .Ll_exc) |
549 | ADD t2, zero, NBYTES | 549 | ADD t2, zero, NBYTES |
550 | EXC( LDREST t3, REST(0)(src), l_exc_copy) | 550 | EXC( LDREST t3, REST(0)(src), .Ll_exc_copy) |
551 | SUB t2, t2, t1 # t2 = number of bytes copied | 551 | SUB t2, t2, t1 # t2 = number of bytes copied |
552 | xor match, t0, t1 | 552 | xor match, t0, t1 |
553 | EXC( STFIRST t3, FIRST(0)(dst), s_exc) | 553 | EXC( STFIRST t3, FIRST(0)(dst), .Ls_exc) |
554 | SLL t4, t1, 3 # t4 = number of bits to discard | 554 | SLL t4, t1, 3 # t4 = number of bits to discard |
555 | SHIFT_DISCARD t3, t3, t4 | 555 | SHIFT_DISCARD t3, t3, t4 |
556 | /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */ | 556 | /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */ |
557 | ADDC(sum, t3) | 557 | ADDC(sum, t3) |
558 | beq len, t2, done | 558 | beq len, t2, .Ldone |
559 | SUB len, len, t2 | 559 | SUB len, len, t2 |
560 | ADD dst, dst, t2 | 560 | ADD dst, dst, t2 |
561 | beqz match, both_aligned | 561 | beqz match, .Lboth_aligned |
562 | ADD src, src, t2 | 562 | ADD src, src, t2 |
563 | 563 | ||
564 | src_unaligned_dst_aligned: | 564 | .Lsrc_unaligned_dst_aligned: |
565 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter | 565 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter |
566 | beqz t0, cleanup_src_unaligned | 566 | beqz t0, .Lcleanup_src_unaligned |
567 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES | 567 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES |
568 | 1: | 568 | 1: |
569 | /* | 569 | /* |
@@ -572,53 +572,53 @@ src_unaligned_dst_aligned: | |||
572 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses | 572 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses |
573 | * are to the same unit (unless src is aligned, but it's not). | 573 | * are to the same unit (unless src is aligned, but it's not). |
574 | */ | 574 | */ |
575 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 575 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
576 | EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy) | 576 | EXC( LDFIRST t1, FIRST(1)(src), .Ll_exc_copy) |
577 | SUB len, len, 4*NBYTES | 577 | SUB len, len, 4*NBYTES |
578 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 578 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) |
579 | EXC( LDREST t1, REST(1)(src), l_exc_copy) | 579 | EXC( LDREST t1, REST(1)(src), .Ll_exc_copy) |
580 | EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy) | 580 | EXC( LDFIRST t2, FIRST(2)(src), .Ll_exc_copy) |
581 | EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy) | 581 | EXC( LDFIRST t3, FIRST(3)(src), .Ll_exc_copy) |
582 | EXC( LDREST t2, REST(2)(src), l_exc_copy) | 582 | EXC( LDREST t2, REST(2)(src), .Ll_exc_copy) |
583 | EXC( LDREST t3, REST(3)(src), l_exc_copy) | 583 | EXC( LDREST t3, REST(3)(src), .Ll_exc_copy) |
584 | ADD src, src, 4*NBYTES | 584 | ADD src, src, 4*NBYTES |
585 | #ifdef CONFIG_CPU_SB1 | 585 | #ifdef CONFIG_CPU_SB1 |
586 | nop # improves slotting | 586 | nop # improves slotting |
587 | #endif | 587 | #endif |
588 | EXC( STORE t0, UNIT(0)(dst), s_exc) | 588 | EXC( STORE t0, UNIT(0)(dst), .Ls_exc) |
589 | ADDC(sum, t0) | 589 | ADDC(sum, t0) |
590 | EXC( STORE t1, UNIT(1)(dst), s_exc) | 590 | EXC( STORE t1, UNIT(1)(dst), .Ls_exc) |
591 | ADDC(sum, t1) | 591 | ADDC(sum, t1) |
592 | EXC( STORE t2, UNIT(2)(dst), s_exc) | 592 | EXC( STORE t2, UNIT(2)(dst), .Ls_exc) |
593 | ADDC(sum, t2) | 593 | ADDC(sum, t2) |
594 | EXC( STORE t3, UNIT(3)(dst), s_exc) | 594 | EXC( STORE t3, UNIT(3)(dst), .Ls_exc) |
595 | ADDC(sum, t3) | 595 | ADDC(sum, t3) |
596 | .set reorder /* DADDI_WAR */ | 596 | .set reorder /* DADDI_WAR */ |
597 | ADD dst, dst, 4*NBYTES | 597 | ADD dst, dst, 4*NBYTES |
598 | bne len, rem, 1b | 598 | bne len, rem, 1b |
599 | .set noreorder | 599 | .set noreorder |
600 | 600 | ||
601 | cleanup_src_unaligned: | 601 | .Lcleanup_src_unaligned: |
602 | beqz len, done | 602 | beqz len, .Ldone |
603 | and rem, len, NBYTES-1 # rem = len % NBYTES | 603 | and rem, len, NBYTES-1 # rem = len % NBYTES |
604 | beq rem, len, copy_bytes | 604 | beq rem, len, .Lcopy_bytes |
605 | nop | 605 | nop |
606 | 1: | 606 | 1: |
607 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 607 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
608 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 608 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) |
609 | ADD src, src, NBYTES | 609 | ADD src, src, NBYTES |
610 | SUB len, len, NBYTES | 610 | SUB len, len, NBYTES |
611 | EXC( STORE t0, 0(dst), s_exc) | 611 | EXC( STORE t0, 0(dst), .Ls_exc) |
612 | ADDC(sum, t0) | 612 | ADDC(sum, t0) |
613 | .set reorder /* DADDI_WAR */ | 613 | .set reorder /* DADDI_WAR */ |
614 | ADD dst, dst, NBYTES | 614 | ADD dst, dst, NBYTES |
615 | bne len, rem, 1b | 615 | bne len, rem, 1b |
616 | .set noreorder | 616 | .set noreorder |
617 | 617 | ||
618 | copy_bytes_checklen: | 618 | .Lcopy_bytes_checklen: |
619 | beqz len, done | 619 | beqz len, .Ldone |
620 | nop | 620 | nop |
621 | copy_bytes: | 621 | .Lcopy_bytes: |
622 | /* 0 < len < NBYTES */ | 622 | /* 0 < len < NBYTES */ |
623 | #ifdef CONFIG_CPU_LITTLE_ENDIAN | 623 | #ifdef CONFIG_CPU_LITTLE_ENDIAN |
624 | #define SHIFT_START 0 | 624 | #define SHIFT_START 0 |
@@ -629,14 +629,14 @@ copy_bytes: | |||
629 | #endif | 629 | #endif |
630 | move t2, zero # partial word | 630 | move t2, zero # partial word |
631 | li t3, SHIFT_START # shift | 631 | li t3, SHIFT_START # shift |
632 | /* use l_exc_copy here to return correct sum on fault */ | 632 | /* use .Ll_exc_copy here to return correct sum on fault */ |
633 | #define COPY_BYTE(N) \ | 633 | #define COPY_BYTE(N) \ |
634 | EXC( lbu t0, N(src), l_exc_copy); \ | 634 | EXC( lbu t0, N(src), .Ll_exc_copy); \ |
635 | SUB len, len, 1; \ | 635 | SUB len, len, 1; \ |
636 | EXC( sb t0, N(dst), s_exc); \ | 636 | EXC( sb t0, N(dst), .Ls_exc); \ |
637 | SLLV t0, t0, t3; \ | 637 | SLLV t0, t0, t3; \ |
638 | addu t3, SHIFT_INC; \ | 638 | addu t3, SHIFT_INC; \ |
639 | beqz len, copy_bytes_done; \ | 639 | beqz len, .Lcopy_bytes_done; \ |
640 | or t2, t0 | 640 | or t2, t0 |
641 | 641 | ||
642 | COPY_BYTE(0) | 642 | COPY_BYTE(0) |
@@ -647,14 +647,14 @@ EXC( sb t0, N(dst), s_exc); \ | |||
647 | COPY_BYTE(4) | 647 | COPY_BYTE(4) |
648 | COPY_BYTE(5) | 648 | COPY_BYTE(5) |
649 | #endif | 649 | #endif |
650 | EXC( lbu t0, NBYTES-2(src), l_exc_copy) | 650 | EXC( lbu t0, NBYTES-2(src), .Ll_exc_copy) |
651 | SUB len, len, 1 | 651 | SUB len, len, 1 |
652 | EXC( sb t0, NBYTES-2(dst), s_exc) | 652 | EXC( sb t0, NBYTES-2(dst), .Ls_exc) |
653 | SLLV t0, t0, t3 | 653 | SLLV t0, t0, t3 |
654 | or t2, t0 | 654 | or t2, t0 |
655 | copy_bytes_done: | 655 | .Lcopy_bytes_done: |
656 | ADDC(sum, t2) | 656 | ADDC(sum, t2) |
657 | done: | 657 | .Ldone: |
658 | /* fold checksum */ | 658 | /* fold checksum */ |
659 | .set push | 659 | .set push |
660 | .set noat | 660 | .set noat |
@@ -685,7 +685,7 @@ done: | |||
685 | jr ra | 685 | jr ra |
686 | .set noreorder | 686 | .set noreorder |
687 | 687 | ||
688 | l_exc_copy: | 688 | .Ll_exc_copy: |
689 | /* | 689 | /* |
690 | * Copy bytes from src until faulting load address (or until a | 690 | * Copy bytes from src until faulting load address (or until a |
691 | * lb faults) | 691 | * lb faults) |
@@ -700,7 +700,7 @@ l_exc_copy: | |||
700 | li t2, SHIFT_START | 700 | li t2, SHIFT_START |
701 | LOAD t0, THREAD_BUADDR(t0) | 701 | LOAD t0, THREAD_BUADDR(t0) |
702 | 1: | 702 | 1: |
703 | EXC( lbu t1, 0(src), l_exc) | 703 | EXC( lbu t1, 0(src), .Ll_exc) |
704 | ADD src, src, 1 | 704 | ADD src, src, 1 |
705 | sb t1, 0(dst) # can't fault -- we're copy_from_user | 705 | sb t1, 0(dst) # can't fault -- we're copy_from_user |
706 | SLLV t1, t1, t2 | 706 | SLLV t1, t1, t2 |
@@ -710,7 +710,7 @@ EXC( lbu t1, 0(src), l_exc) | |||
710 | ADD dst, dst, 1 | 710 | ADD dst, dst, 1 |
711 | bne src, t0, 1b | 711 | bne src, t0, 1b |
712 | .set noreorder | 712 | .set noreorder |
713 | l_exc: | 713 | .Ll_exc: |
714 | LOAD t0, TI_TASK($28) | 714 | LOAD t0, TI_TASK($28) |
715 | nop | 715 | nop |
716 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address | 716 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address |
@@ -729,7 +729,7 @@ l_exc: | |||
729 | */ | 729 | */ |
730 | .set reorder /* DADDI_WAR */ | 730 | .set reorder /* DADDI_WAR */ |
731 | SUB src, len, 1 | 731 | SUB src, len, 1 |
732 | beqz len, done | 732 | beqz len, .Ldone |
733 | .set noreorder | 733 | .set noreorder |
734 | 1: sb zero, 0(dst) | 734 | 1: sb zero, 0(dst) |
735 | ADD dst, dst, 1 | 735 | ADD dst, dst, 1 |
@@ -744,10 +744,10 @@ l_exc: | |||
744 | SUB src, src, v1 | 744 | SUB src, src, v1 |
745 | #endif | 745 | #endif |
746 | li v1, -EFAULT | 746 | li v1, -EFAULT |
747 | b done | 747 | b .Ldone |
748 | sw v1, (errptr) | 748 | sw v1, (errptr) |
749 | 749 | ||
750 | s_exc: | 750 | .Ls_exc: |
751 | li v0, -1 /* invalid checksum */ | 751 | li v0, -1 /* invalid checksum */ |
752 | li v1, -EFAULT | 752 | li v1, -EFAULT |
753 | jr ra | 753 | jr ra |
diff --git a/arch/mips/lib/memcpy-inatomic.S b/arch/mips/lib/memcpy-inatomic.S
index d1b08f5d6860..736d0fb56a94 100644
--- a/arch/mips/lib/memcpy-inatomic.S
+++ b/arch/mips/lib/memcpy-inatomic.S
@@ -209,36 +209,36 @@ LEAF(__copy_user_inatomic) | |||
209 | and t1, dst, ADDRMASK | 209 | and t1, dst, ADDRMASK |
210 | PREF( 0, 1*32(src) ) | 210 | PREF( 0, 1*32(src) ) |
211 | PREF( 1, 1*32(dst) ) | 211 | PREF( 1, 1*32(dst) ) |
212 | bnez t2, copy_bytes_checklen | 212 | bnez t2, .Lcopy_bytes_checklen |
213 | and t0, src, ADDRMASK | 213 | and t0, src, ADDRMASK |
214 | PREF( 0, 2*32(src) ) | 214 | PREF( 0, 2*32(src) ) |
215 | PREF( 1, 2*32(dst) ) | 215 | PREF( 1, 2*32(dst) ) |
216 | bnez t1, dst_unaligned | 216 | bnez t1, .Ldst_unaligned |
217 | nop | 217 | nop |
218 | bnez t0, src_unaligned_dst_aligned | 218 | bnez t0, .Lsrc_unaligned_dst_aligned |
219 | /* | 219 | /* |
220 | * use delay slot for fall-through | 220 | * use delay slot for fall-through |
221 | * src and dst are aligned; need to compute rem | 221 | * src and dst are aligned; need to compute rem |
222 | */ | 222 | */ |
223 | both_aligned: | 223 | .Lboth_aligned: |
224 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter | 224 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter |
225 | beqz t0, cleanup_both_aligned # len < 8*NBYTES | 225 | beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES |
226 | and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES) | 226 | and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES) |
227 | PREF( 0, 3*32(src) ) | 227 | PREF( 0, 3*32(src) ) |
228 | PREF( 1, 3*32(dst) ) | 228 | PREF( 1, 3*32(dst) ) |
229 | .align 4 | 229 | .align 4 |
230 | 1: | 230 | 1: |
231 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 231 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
232 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 232 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
233 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 233 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
234 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 234 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) |
235 | SUB len, len, 8*NBYTES | 235 | SUB len, len, 8*NBYTES |
236 | EXC( LOAD t4, UNIT(4)(src), l_exc_copy) | 236 | EXC( LOAD t4, UNIT(4)(src), .Ll_exc_copy) |
237 | EXC( LOAD t7, UNIT(5)(src), l_exc_copy) | 237 | EXC( LOAD t7, UNIT(5)(src), .Ll_exc_copy) |
238 | STORE t0, UNIT(0)(dst) | 238 | STORE t0, UNIT(0)(dst) |
239 | STORE t1, UNIT(1)(dst) | 239 | STORE t1, UNIT(1)(dst) |
240 | EXC( LOAD t0, UNIT(6)(src), l_exc_copy) | 240 | EXC( LOAD t0, UNIT(6)(src), .Ll_exc_copy) |
241 | EXC( LOAD t1, UNIT(7)(src), l_exc_copy) | 241 | EXC( LOAD t1, UNIT(7)(src), .Ll_exc_copy) |
242 | ADD src, src, 8*NBYTES | 242 | ADD src, src, 8*NBYTES |
243 | ADD dst, dst, 8*NBYTES | 243 | ADD dst, dst, 8*NBYTES |
244 | STORE t2, UNIT(-6)(dst) | 244 | STORE t2, UNIT(-6)(dst) |
@@ -255,18 +255,18 @@ EXC( LOAD t1, UNIT(7)(src), l_exc_copy) | |||
255 | /* | 255 | /* |
256 | * len == rem == the number of bytes left to copy < 8*NBYTES | 256 | * len == rem == the number of bytes left to copy < 8*NBYTES |
257 | */ | 257 | */ |
258 | cleanup_both_aligned: | 258 | .Lcleanup_both_aligned: |
259 | beqz len, done | 259 | beqz len, .Ldone |
260 | sltu t0, len, 4*NBYTES | 260 | sltu t0, len, 4*NBYTES |
261 | bnez t0, less_than_4units | 261 | bnez t0, .Lless_than_4units |
262 | and rem, len, (NBYTES-1) # rem = len % NBYTES | 262 | and rem, len, (NBYTES-1) # rem = len % NBYTES |
263 | /* | 263 | /* |
264 | * len >= 4*NBYTES | 264 | * len >= 4*NBYTES |
265 | */ | 265 | */ |
266 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 266 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
267 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 267 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
268 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 268 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
269 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 269 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) |
270 | SUB len, len, 4*NBYTES | 270 | SUB len, len, 4*NBYTES |
271 | ADD src, src, 4*NBYTES | 271 | ADD src, src, 4*NBYTES |
272 | STORE t0, UNIT(0)(dst) | 272 | STORE t0, UNIT(0)(dst) |
@@ -275,16 +275,16 @@ EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | |||
275 | STORE t3, UNIT(3)(dst) | 275 | STORE t3, UNIT(3)(dst) |
276 | .set reorder /* DADDI_WAR */ | 276 | .set reorder /* DADDI_WAR */ |
277 | ADD dst, dst, 4*NBYTES | 277 | ADD dst, dst, 4*NBYTES |
278 | beqz len, done | 278 | beqz len, .Ldone |
279 | .set noreorder | 279 | .set noreorder |
280 | less_than_4units: | 280 | .Lless_than_4units: |
281 | /* | 281 | /* |
282 | * rem = len % NBYTES | 282 | * rem = len % NBYTES |
283 | */ | 283 | */ |
284 | beq rem, len, copy_bytes | 284 | beq rem, len, .Lcopy_bytes |
285 | nop | 285 | nop |
286 | 1: | 286 | 1: |
287 | EXC( LOAD t0, 0(src), l_exc) | 287 | EXC( LOAD t0, 0(src), .Ll_exc) |
288 | ADD src, src, NBYTES | 288 | ADD src, src, NBYTES |
289 | SUB len, len, NBYTES | 289 | SUB len, len, NBYTES |
290 | STORE t0, 0(dst) | 290 | STORE t0, 0(dst) |
@@ -305,17 +305,17 @@ EXC( LOAD t0, 0(src), l_exc) | |||
305 | * more instruction-level parallelism. | 305 | * more instruction-level parallelism. |
306 | */ | 306 | */ |
307 | #define bits t2 | 307 | #define bits t2 |
308 | beqz len, done | 308 | beqz len, .Ldone |
309 | ADD t1, dst, len # t1 is just past last byte of dst | 309 | ADD t1, dst, len # t1 is just past last byte of dst |
310 | li bits, 8*NBYTES | 310 | li bits, 8*NBYTES |
311 | SLL rem, len, 3 # rem = number of bits to keep | 311 | SLL rem, len, 3 # rem = number of bits to keep |
312 | EXC( LOAD t0, 0(src), l_exc) | 312 | EXC( LOAD t0, 0(src), .Ll_exc) |
313 | SUB bits, bits, rem # bits = number of bits to discard | 313 | SUB bits, bits, rem # bits = number of bits to discard |
314 | SHIFT_DISCARD t0, t0, bits | 314 | SHIFT_DISCARD t0, t0, bits |
315 | STREST t0, -1(t1) | 315 | STREST t0, -1(t1) |
316 | jr ra | 316 | jr ra |
317 | move len, zero | 317 | move len, zero |
318 | dst_unaligned: | 318 | .Ldst_unaligned: |
319 | /* | 319 | /* |
320 | * dst is unaligned | 320 | * dst is unaligned |
321 | * t0 = src & ADDRMASK | 321 | * t0 = src & ADDRMASK |
@@ -326,22 +326,22 @@ dst_unaligned: | |||
326 | * Set match = (src and dst have same alignment) | 326 | * Set match = (src and dst have same alignment) |
327 | */ | 327 | */ |
328 | #define match rem | 328 | #define match rem |
329 | EXC( LDFIRST t3, FIRST(0)(src), l_exc) | 329 | EXC( LDFIRST t3, FIRST(0)(src), .Ll_exc) |
330 | ADD t2, zero, NBYTES | 330 | ADD t2, zero, NBYTES |
331 | EXC( LDREST t3, REST(0)(src), l_exc_copy) | 331 | EXC( LDREST t3, REST(0)(src), .Ll_exc_copy) |
332 | SUB t2, t2, t1 # t2 = number of bytes copied | 332 | SUB t2, t2, t1 # t2 = number of bytes copied |
333 | xor match, t0, t1 | 333 | xor match, t0, t1 |
334 | STFIRST t3, FIRST(0)(dst) | 334 | STFIRST t3, FIRST(0)(dst) |
335 | beq len, t2, done | 335 | beq len, t2, .Ldone |
336 | SUB len, len, t2 | 336 | SUB len, len, t2 |
337 | ADD dst, dst, t2 | 337 | ADD dst, dst, t2 |
338 | beqz match, both_aligned | 338 | beqz match, .Lboth_aligned |
339 | ADD src, src, t2 | 339 | ADD src, src, t2 |
340 | 340 | ||
341 | src_unaligned_dst_aligned: | 341 | .Lsrc_unaligned_dst_aligned: |
342 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter | 342 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter |
343 | PREF( 0, 3*32(src) ) | 343 | PREF( 0, 3*32(src) ) |
344 | beqz t0, cleanup_src_unaligned | 344 | beqz t0, .Lcleanup_src_unaligned |
345 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES | 345 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES |
346 | PREF( 1, 3*32(dst) ) | 346 | PREF( 1, 3*32(dst) ) |
347 | 1: | 347 | 1: |
@@ -351,15 +351,15 @@ src_unaligned_dst_aligned: | |||
351 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses | 351 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses |
352 | * are to the same unit (unless src is aligned, but it's not). | 352 | * are to the same unit (unless src is aligned, but it's not). |
353 | */ | 353 | */ |
354 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 354 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
355 | EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy) | 355 | EXC( LDFIRST t1, FIRST(1)(src), .Ll_exc_copy) |
356 | SUB len, len, 4*NBYTES | 356 | SUB len, len, 4*NBYTES |
357 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 357 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) |
358 | EXC( LDREST t1, REST(1)(src), l_exc_copy) | 358 | EXC( LDREST t1, REST(1)(src), .Ll_exc_copy) |
359 | EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy) | 359 | EXC( LDFIRST t2, FIRST(2)(src), .Ll_exc_copy) |
360 | EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy) | 360 | EXC( LDFIRST t3, FIRST(3)(src), .Ll_exc_copy) |
361 | EXC( LDREST t2, REST(2)(src), l_exc_copy) | 361 | EXC( LDREST t2, REST(2)(src), .Ll_exc_copy) |
362 | EXC( LDREST t3, REST(3)(src), l_exc_copy) | 362 | EXC( LDREST t3, REST(3)(src), .Ll_exc_copy) |
363 | PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed) | 363 | PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed) |
364 | ADD src, src, 4*NBYTES | 364 | ADD src, src, 4*NBYTES |
365 | #ifdef CONFIG_CPU_SB1 | 365 | #ifdef CONFIG_CPU_SB1 |
@@ -375,14 +375,14 @@ EXC( LDREST t3, REST(3)(src), l_exc_copy) | |||
375 | bne len, rem, 1b | 375 | bne len, rem, 1b |
376 | .set noreorder | 376 | .set noreorder |
377 | 377 | ||
378 | cleanup_src_unaligned: | 378 | .Lcleanup_src_unaligned: |
379 | beqz len, done | 379 | beqz len, .Ldone |
380 | and rem, len, NBYTES-1 # rem = len % NBYTES | 380 | and rem, len, NBYTES-1 # rem = len % NBYTES |
381 | beq rem, len, copy_bytes | 381 | beq rem, len, .Lcopy_bytes |
382 | nop | 382 | nop |
383 | 1: | 383 | 1: |
384 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 384 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
385 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 385 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) |
386 | ADD src, src, NBYTES | 386 | ADD src, src, NBYTES |
387 | SUB len, len, NBYTES | 387 | SUB len, len, NBYTES |
388 | STORE t0, 0(dst) | 388 | STORE t0, 0(dst) |
@@ -391,15 +391,15 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy) | |||
391 | bne len, rem, 1b | 391 | bne len, rem, 1b |
392 | .set noreorder | 392 | .set noreorder |
393 | 393 | ||
394 | copy_bytes_checklen: | 394 | .Lcopy_bytes_checklen: |
395 | beqz len, done | 395 | beqz len, .Ldone |
396 | nop | 396 | nop |
397 | copy_bytes: | 397 | .Lcopy_bytes: |
398 | /* 0 < len < NBYTES */ | 398 | /* 0 < len < NBYTES */ |
399 | #define COPY_BYTE(N) \ | 399 | #define COPY_BYTE(N) \ |
400 | EXC( lb t0, N(src), l_exc); \ | 400 | EXC( lb t0, N(src), .Ll_exc); \ |
401 | SUB len, len, 1; \ | 401 | SUB len, len, 1; \ |
402 | beqz len, done; \ | 402 | beqz len, .Ldone; \ |
403 | sb t0, N(dst) | 403 | sb t0, N(dst) |
404 | 404 | ||
405 | COPY_BYTE(0) | 405 | COPY_BYTE(0) |
@@ -410,16 +410,16 @@ EXC( lb t0, N(src), l_exc); \ | |||
410 | COPY_BYTE(4) | 410 | COPY_BYTE(4) |
411 | COPY_BYTE(5) | 411 | COPY_BYTE(5) |
412 | #endif | 412 | #endif |
413 | EXC( lb t0, NBYTES-2(src), l_exc) | 413 | EXC( lb t0, NBYTES-2(src), .Ll_exc) |
414 | SUB len, len, 1 | 414 | SUB len, len, 1 |
415 | jr ra | 415 | jr ra |
416 | sb t0, NBYTES-2(dst) | 416 | sb t0, NBYTES-2(dst) |
417 | done: | 417 | .Ldone: |
418 | jr ra | 418 | jr ra |
419 | nop | 419 | nop |
420 | END(__copy_user_inatomic) | 420 | END(__copy_user_inatomic) |
421 | 421 | ||
422 | l_exc_copy: | 422 | .Ll_exc_copy: |
423 | /* | 423 | /* |
424 | * Copy bytes from src until faulting load address (or until a | 424 | * Copy bytes from src until faulting load address (or until a |
425 | * lb faults) | 425 | * lb faults) |
@@ -434,14 +434,14 @@ l_exc_copy: | |||
434 | nop | 434 | nop |
435 | LOAD t0, THREAD_BUADDR(t0) | 435 | LOAD t0, THREAD_BUADDR(t0) |
436 | 1: | 436 | 1: |
437 | EXC( lb t1, 0(src), l_exc) | 437 | EXC( lb t1, 0(src), .Ll_exc) |
438 | ADD src, src, 1 | 438 | ADD src, src, 1 |
439 | sb t1, 0(dst) # can't fault -- we're copy_from_user | 439 | sb t1, 0(dst) # can't fault -- we're copy_from_user |
440 | .set reorder /* DADDI_WAR */ | 440 | .set reorder /* DADDI_WAR */ |
441 | ADD dst, dst, 1 | 441 | ADD dst, dst, 1 |
442 | bne src, t0, 1b | 442 | bne src, t0, 1b |
443 | .set noreorder | 443 | .set noreorder |
444 | l_exc: | 444 | .Ll_exc: |
445 | LOAD t0, TI_TASK($28) | 445 | LOAD t0, TI_TASK($28) |
446 | nop | 446 | nop |
447 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address | 447 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address |
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index 01e450b1ebc9..c06cccf60bec 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -191,7 +191,7 @@ | |||
191 | .align 5 | 191 | .align 5 |
192 | LEAF(memcpy) /* a0=dst a1=src a2=len */ | 192 | LEAF(memcpy) /* a0=dst a1=src a2=len */ |
193 | move v0, dst /* return value */ | 193 | move v0, dst /* return value */ |
194 | __memcpy: | 194 | .L__memcpy: |
195 | FEXPORT(__copy_user) | 195 | FEXPORT(__copy_user) |
196 | /* | 196 | /* |
197 | * Note: dst & src may be unaligned, len may be 0 | 197 | * Note: dst & src may be unaligned, len may be 0 |
@@ -213,45 +213,45 @@ FEXPORT(__copy_user) | |||
213 | and t1, dst, ADDRMASK | 213 | and t1, dst, ADDRMASK |
214 | PREF( 0, 1*32(src) ) | 214 | PREF( 0, 1*32(src) ) |
215 | PREF( 1, 1*32(dst) ) | 215 | PREF( 1, 1*32(dst) ) |
216 | bnez t2, copy_bytes_checklen | 216 | bnez t2, .Lcopy_bytes_checklen |
217 | and t0, src, ADDRMASK | 217 | and t0, src, ADDRMASK |
218 | PREF( 0, 2*32(src) ) | 218 | PREF( 0, 2*32(src) ) |
219 | PREF( 1, 2*32(dst) ) | 219 | PREF( 1, 2*32(dst) ) |
220 | bnez t1, dst_unaligned | 220 | bnez t1, .Ldst_unaligned |
221 | nop | 221 | nop |
222 | bnez t0, src_unaligned_dst_aligned | 222 | bnez t0, .Lsrc_unaligned_dst_aligned |
223 | /* | 223 | /* |
224 | * use delay slot for fall-through | 224 | * use delay slot for fall-through |
225 | * src and dst are aligned; need to compute rem | 225 | * src and dst are aligned; need to compute rem |
226 | */ | 226 | */ |
227 | both_aligned: | 227 | .Lboth_aligned: |
228 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter | 228 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter |
229 | beqz t0, cleanup_both_aligned # len < 8*NBYTES | 229 | beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES |
230 | and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES) | 230 | and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES) |
231 | PREF( 0, 3*32(src) ) | 231 | PREF( 0, 3*32(src) ) |
232 | PREF( 1, 3*32(dst) ) | 232 | PREF( 1, 3*32(dst) ) |
233 | .align 4 | 233 | .align 4 |
234 | 1: | 234 | 1: |
235 | R10KCBARRIER(0(ra)) | 235 | R10KCBARRIER(0(ra)) |
236 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 236 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
237 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 237 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
238 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 238 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
239 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 239 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) |
240 | SUB len, len, 8*NBYTES | 240 | SUB len, len, 8*NBYTES |
241 | EXC( LOAD t4, UNIT(4)(src), l_exc_copy) | 241 | EXC( LOAD t4, UNIT(4)(src), .Ll_exc_copy) |
242 | EXC( LOAD t7, UNIT(5)(src), l_exc_copy) | 242 | EXC( LOAD t7, UNIT(5)(src), .Ll_exc_copy) |
243 | EXC( STORE t0, UNIT(0)(dst), s_exc_p8u) | 243 | EXC( STORE t0, UNIT(0)(dst), .Ls_exc_p8u) |
244 | EXC( STORE t1, UNIT(1)(dst), s_exc_p7u) | 244 | EXC( STORE t1, UNIT(1)(dst), .Ls_exc_p7u) |
245 | EXC( LOAD t0, UNIT(6)(src), l_exc_copy) | 245 | EXC( LOAD t0, UNIT(6)(src), .Ll_exc_copy) |
246 | EXC( LOAD t1, UNIT(7)(src), l_exc_copy) | 246 | EXC( LOAD t1, UNIT(7)(src), .Ll_exc_copy) |
247 | ADD src, src, 8*NBYTES | 247 | ADD src, src, 8*NBYTES |
248 | ADD dst, dst, 8*NBYTES | 248 | ADD dst, dst, 8*NBYTES |
249 | EXC( STORE t2, UNIT(-6)(dst), s_exc_p6u) | 249 | EXC( STORE t2, UNIT(-6)(dst), .Ls_exc_p6u) |
250 | EXC( STORE t3, UNIT(-5)(dst), s_exc_p5u) | 250 | EXC( STORE t3, UNIT(-5)(dst), .Ls_exc_p5u) |
251 | EXC( STORE t4, UNIT(-4)(dst), s_exc_p4u) | 251 | EXC( STORE t4, UNIT(-4)(dst), .Ls_exc_p4u) |
252 | EXC( STORE t7, UNIT(-3)(dst), s_exc_p3u) | 252 | EXC( STORE t7, UNIT(-3)(dst), .Ls_exc_p3u) |
253 | EXC( STORE t0, UNIT(-2)(dst), s_exc_p2u) | 253 | EXC( STORE t0, UNIT(-2)(dst), .Ls_exc_p2u) |
254 | EXC( STORE t1, UNIT(-1)(dst), s_exc_p1u) | 254 | EXC( STORE t1, UNIT(-1)(dst), .Ls_exc_p1u) |
255 | PREF( 0, 8*32(src) ) | 255 | PREF( 0, 8*32(src) ) |
256 | PREF( 1, 8*32(dst) ) | 256 | PREF( 1, 8*32(dst) ) |
257 | bne len, rem, 1b | 257 | bne len, rem, 1b |
@@ -260,41 +260,41 @@ EXC( STORE t1, UNIT(-1)(dst), s_exc_p1u) | |||
260 | /* | 260 | /* |
261 | * len == rem == the number of bytes left to copy < 8*NBYTES | 261 | * len == rem == the number of bytes left to copy < 8*NBYTES |
262 | */ | 262 | */ |
263 | cleanup_both_aligned: | 263 | .Lcleanup_both_aligned: |
264 | beqz len, done | 264 | beqz len, .Ldone |
265 | sltu t0, len, 4*NBYTES | 265 | sltu t0, len, 4*NBYTES |
266 | bnez t0, less_than_4units | 266 | bnez t0, .Lless_than_4units |
267 | and rem, len, (NBYTES-1) # rem = len % NBYTES | 267 | and rem, len, (NBYTES-1) # rem = len % NBYTES |
268 | /* | 268 | /* |
269 | * len >= 4*NBYTES | 269 | * len >= 4*NBYTES |
270 | */ | 270 | */ |
271 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 271 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
272 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 272 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
273 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 273 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
274 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 274 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) |
275 | SUB len, len, 4*NBYTES | 275 | SUB len, len, 4*NBYTES |
276 | ADD src, src, 4*NBYTES | 276 | ADD src, src, 4*NBYTES |
277 | R10KCBARRIER(0(ra)) | 277 | R10KCBARRIER(0(ra)) |
278 | EXC( STORE t0, UNIT(0)(dst), s_exc_p4u) | 278 | EXC( STORE t0, UNIT(0)(dst), .Ls_exc_p4u) |
279 | EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) | 279 | EXC( STORE t1, UNIT(1)(dst), .Ls_exc_p3u) |
280 | EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) | 280 | EXC( STORE t2, UNIT(2)(dst), .Ls_exc_p2u) |
281 | EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) | 281 | EXC( STORE t3, UNIT(3)(dst), .Ls_exc_p1u) |
282 | .set reorder /* DADDI_WAR */ | 282 | .set reorder /* DADDI_WAR */ |
283 | ADD dst, dst, 4*NBYTES | 283 | ADD dst, dst, 4*NBYTES |
284 | beqz len, done | 284 | beqz len, .Ldone |
285 | .set noreorder | 285 | .set noreorder |
286 | less_than_4units: | 286 | .Lless_than_4units: |
287 | /* | 287 | /* |
288 | * rem = len % NBYTES | 288 | * rem = len % NBYTES |
289 | */ | 289 | */ |
290 | beq rem, len, copy_bytes | 290 | beq rem, len, .Lcopy_bytes |
291 | nop | 291 | nop |
292 | 1: | 292 | 1: |
293 | R10KCBARRIER(0(ra)) | 293 | R10KCBARRIER(0(ra)) |
294 | EXC( LOAD t0, 0(src), l_exc) | 294 | EXC( LOAD t0, 0(src), .Ll_exc) |
295 | ADD src, src, NBYTES | 295 | ADD src, src, NBYTES |
296 | SUB len, len, NBYTES | 296 | SUB len, len, NBYTES |
297 | EXC( STORE t0, 0(dst), s_exc_p1u) | 297 | EXC( STORE t0, 0(dst), .Ls_exc_p1u) |
298 | .set reorder /* DADDI_WAR */ | 298 | .set reorder /* DADDI_WAR */ |
299 | ADD dst, dst, NBYTES | 299 | ADD dst, dst, NBYTES |
300 | bne rem, len, 1b | 300 | bne rem, len, 1b |
@@ -312,17 +312,17 @@ EXC( STORE t0, 0(dst), s_exc_p1u) | |||
312 | * more instruction-level parallelism. | 312 | * more instruction-level parallelism. |
313 | */ | 313 | */ |
314 | #define bits t2 | 314 | #define bits t2 |
315 | beqz len, done | 315 | beqz len, .Ldone |
316 | ADD t1, dst, len # t1 is just past last byte of dst | 316 | ADD t1, dst, len # t1 is just past last byte of dst |
317 | li bits, 8*NBYTES | 317 | li bits, 8*NBYTES |
318 | SLL rem, len, 3 # rem = number of bits to keep | 318 | SLL rem, len, 3 # rem = number of bits to keep |
319 | EXC( LOAD t0, 0(src), l_exc) | 319 | EXC( LOAD t0, 0(src), .Ll_exc) |
320 | SUB bits, bits, rem # bits = number of bits to discard | 320 | SUB bits, bits, rem # bits = number of bits to discard |
321 | SHIFT_DISCARD t0, t0, bits | 321 | SHIFT_DISCARD t0, t0, bits |
322 | EXC( STREST t0, -1(t1), s_exc) | 322 | EXC( STREST t0, -1(t1), .Ls_exc) |
323 | jr ra | 323 | jr ra |
324 | move len, zero | 324 | move len, zero |
325 | dst_unaligned: | 325 | .Ldst_unaligned: |
326 | /* | 326 | /* |
327 | * dst is unaligned | 327 | * dst is unaligned |
328 | * t0 = src & ADDRMASK | 328 | * t0 = src & ADDRMASK |
@@ -333,23 +333,23 @@ dst_unaligned: | |||
333 | * Set match = (src and dst have same alignment) | 333 | * Set match = (src and dst have same alignment) |
334 | */ | 334 | */ |
335 | #define match rem | 335 | #define match rem |
336 | EXC( LDFIRST t3, FIRST(0)(src), l_exc) | 336 | EXC( LDFIRST t3, FIRST(0)(src), .Ll_exc) |
337 | ADD t2, zero, NBYTES | 337 | ADD t2, zero, NBYTES |
338 | EXC( LDREST t3, REST(0)(src), l_exc_copy) | 338 | EXC( LDREST t3, REST(0)(src), .Ll_exc_copy) |
339 | SUB t2, t2, t1 # t2 = number of bytes copied | 339 | SUB t2, t2, t1 # t2 = number of bytes copied |
340 | xor match, t0, t1 | 340 | xor match, t0, t1 |
341 | R10KCBARRIER(0(ra)) | 341 | R10KCBARRIER(0(ra)) |
342 | EXC( STFIRST t3, FIRST(0)(dst), s_exc) | 342 | EXC( STFIRST t3, FIRST(0)(dst), .Ls_exc) |
343 | beq len, t2, done | 343 | beq len, t2, .Ldone |
344 | SUB len, len, t2 | 344 | SUB len, len, t2 |
345 | ADD dst, dst, t2 | 345 | ADD dst, dst, t2 |
346 | beqz match, both_aligned | 346 | beqz match, .Lboth_aligned |
347 | ADD src, src, t2 | 347 | ADD src, src, t2 |
348 | 348 | ||
349 | src_unaligned_dst_aligned: | 349 | .Lsrc_unaligned_dst_aligned: |
350 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter | 350 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter |
351 | PREF( 0, 3*32(src) ) | 351 | PREF( 0, 3*32(src) ) |
352 | beqz t0, cleanup_src_unaligned | 352 | beqz t0, .Lcleanup_src_unaligned |
353 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES | 353 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES |
354 | PREF( 1, 3*32(dst) ) | 354 | PREF( 1, 3*32(dst) ) |
355 | 1: | 355 | 1: |
@@ -360,58 +360,58 @@ src_unaligned_dst_aligned: | |||
360 | * are to the same unit (unless src is aligned, but it's not). | 360 | * are to the same unit (unless src is aligned, but it's not). |
361 | */ | 361 | */ |
362 | R10KCBARRIER(0(ra)) | 362 | R10KCBARRIER(0(ra)) |
363 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 363 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
364 | EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy) | 364 | EXC( LDFIRST t1, FIRST(1)(src), .Ll_exc_copy) |
365 | SUB len, len, 4*NBYTES | 365 | SUB len, len, 4*NBYTES |
366 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 366 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) |
367 | EXC( LDREST t1, REST(1)(src), l_exc_copy) | 367 | EXC( LDREST t1, REST(1)(src), .Ll_exc_copy) |
368 | EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy) | 368 | EXC( LDFIRST t2, FIRST(2)(src), .Ll_exc_copy) |
369 | EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy) | 369 | EXC( LDFIRST t3, FIRST(3)(src), .Ll_exc_copy) |
370 | EXC( LDREST t2, REST(2)(src), l_exc_copy) | 370 | EXC( LDREST t2, REST(2)(src), .Ll_exc_copy) |
371 | EXC( LDREST t3, REST(3)(src), l_exc_copy) | 371 | EXC( LDREST t3, REST(3)(src), .Ll_exc_copy) |
372 | PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed) | 372 | PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed) |
373 | ADD src, src, 4*NBYTES | 373 | ADD src, src, 4*NBYTES |
374 | #ifdef CONFIG_CPU_SB1 | 374 | #ifdef CONFIG_CPU_SB1 |
375 | nop # improves slotting | 375 | nop # improves slotting |
376 | #endif | 376 | #endif |
377 | EXC( STORE t0, UNIT(0)(dst), s_exc_p4u) | 377 | EXC( STORE t0, UNIT(0)(dst), .Ls_exc_p4u) |
378 | EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) | 378 | EXC( STORE t1, UNIT(1)(dst), .Ls_exc_p3u) |
379 | EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) | 379 | EXC( STORE t2, UNIT(2)(dst), .Ls_exc_p2u) |
380 | EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) | 380 | EXC( STORE t3, UNIT(3)(dst), .Ls_exc_p1u) |
381 | PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed) | 381 | PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed) |
382 | .set reorder /* DADDI_WAR */ | 382 | .set reorder /* DADDI_WAR */ |
383 | ADD dst, dst, 4*NBYTES | 383 | ADD dst, dst, 4*NBYTES |
384 | bne len, rem, 1b | 384 | bne len, rem, 1b |
385 | .set noreorder | 385 | .set noreorder |
386 | 386 | ||
387 | cleanup_src_unaligned: | 387 | .Lcleanup_src_unaligned: |
388 | beqz len, done | 388 | beqz len, .Ldone |
389 | and rem, len, NBYTES-1 # rem = len % NBYTES | 389 | and rem, len, NBYTES-1 # rem = len % NBYTES |
390 | beq rem, len, copy_bytes | 390 | beq rem, len, .Lcopy_bytes |
391 | nop | 391 | nop |
392 | 1: | 392 | 1: |
393 | R10KCBARRIER(0(ra)) | 393 | R10KCBARRIER(0(ra)) |
394 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 394 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
395 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 395 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) |
396 | ADD src, src, NBYTES | 396 | ADD src, src, NBYTES |
397 | SUB len, len, NBYTES | 397 | SUB len, len, NBYTES |
398 | EXC( STORE t0, 0(dst), s_exc_p1u) | 398 | EXC( STORE t0, 0(dst), .Ls_exc_p1u) |
399 | .set reorder /* DADDI_WAR */ | 399 | .set reorder /* DADDI_WAR */ |
400 | ADD dst, dst, NBYTES | 400 | ADD dst, dst, NBYTES |
401 | bne len, rem, 1b | 401 | bne len, rem, 1b |
402 | .set noreorder | 402 | .set noreorder |
403 | 403 | ||
404 | copy_bytes_checklen: | 404 | .Lcopy_bytes_checklen: |
405 | beqz len, done | 405 | beqz len, .Ldone |
406 | nop | 406 | nop |
407 | copy_bytes: | 407 | .Lcopy_bytes: |
408 | /* 0 < len < NBYTES */ | 408 | /* 0 < len < NBYTES */ |
409 | R10KCBARRIER(0(ra)) | 409 | R10KCBARRIER(0(ra)) |
410 | #define COPY_BYTE(N) \ | 410 | #define COPY_BYTE(N) \ |
411 | EXC( lb t0, N(src), l_exc); \ | 411 | EXC( lb t0, N(src), .Ll_exc); \ |
412 | SUB len, len, 1; \ | 412 | SUB len, len, 1; \ |
413 | beqz len, done; \ | 413 | beqz len, .Ldone; \ |
414 | EXC( sb t0, N(dst), s_exc_p1) | 414 | EXC( sb t0, N(dst), .Ls_exc_p1) |
415 | 415 | ||
416 | COPY_BYTE(0) | 416 | COPY_BYTE(0) |
417 | COPY_BYTE(1) | 417 | COPY_BYTE(1) |
@@ -421,16 +421,16 @@ EXC( sb t0, N(dst), s_exc_p1) | |||
421 | COPY_BYTE(4) | 421 | COPY_BYTE(4) |
422 | COPY_BYTE(5) | 422 | COPY_BYTE(5) |
423 | #endif | 423 | #endif |
424 | EXC( lb t0, NBYTES-2(src), l_exc) | 424 | EXC( lb t0, NBYTES-2(src), .Ll_exc) |
425 | SUB len, len, 1 | 425 | SUB len, len, 1 |
426 | jr ra | 426 | jr ra |
427 | EXC( sb t0, NBYTES-2(dst), s_exc_p1) | 427 | EXC( sb t0, NBYTES-2(dst), .Ls_exc_p1) |
428 | done: | 428 | .Ldone: |
429 | jr ra | 429 | jr ra |
430 | nop | 430 | nop |
431 | END(memcpy) | 431 | END(memcpy) |
432 | 432 | ||
433 | l_exc_copy: | 433 | .Ll_exc_copy: |
434 | /* | 434 | /* |
435 | * Copy bytes from src until faulting load address (or until a | 435 | * Copy bytes from src until faulting load address (or until a |
436 | * lb faults) | 436 | * lb faults) |
@@ -445,14 +445,14 @@ l_exc_copy: | |||
445 | nop | 445 | nop |
446 | LOAD t0, THREAD_BUADDR(t0) | 446 | LOAD t0, THREAD_BUADDR(t0) |
447 | 1: | 447 | 1: |
448 | EXC( lb t1, 0(src), l_exc) | 448 | EXC( lb t1, 0(src), .Ll_exc) |
449 | ADD src, src, 1 | 449 | ADD src, src, 1 |
450 | sb t1, 0(dst) # can't fault -- we're copy_from_user | 450 | sb t1, 0(dst) # can't fault -- we're copy_from_user |
451 | .set reorder /* DADDI_WAR */ | 451 | .set reorder /* DADDI_WAR */ |
452 | ADD dst, dst, 1 | 452 | ADD dst, dst, 1 |
453 | bne src, t0, 1b | 453 | bne src, t0, 1b |
454 | .set noreorder | 454 | .set noreorder |
455 | l_exc: | 455 | .Ll_exc: |
456 | LOAD t0, TI_TASK($28) | 456 | LOAD t0, TI_TASK($28) |
457 | nop | 457 | nop |
458 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address | 458 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address |
@@ -471,7 +471,7 @@ l_exc: | |||
471 | */ | 471 | */ |
472 | .set reorder /* DADDI_WAR */ | 472 | .set reorder /* DADDI_WAR */ |
473 | SUB src, len, 1 | 473 | SUB src, len, 1 |
474 | beqz len, done | 474 | beqz len, .Ldone |
475 | .set noreorder | 475 | .set noreorder |
476 | 1: sb zero, 0(dst) | 476 | 1: sb zero, 0(dst) |
477 | ADD dst, dst, 1 | 477 | ADD dst, dst, 1 |
@@ -492,7 +492,7 @@ l_exc: | |||
492 | 492 | ||
493 | #define SEXC(n) \ | 493 | #define SEXC(n) \ |
494 | .set reorder; /* DADDI_WAR */ \ | 494 | .set reorder; /* DADDI_WAR */ \ |
495 | s_exc_p ## n ## u: \ | 495 | .Ls_exc_p ## n ## u: \ |
496 | ADD len, len, n*NBYTES; \ | 496 | ADD len, len, n*NBYTES; \ |
497 | jr ra; \ | 497 | jr ra; \ |
498 | .set noreorder | 498 | .set noreorder |
@@ -506,12 +506,12 @@ SEXC(3) | |||
506 | SEXC(2) | 506 | SEXC(2) |
507 | SEXC(1) | 507 | SEXC(1) |
508 | 508 | ||
509 | s_exc_p1: | 509 | .Ls_exc_p1: |
510 | .set reorder /* DADDI_WAR */ | 510 | .set reorder /* DADDI_WAR */ |
511 | ADD len, len, 1 | 511 | ADD len, len, 1 |
512 | jr ra | 512 | jr ra |
513 | .set noreorder | 513 | .set noreorder |
514 | s_exc: | 514 | .Ls_exc: |
515 | jr ra | 515 | jr ra |
516 | nop | 516 | nop |
517 | 517 | ||
@@ -522,20 +522,20 @@ LEAF(memmove) | |||
522 | sltu t0, a1, t0 # dst + len <= src -> memcpy | 522 | sltu t0, a1, t0 # dst + len <= src -> memcpy |
523 | sltu t1, a0, t1 # dst >= src + len -> memcpy | 523 | sltu t1, a0, t1 # dst >= src + len -> memcpy |
524 | and t0, t1 | 524 | and t0, t1 |
525 | beqz t0, __memcpy | 525 | beqz t0, .L__memcpy |
526 | move v0, a0 /* return value */ | 526 | move v0, a0 /* return value */ |
527 | beqz a2, r_out | 527 | beqz a2, .Lr_out |
528 | END(memmove) | 528 | END(memmove) |
529 | 529 | ||
530 | /* fall through to __rmemcpy */ | 530 | /* fall through to __rmemcpy */ |
531 | LEAF(__rmemcpy) /* a0=dst a1=src a2=len */ | 531 | LEAF(__rmemcpy) /* a0=dst a1=src a2=len */ |
532 | sltu t0, a1, a0 | 532 | sltu t0, a1, a0 |
533 | beqz t0, r_end_bytes_up # src >= dst | 533 | beqz t0, .Lr_end_bytes_up # src >= dst |
534 | nop | 534 | nop |
535 | ADD a0, a2 # dst = dst + len | 535 | ADD a0, a2 # dst = dst + len |
536 | ADD a1, a2 # src = src + len | 536 | ADD a1, a2 # src = src + len |
537 | 537 | ||
538 | r_end_bytes: | 538 | .Lr_end_bytes: |
539 | R10KCBARRIER(0(ra)) | 539 | R10KCBARRIER(0(ra)) |
540 | lb t0, -1(a1) | 540 | lb t0, -1(a1) |
541 | SUB a2, a2, 0x1 | 541 | SUB a2, a2, 0x1 |
@@ -543,14 +543,14 @@ r_end_bytes: | |||
543 | SUB a1, a1, 0x1 | 543 | SUB a1, a1, 0x1 |
544 | .set reorder /* DADDI_WAR */ | 544 | .set reorder /* DADDI_WAR */ |
545 | SUB a0, a0, 0x1 | 545 | SUB a0, a0, 0x1 |
546 | bnez a2, r_end_bytes | 546 | bnez a2, .Lr_end_bytes |
547 | .set noreorder | 547 | .set noreorder |
548 | 548 | ||
549 | r_out: | 549 | .Lr_out: |
550 | jr ra | 550 | jr ra |
551 | move a2, zero | 551 | move a2, zero |
552 | 552 | ||
553 | r_end_bytes_up: | 553 | .Lr_end_bytes_up: |
554 | R10KCBARRIER(0(ra)) | 554 | R10KCBARRIER(0(ra)) |
555 | lb t0, (a1) | 555 | lb t0, (a1) |
556 | SUB a2, a2, 0x1 | 556 | SUB a2, a2, 0x1 |
@@ -558,7 +558,7 @@ r_end_bytes_up: | |||
558 | ADD a1, a1, 0x1 | 558 | ADD a1, a1, 0x1 |
559 | .set reorder /* DADDI_WAR */ | 559 | .set reorder /* DADDI_WAR */ |
560 | ADD a0, a0, 0x1 | 560 | ADD a0, a0, 0x1 |
561 | bnez a2, r_end_bytes_up | 561 | bnez a2, .Lr_end_bytes_up |
562 | .set noreorder | 562 | .set noreorder |
563 | 563 | ||
564 | jr ra | 564 | jr ra |
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index c018a4721693..77dc3b20110a 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -72,7 +72,7 @@ LEAF(memset) | |||
72 | 72 | ||
73 | FEXPORT(__bzero) | 73 | FEXPORT(__bzero) |
74 | sltiu t0, a2, LONGSIZE /* very small region? */ | 74 | sltiu t0, a2, LONGSIZE /* very small region? */ |
75 | bnez t0, small_memset | 75 | bnez t0, .Lsmall_memset |
76 | andi t0, a0, LONGMASK /* aligned? */ | 76 | andi t0, a0, LONGMASK /* aligned? */ |
77 | 77 | ||
78 | #ifndef CONFIG_CPU_DADDI_WORKAROUNDS | 78 | #ifndef CONFIG_CPU_DADDI_WORKAROUNDS |
@@ -88,28 +88,28 @@ FEXPORT(__bzero) | |||
88 | 88 | ||
89 | R10KCBARRIER(0(ra)) | 89 | R10KCBARRIER(0(ra)) |
90 | #ifdef __MIPSEB__ | 90 | #ifdef __MIPSEB__ |
91 | EX(LONG_S_L, a1, (a0), first_fixup) /* make word/dword aligned */ | 91 | EX(LONG_S_L, a1, (a0), .Lfirst_fixup) /* make word/dword aligned */ |
92 | #endif | 92 | #endif |
93 | #ifdef __MIPSEL__ | 93 | #ifdef __MIPSEL__ |
94 | EX(LONG_S_R, a1, (a0), first_fixup) /* make word/dword aligned */ | 94 | EX(LONG_S_R, a1, (a0), .Lfirst_fixup) /* make word/dword aligned */ |
95 | #endif | 95 | #endif |
96 | PTR_SUBU a0, t0 /* long align ptr */ | 96 | PTR_SUBU a0, t0 /* long align ptr */ |
97 | PTR_ADDU a2, t0 /* correct size */ | 97 | PTR_ADDU a2, t0 /* correct size */ |
98 | 98 | ||
99 | 1: ori t1, a2, 0x3f /* # of full blocks */ | 99 | 1: ori t1, a2, 0x3f /* # of full blocks */ |
100 | xori t1, 0x3f | 100 | xori t1, 0x3f |
101 | beqz t1, memset_partial /* no block to fill */ | 101 | beqz t1, .Lmemset_partial /* no block to fill */ |
102 | andi t0, a2, 0x40-LONGSIZE | 102 | andi t0, a2, 0x40-LONGSIZE |
103 | 103 | ||
104 | PTR_ADDU t1, a0 /* end address */ | 104 | PTR_ADDU t1, a0 /* end address */ |
105 | .set reorder | 105 | .set reorder |
106 | 1: PTR_ADDIU a0, 64 | 106 | 1: PTR_ADDIU a0, 64 |
107 | R10KCBARRIER(0(ra)) | 107 | R10KCBARRIER(0(ra)) |
108 | f_fill64 a0, -64, a1, fwd_fixup | 108 | f_fill64 a0, -64, a1, .Lfwd_fixup |
109 | bne t1, a0, 1b | 109 | bne t1, a0, 1b |
110 | .set noreorder | 110 | .set noreorder |
111 | 111 | ||
112 | memset_partial: | 112 | .Lmemset_partial: |
113 | R10KCBARRIER(0(ra)) | 113 | R10KCBARRIER(0(ra)) |
114 | PTR_LA t1, 2f /* where to start */ | 114 | PTR_LA t1, 2f /* where to start */ |
115 | #if LONGSIZE == 4 | 115 | #if LONGSIZE == 4 |
@@ -126,7 +126,7 @@ memset_partial: | |||
126 | .set push | 126 | .set push |
127 | .set noreorder | 127 | .set noreorder |
128 | .set nomacro | 128 | .set nomacro |
129 | f_fill64 a0, -64, a1, partial_fixup /* ... but first do longs ... */ | 129 | f_fill64 a0, -64, a1, .Lpartial_fixup /* ... but first do longs ... */ |
130 | 2: .set pop | 130 | 2: .set pop |
131 | andi a2, LONGMASK /* At most one long to go */ | 131 | andi a2, LONGMASK /* At most one long to go */ |
132 | 132 | ||
@@ -134,15 +134,15 @@ memset_partial: | |||
134 | PTR_ADDU a0, a2 /* What's left */ | 134 | PTR_ADDU a0, a2 /* What's left */ |
135 | R10KCBARRIER(0(ra)) | 135 | R10KCBARRIER(0(ra)) |
136 | #ifdef __MIPSEB__ | 136 | #ifdef __MIPSEB__ |
137 | EX(LONG_S_R, a1, -1(a0), last_fixup) | 137 | EX(LONG_S_R, a1, -1(a0), .Llast_fixup) |
138 | #endif | 138 | #endif |
139 | #ifdef __MIPSEL__ | 139 | #ifdef __MIPSEL__ |
140 | EX(LONG_S_L, a1, -1(a0), last_fixup) | 140 | EX(LONG_S_L, a1, -1(a0), .Llast_fixup) |
141 | #endif | 141 | #endif |
142 | 1: jr ra | 142 | 1: jr ra |
143 | move a2, zero | 143 | move a2, zero |
144 | 144 | ||
145 | small_memset: | 145 | .Lsmall_memset: |
146 | beqz a2, 2f | 146 | beqz a2, 2f |
147 | PTR_ADDU t1, a0, a2 | 147 | PTR_ADDU t1, a0, a2 |
148 | 148 | ||
@@ -155,11 +155,11 @@ small_memset: | |||
155 | move a2, zero | 155 | move a2, zero |
156 | END(memset) | 156 | END(memset) |
157 | 157 | ||
158 | first_fixup: | 158 | .Lfirst_fixup: |
159 | jr ra | 159 | jr ra |
160 | nop | 160 | nop |
161 | 161 | ||
162 | fwd_fixup: | 162 | .Lfwd_fixup: |
163 | PTR_L t0, TI_TASK($28) | 163 | PTR_L t0, TI_TASK($28) |
164 | LONG_L t0, THREAD_BUADDR(t0) | 164 | LONG_L t0, THREAD_BUADDR(t0) |
165 | andi a2, 0x3f | 165 | andi a2, 0x3f |
@@ -167,7 +167,7 @@ fwd_fixup: | |||
167 | jr ra | 167 | jr ra |
168 | LONG_SUBU a2, t0 | 168 | LONG_SUBU a2, t0 |
169 | 169 | ||
170 | partial_fixup: | 170 | .Lpartial_fixup: |
171 | PTR_L t0, TI_TASK($28) | 171 | PTR_L t0, TI_TASK($28) |
172 | LONG_L t0, THREAD_BUADDR(t0) | 172 | LONG_L t0, THREAD_BUADDR(t0) |
173 | andi a2, LONGMASK | 173 | andi a2, LONGMASK |
@@ -175,6 +175,6 @@ partial_fixup: | |||
175 | jr ra | 175 | jr ra |
176 | LONG_SUBU a2, t0 | 176 | LONG_SUBU a2, t0 |
177 | 177 | ||
178 | last_fixup: | 178 | .Llast_fixup: |
179 | jr ra | 179 | jr ra |
180 | andi v1, a2, LONGMASK | 180 | andi v1, a2, LONGMASK |
diff --git a/arch/mips/lib/strlen_user.S b/arch/mips/lib/strlen_user.S
index eca558d83a37..fdbb970f670d 100644
--- a/arch/mips/lib/strlen_user.S
+++ b/arch/mips/lib/strlen_user.S
@@ -24,16 +24,16 @@ | |||
24 | LEAF(__strlen_user_asm) | 24 | LEAF(__strlen_user_asm) |
25 | LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? | 25 | LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? |
26 | and v0, a0 | 26 | and v0, a0 |
27 | bnez v0, fault | 27 | bnez v0, .Lfault |
28 | 28 | ||
29 | FEXPORT(__strlen_user_nocheck_asm) | 29 | FEXPORT(__strlen_user_nocheck_asm) |
30 | move v0, a0 | 30 | move v0, a0 |
31 | 1: EX(lb, t0, (v0), fault) | 31 | 1: EX(lb, t0, (v0), .Lfault) |
32 | PTR_ADDIU v0, 1 | 32 | PTR_ADDIU v0, 1 |
33 | bnez t0, 1b | 33 | bnez t0, 1b |
34 | PTR_SUBU v0, a0 | 34 | PTR_SUBU v0, a0 |
35 | jr ra | 35 | jr ra |
36 | END(__strlen_user_asm) | 36 | END(__strlen_user_asm) |
37 | 37 | ||
38 | fault: move v0, zero | 38 | .Lfault: move v0, zero |
39 | jr ra | 39 | jr ra |
diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S
index 8a63f72b81d3..7201b2ff08c8 100644
--- a/arch/mips/lib/strncpy_user.S
+++ b/arch/mips/lib/strncpy_user.S
@@ -30,13 +30,13 @@ | |||
30 | LEAF(__strncpy_from_user_asm) | 30 | LEAF(__strncpy_from_user_asm) |
31 | LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? | 31 | LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? |
32 | and v0, a1 | 32 | and v0, a1 |
33 | bnez v0, fault | 33 | bnez v0, .Lfault |
34 | 34 | ||
35 | FEXPORT(__strncpy_from_user_nocheck_asm) | 35 | FEXPORT(__strncpy_from_user_nocheck_asm) |
36 | move v0, zero | 36 | move v0, zero |
37 | move v1, a1 | 37 | move v1, a1 |
38 | .set noreorder | 38 | .set noreorder |
39 | 1: EX(lbu, t0, (v1), fault) | 39 | 1: EX(lbu, t0, (v1), .Lfault) |
40 | PTR_ADDIU v1, 1 | 40 | PTR_ADDIU v1, 1 |
41 | R10KCBARRIER(0(ra)) | 41 | R10KCBARRIER(0(ra)) |
42 | beqz t0, 2f | 42 | beqz t0, 2f |
@@ -47,13 +47,13 @@ FEXPORT(__strncpy_from_user_nocheck_asm) | |||
47 | bne v0, a2, 1b | 47 | bne v0, a2, 1b |
48 | 2: PTR_ADDU t0, a1, v0 | 48 | 2: PTR_ADDU t0, a1, v0 |
49 | xor t0, a1 | 49 | xor t0, a1 |
50 | bltz t0, fault | 50 | bltz t0, .Lfault |
51 | jr ra # return n | 51 | jr ra # return n |
52 | END(__strncpy_from_user_asm) | 52 | END(__strncpy_from_user_asm) |
53 | 53 | ||
54 | fault: li v0, -EFAULT | 54 | .Lfault: li v0, -EFAULT |
55 | jr ra | 55 | jr ra |
56 | 56 | ||
57 | .section __ex_table,"a" | 57 | .section __ex_table,"a" |
58 | PTR 1b, fault | 58 | PTR 1b, .Lfault |
59 | .previous | 59 | .previous |
diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S
index c0ea15194a0e..c768e3000616 100644
--- a/arch/mips/lib/strnlen_user.S
+++ b/arch/mips/lib/strnlen_user.S
@@ -28,18 +28,19 @@ | |||
28 | LEAF(__strnlen_user_asm) | 28 | LEAF(__strnlen_user_asm) |
29 | LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? | 29 | LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? |
30 | and v0, a0 | 30 | and v0, a0 |
31 | bnez v0, fault | 31 | bnez v0, .Lfault |
32 | 32 | ||
33 | FEXPORT(__strnlen_user_nocheck_asm) | 33 | FEXPORT(__strnlen_user_nocheck_asm) |
34 | move v0, a0 | 34 | move v0, a0 |
35 | PTR_ADDU a1, a0 # stop pointer | 35 | PTR_ADDU a1, a0 # stop pointer |
36 | 1: beq v0, a1, 1f # limit reached? | 36 | 1: beq v0, a1, 1f # limit reached? |
37 | EX(lb, t0, (v0), fault) | 37 | EX(lb, t0, (v0), .Lfault) |
38 | PTR_ADDU v0, 1 | 38 | PTR_ADDU v0, 1 |
39 | bnez t0, 1b | 39 | bnez t0, 1b |
40 | 1: PTR_SUBU v0, a0 | 40 | 1: PTR_SUBU v0, a0 |
41 | jr ra | 41 | jr ra |
42 | END(__strnlen_user_asm) | 42 | END(__strnlen_user_asm) |
43 | 43 | ||
44 | fault: move v0, zero | 44 | .Lfault: |
| | 45 | move v0, zero |
45 | jr ra | 46 | jr ra |