aboutsummaryrefslogtreecommitdiffstats
path: root/arch/mips
diff options
context:
space:
mode:
Diffstat (limited to 'arch/mips')
-rw-r--r-- arch/mips/lib/csum_partial.S 214
-rw-r--r-- arch/mips/lib/memcpy-inatomic.S 116
-rw-r--r-- arch/mips/lib/memcpy.S 182
-rw-r--r-- arch/mips/lib/memset.S 28
-rw-r--r-- arch/mips/lib/strlen_user.S 6
-rw-r--r-- arch/mips/lib/strncpy_user.S 10
-rw-r--r-- arch/mips/lib/strnlen_user.S 7
7 files changed, 282 insertions, 281 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 957a82484e3e..8d7784122c14 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -96,13 +96,13 @@ LEAF(csum_partial)
96 move t7, zero 96 move t7, zero
97 97
98 sltiu t8, a1, 0x8 98 sltiu t8, a1, 0x8
99 bnez t8, small_csumcpy /* < 8 bytes to copy */ 99 bnez t8, .Lsmall_csumcpy /* < 8 bytes to copy */
100 move t2, a1 100 move t2, a1
101 101
102 andi t7, src, 0x1 /* odd buffer? */ 102 andi t7, src, 0x1 /* odd buffer? */
103 103
104hword_align: 104.Lhword_align:
105 beqz t7, word_align 105 beqz t7, .Lword_align
106 andi t8, src, 0x2 106 andi t8, src, 0x2
107 107
108 lbu t0, (src) 108 lbu t0, (src)
@@ -114,8 +114,8 @@ hword_align:
114 PTR_ADDU src, src, 0x1 114 PTR_ADDU src, src, 0x1
115 andi t8, src, 0x2 115 andi t8, src, 0x2
116 116
117word_align: 117.Lword_align:
118 beqz t8, dword_align 118 beqz t8, .Ldword_align
119 sltiu t8, a1, 56 119 sltiu t8, a1, 56
120 120
121 lhu t0, (src) 121 lhu t0, (src)
@@ -124,12 +124,12 @@ word_align:
124 sltiu t8, a1, 56 124 sltiu t8, a1, 56
125 PTR_ADDU src, src, 0x2 125 PTR_ADDU src, src, 0x2
126 126
127dword_align: 127.Ldword_align:
128 bnez t8, do_end_words 128 bnez t8, .Ldo_end_words
129 move t8, a1 129 move t8, a1
130 130
131 andi t8, src, 0x4 131 andi t8, src, 0x4
132 beqz t8, qword_align 132 beqz t8, .Lqword_align
133 andi t8, src, 0x8 133 andi t8, src, 0x8
134 134
135 lw t0, 0x00(src) 135 lw t0, 0x00(src)
@@ -138,8 +138,8 @@ dword_align:
138 PTR_ADDU src, src, 0x4 138 PTR_ADDU src, src, 0x4
139 andi t8, src, 0x8 139 andi t8, src, 0x8
140 140
141qword_align: 141.Lqword_align:
142 beqz t8, oword_align 142 beqz t8, .Loword_align
143 andi t8, src, 0x10 143 andi t8, src, 0x10
144 144
145#ifdef USE_DOUBLE 145#ifdef USE_DOUBLE
@@ -156,8 +156,8 @@ qword_align:
156 PTR_ADDU src, src, 0x8 156 PTR_ADDU src, src, 0x8
157 andi t8, src, 0x10 157 andi t8, src, 0x10
158 158
159oword_align: 159.Loword_align:
160 beqz t8, begin_movement 160 beqz t8, .Lbegin_movement
161 LONG_SRL t8, a1, 0x7 161 LONG_SRL t8, a1, 0x7
162 162
163#ifdef USE_DOUBLE 163#ifdef USE_DOUBLE
@@ -172,11 +172,11 @@ oword_align:
172 PTR_ADDU src, src, 0x10 172 PTR_ADDU src, src, 0x10
173 LONG_SRL t8, a1, 0x7 173 LONG_SRL t8, a1, 0x7
174 174
175begin_movement: 175.Lbegin_movement:
176 beqz t8, 1f 176 beqz t8, 1f
177 andi t2, a1, 0x40 177 andi t2, a1, 0x40
178 178
179move_128bytes: 179.Lmove_128bytes:
180 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) 180 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
181 CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) 181 CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
182 CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4) 182 CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
@@ -184,43 +184,43 @@ move_128bytes:
184 LONG_SUBU t8, t8, 0x01 184 LONG_SUBU t8, t8, 0x01
185 .set reorder /* DADDI_WAR */ 185 .set reorder /* DADDI_WAR */
186 PTR_ADDU src, src, 0x80 186 PTR_ADDU src, src, 0x80
187 bnez t8, move_128bytes 187 bnez t8, .Lmove_128bytes
188 .set noreorder 188 .set noreorder
189 189
190 1: 190 1:
191 beqz t2, 1f 191 beqz t2, 1f
192 andi t2, a1, 0x20 192 andi t2, a1, 0x20
193 193
194move_64bytes: 194.Lmove_64bytes:
195 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) 195 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
196 CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4) 196 CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
197 PTR_ADDU src, src, 0x40 197 PTR_ADDU src, src, 0x40
198 198
199 1: 199 1:
200 beqz t2, do_end_words 200 beqz t2, .Ldo_end_words
201 andi t8, a1, 0x1c 201 andi t8, a1, 0x1c
202 202
203move_32bytes: 203.Lmove_32bytes:
204 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4) 204 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
205 andi t8, a1, 0x1c 205 andi t8, a1, 0x1c
206 PTR_ADDU src, src, 0x20 206 PTR_ADDU src, src, 0x20
207 207
208do_end_words: 208.Ldo_end_words:
209 beqz t8, small_csumcpy 209 beqz t8, .Lsmall_csumcpy
210 andi t2, a1, 0x3 210 andi t2, a1, 0x3
211 LONG_SRL t8, t8, 0x2 211 LONG_SRL t8, t8, 0x2
212 212
213end_words: 213.Lend_words:
214 lw t0, (src) 214 lw t0, (src)
215 LONG_SUBU t8, t8, 0x1 215 LONG_SUBU t8, t8, 0x1
216 ADDC(sum, t0) 216 ADDC(sum, t0)
217 .set reorder /* DADDI_WAR */ 217 .set reorder /* DADDI_WAR */
218 PTR_ADDU src, src, 0x4 218 PTR_ADDU src, src, 0x4
219 bnez t8, end_words 219 bnez t8, .Lend_words
220 .set noreorder 220 .set noreorder
221 221
222/* unknown src alignment and < 8 bytes to go */ 222/* unknown src alignment and < 8 bytes to go */
223small_csumcpy: 223.Lsmall_csumcpy:
224 move a1, t2 224 move a1, t2
225 225
226 andi t0, a1, 4 226 andi t0, a1, 4
@@ -413,48 +413,48 @@ FEXPORT(csum_partial_copy_nocheck)
413 */ 413 */
414 sltu t2, len, NBYTES 414 sltu t2, len, NBYTES
415 and t1, dst, ADDRMASK 415 and t1, dst, ADDRMASK
416 bnez t2, copy_bytes_checklen 416 bnez t2, .Lcopy_bytes_checklen
417 and t0, src, ADDRMASK 417 and t0, src, ADDRMASK
418 andi odd, dst, 0x1 /* odd buffer? */ 418 andi odd, dst, 0x1 /* odd buffer? */
419 bnez t1, dst_unaligned 419 bnez t1, .Ldst_unaligned
420 nop 420 nop
421 bnez t0, src_unaligned_dst_aligned 421 bnez t0, .Lsrc_unaligned_dst_aligned
422 /* 422 /*
423 * use delay slot for fall-through 423 * use delay slot for fall-through
424 * src and dst are aligned; need to compute rem 424 * src and dst are aligned; need to compute rem
425 */ 425 */
426both_aligned: 426.Lboth_aligned:
427 SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter 427 SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter
428 beqz t0, cleanup_both_aligned # len < 8*NBYTES 428 beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES
429 nop 429 nop
430 SUB len, 8*NBYTES # subtract here for bgez loop 430 SUB len, 8*NBYTES # subtract here for bgez loop
431 .align 4 431 .align 4
432 1: 432 1:
433EXC( LOAD t0, UNIT(0)(src), l_exc) 433EXC( LOAD t0, UNIT(0)(src), .Ll_exc)
434EXC( LOAD t1, UNIT(1)(src), l_exc_copy) 434EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy)
435EXC( LOAD t2, UNIT(2)(src), l_exc_copy) 435EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy)
436EXC( LOAD t3, UNIT(3)(src), l_exc_copy) 436EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy)
437EXC( LOAD t4, UNIT(4)(src), l_exc_copy) 437EXC( LOAD t4, UNIT(4)(src), .Ll_exc_copy)
438EXC( LOAD t5, UNIT(5)(src), l_exc_copy) 438EXC( LOAD t5, UNIT(5)(src), .Ll_exc_copy)
439EXC( LOAD t6, UNIT(6)(src), l_exc_copy) 439EXC( LOAD t6, UNIT(6)(src), .Ll_exc_copy)
440EXC( LOAD t7, UNIT(7)(src), l_exc_copy) 440EXC( LOAD t7, UNIT(7)(src), .Ll_exc_copy)
441 SUB len, len, 8*NBYTES 441 SUB len, len, 8*NBYTES
442 ADD src, src, 8*NBYTES 442 ADD src, src, 8*NBYTES
443EXC( STORE t0, UNIT(0)(dst), s_exc) 443EXC( STORE t0, UNIT(0)(dst), .Ls_exc)
444 ADDC(sum, t0) 444 ADDC(sum, t0)
445EXC( STORE t1, UNIT(1)(dst), s_exc) 445EXC( STORE t1, UNIT(1)(dst), .Ls_exc)
446 ADDC(sum, t1) 446 ADDC(sum, t1)
447EXC( STORE t2, UNIT(2)(dst), s_exc) 447EXC( STORE t2, UNIT(2)(dst), .Ls_exc)
448 ADDC(sum, t2) 448 ADDC(sum, t2)
449EXC( STORE t3, UNIT(3)(dst), s_exc) 449EXC( STORE t3, UNIT(3)(dst), .Ls_exc)
450 ADDC(sum, t3) 450 ADDC(sum, t3)
451EXC( STORE t4, UNIT(4)(dst), s_exc) 451EXC( STORE t4, UNIT(4)(dst), .Ls_exc)
452 ADDC(sum, t4) 452 ADDC(sum, t4)
453EXC( STORE t5, UNIT(5)(dst), s_exc) 453EXC( STORE t5, UNIT(5)(dst), .Ls_exc)
454 ADDC(sum, t5) 454 ADDC(sum, t5)
455EXC( STORE t6, UNIT(6)(dst), s_exc) 455EXC( STORE t6, UNIT(6)(dst), .Ls_exc)
456 ADDC(sum, t6) 456 ADDC(sum, t6)
457EXC( STORE t7, UNIT(7)(dst), s_exc) 457EXC( STORE t7, UNIT(7)(dst), .Ls_exc)
458 ADDC(sum, t7) 458 ADDC(sum, t7)
459 .set reorder /* DADDI_WAR */ 459 .set reorder /* DADDI_WAR */
460 ADD dst, dst, 8*NBYTES 460 ADD dst, dst, 8*NBYTES
@@ -465,44 +465,44 @@ EXC( STORE t7, UNIT(7)(dst), s_exc)
465 /* 465 /*
466 * len == the number of bytes left to copy < 8*NBYTES 466 * len == the number of bytes left to copy < 8*NBYTES
467 */ 467 */
468cleanup_both_aligned: 468.Lcleanup_both_aligned:
469#define rem t7 469#define rem t7
470 beqz len, done 470 beqz len, .Ldone
471 sltu t0, len, 4*NBYTES 471 sltu t0, len, 4*NBYTES
472 bnez t0, less_than_4units 472 bnez t0, .Lless_than_4units
473 and rem, len, (NBYTES-1) # rem = len % NBYTES 473 and rem, len, (NBYTES-1) # rem = len % NBYTES
474 /* 474 /*
475 * len >= 4*NBYTES 475 * len >= 4*NBYTES
476 */ 476 */
477EXC( LOAD t0, UNIT(0)(src), l_exc) 477EXC( LOAD t0, UNIT(0)(src), .Ll_exc)
478EXC( LOAD t1, UNIT(1)(src), l_exc_copy) 478EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy)
479EXC( LOAD t2, UNIT(2)(src), l_exc_copy) 479EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy)
480EXC( LOAD t3, UNIT(3)(src), l_exc_copy) 480EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy)
481 SUB len, len, 4*NBYTES 481 SUB len, len, 4*NBYTES
482 ADD src, src, 4*NBYTES 482 ADD src, src, 4*NBYTES
483EXC( STORE t0, UNIT(0)(dst), s_exc) 483EXC( STORE t0, UNIT(0)(dst), .Ls_exc)
484 ADDC(sum, t0) 484 ADDC(sum, t0)
485EXC( STORE t1, UNIT(1)(dst), s_exc) 485EXC( STORE t1, UNIT(1)(dst), .Ls_exc)
486 ADDC(sum, t1) 486 ADDC(sum, t1)
487EXC( STORE t2, UNIT(2)(dst), s_exc) 487EXC( STORE t2, UNIT(2)(dst), .Ls_exc)
488 ADDC(sum, t2) 488 ADDC(sum, t2)
489EXC( STORE t3, UNIT(3)(dst), s_exc) 489EXC( STORE t3, UNIT(3)(dst), .Ls_exc)
490 ADDC(sum, t3) 490 ADDC(sum, t3)
491 .set reorder /* DADDI_WAR */ 491 .set reorder /* DADDI_WAR */
492 ADD dst, dst, 4*NBYTES 492 ADD dst, dst, 4*NBYTES
493 beqz len, done 493 beqz len, .Ldone
494 .set noreorder 494 .set noreorder
495less_than_4units: 495.Lless_than_4units:
496 /* 496 /*
497 * rem = len % NBYTES 497 * rem = len % NBYTES
498 */ 498 */
499 beq rem, len, copy_bytes 499 beq rem, len, .Lcopy_bytes
500 nop 500 nop
501 1: 501 1:
502EXC( LOAD t0, 0(src), l_exc) 502EXC( LOAD t0, 0(src), .Ll_exc)
503 ADD src, src, NBYTES 503 ADD src, src, NBYTES
504 SUB len, len, NBYTES 504 SUB len, len, NBYTES
505EXC( STORE t0, 0(dst), s_exc) 505EXC( STORE t0, 0(dst), .Ls_exc)
506 ADDC(sum, t0) 506 ADDC(sum, t0)
507 .set reorder /* DADDI_WAR */ 507 .set reorder /* DADDI_WAR */
508 ADD dst, dst, NBYTES 508 ADD dst, dst, NBYTES
@@ -521,20 +521,20 @@ EXC( STORE t0, 0(dst), s_exc)
521 * more instruction-level parallelism. 521 * more instruction-level parallelism.
522 */ 522 */
523#define bits t2 523#define bits t2
524 beqz len, done 524 beqz len, .Ldone
525 ADD t1, dst, len # t1 is just past last byte of dst 525 ADD t1, dst, len # t1 is just past last byte of dst
526 li bits, 8*NBYTES 526 li bits, 8*NBYTES
527 SLL rem, len, 3 # rem = number of bits to keep 527 SLL rem, len, 3 # rem = number of bits to keep
528EXC( LOAD t0, 0(src), l_exc) 528EXC( LOAD t0, 0(src), .Ll_exc)
529 SUB bits, bits, rem # bits = number of bits to discard 529 SUB bits, bits, rem # bits = number of bits to discard
530 SHIFT_DISCARD t0, t0, bits 530 SHIFT_DISCARD t0, t0, bits
531EXC( STREST t0, -1(t1), s_exc) 531EXC( STREST t0, -1(t1), .Ls_exc)
532 SHIFT_DISCARD_REVERT t0, t0, bits 532 SHIFT_DISCARD_REVERT t0, t0, bits
533 .set reorder 533 .set reorder
534 ADDC(sum, t0) 534 ADDC(sum, t0)
535 b done 535 b .Ldone
536 .set noreorder 536 .set noreorder
537dst_unaligned: 537.Ldst_unaligned:
538 /* 538 /*
539 * dst is unaligned 539 * dst is unaligned
540 * t0 = src & ADDRMASK 540 * t0 = src & ADDRMASK
@@ -545,25 +545,25 @@ dst_unaligned:
545 * Set match = (src and dst have same alignment) 545 * Set match = (src and dst have same alignment)
546 */ 546 */
547#define match rem 547#define match rem
548EXC( LDFIRST t3, FIRST(0)(src), l_exc) 548EXC( LDFIRST t3, FIRST(0)(src), .Ll_exc)
549 ADD t2, zero, NBYTES 549 ADD t2, zero, NBYTES
550EXC( LDREST t3, REST(0)(src), l_exc_copy) 550EXC( LDREST t3, REST(0)(src), .Ll_exc_copy)
551 SUB t2, t2, t1 # t2 = number of bytes copied 551 SUB t2, t2, t1 # t2 = number of bytes copied
552 xor match, t0, t1 552 xor match, t0, t1
553EXC( STFIRST t3, FIRST(0)(dst), s_exc) 553EXC( STFIRST t3, FIRST(0)(dst), .Ls_exc)
554 SLL t4, t1, 3 # t4 = number of bits to discard 554 SLL t4, t1, 3 # t4 = number of bits to discard
555 SHIFT_DISCARD t3, t3, t4 555 SHIFT_DISCARD t3, t3, t4
556 /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */ 556 /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
557 ADDC(sum, t3) 557 ADDC(sum, t3)
558 beq len, t2, done 558 beq len, t2, .Ldone
559 SUB len, len, t2 559 SUB len, len, t2
560 ADD dst, dst, t2 560 ADD dst, dst, t2
561 beqz match, both_aligned 561 beqz match, .Lboth_aligned
562 ADD src, src, t2 562 ADD src, src, t2
563 563
564src_unaligned_dst_aligned: 564.Lsrc_unaligned_dst_aligned:
565 SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter 565 SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter
566 beqz t0, cleanup_src_unaligned 566 beqz t0, .Lcleanup_src_unaligned
567 and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES 567 and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES
568 1: 568 1:
569/* 569/*
@@ -572,53 +572,53 @@ src_unaligned_dst_aligned:
572 * It's OK to load FIRST(N+1) before REST(N) because the two addresses 572 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
573 * are to the same unit (unless src is aligned, but it's not). 573 * are to the same unit (unless src is aligned, but it's not).
574 */ 574 */
575EXC( LDFIRST t0, FIRST(0)(src), l_exc) 575EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc)
576EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy) 576EXC( LDFIRST t1, FIRST(1)(src), .Ll_exc_copy)
577 SUB len, len, 4*NBYTES 577 SUB len, len, 4*NBYTES
578EXC( LDREST t0, REST(0)(src), l_exc_copy) 578EXC( LDREST t0, REST(0)(src), .Ll_exc_copy)
579EXC( LDREST t1, REST(1)(src), l_exc_copy) 579EXC( LDREST t1, REST(1)(src), .Ll_exc_copy)
580EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy) 580EXC( LDFIRST t2, FIRST(2)(src), .Ll_exc_copy)
581EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy) 581EXC( LDFIRST t3, FIRST(3)(src), .Ll_exc_copy)
582EXC( LDREST t2, REST(2)(src), l_exc_copy) 582EXC( LDREST t2, REST(2)(src), .Ll_exc_copy)
583EXC( LDREST t3, REST(3)(src), l_exc_copy) 583EXC( LDREST t3, REST(3)(src), .Ll_exc_copy)
584 ADD src, src, 4*NBYTES 584 ADD src, src, 4*NBYTES
585#ifdef CONFIG_CPU_SB1 585#ifdef CONFIG_CPU_SB1
586 nop # improves slotting 586 nop # improves slotting
587#endif 587#endif
588EXC( STORE t0, UNIT(0)(dst), s_exc) 588EXC( STORE t0, UNIT(0)(dst), .Ls_exc)
589 ADDC(sum, t0) 589 ADDC(sum, t0)
590EXC( STORE t1, UNIT(1)(dst), s_exc) 590EXC( STORE t1, UNIT(1)(dst), .Ls_exc)
591 ADDC(sum, t1) 591 ADDC(sum, t1)
592EXC( STORE t2, UNIT(2)(dst), s_exc) 592EXC( STORE t2, UNIT(2)(dst), .Ls_exc)
593 ADDC(sum, t2) 593 ADDC(sum, t2)
594EXC( STORE t3, UNIT(3)(dst), s_exc) 594EXC( STORE t3, UNIT(3)(dst), .Ls_exc)
595 ADDC(sum, t3) 595 ADDC(sum, t3)
596 .set reorder /* DADDI_WAR */ 596 .set reorder /* DADDI_WAR */
597 ADD dst, dst, 4*NBYTES 597 ADD dst, dst, 4*NBYTES
598 bne len, rem, 1b 598 bne len, rem, 1b
599 .set noreorder 599 .set noreorder
600 600
601cleanup_src_unaligned: 601.Lcleanup_src_unaligned:
602 beqz len, done 602 beqz len, .Ldone
603 and rem, len, NBYTES-1 # rem = len % NBYTES 603 and rem, len, NBYTES-1 # rem = len % NBYTES
604 beq rem, len, copy_bytes 604 beq rem, len, .Lcopy_bytes
605 nop 605 nop
606 1: 606 1:
607EXC( LDFIRST t0, FIRST(0)(src), l_exc) 607EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc)
608EXC( LDREST t0, REST(0)(src), l_exc_copy) 608EXC( LDREST t0, REST(0)(src), .Ll_exc_copy)
609 ADD src, src, NBYTES 609 ADD src, src, NBYTES
610 SUB len, len, NBYTES 610 SUB len, len, NBYTES
611EXC( STORE t0, 0(dst), s_exc) 611EXC( STORE t0, 0(dst), .Ls_exc)
612 ADDC(sum, t0) 612 ADDC(sum, t0)
613 .set reorder /* DADDI_WAR */ 613 .set reorder /* DADDI_WAR */
614 ADD dst, dst, NBYTES 614 ADD dst, dst, NBYTES
615 bne len, rem, 1b 615 bne len, rem, 1b
616 .set noreorder 616 .set noreorder
617 617
618copy_bytes_checklen: 618.Lcopy_bytes_checklen:
619 beqz len, done 619 beqz len, .Ldone
620 nop 620 nop
621copy_bytes: 621.Lcopy_bytes:
622 /* 0 < len < NBYTES */ 622 /* 0 < len < NBYTES */
623#ifdef CONFIG_CPU_LITTLE_ENDIAN 623#ifdef CONFIG_CPU_LITTLE_ENDIAN
624#define SHIFT_START 0 624#define SHIFT_START 0
@@ -629,14 +629,14 @@ copy_bytes:
629#endif 629#endif
630 move t2, zero # partial word 630 move t2, zero # partial word
631 li t3, SHIFT_START # shift 631 li t3, SHIFT_START # shift
632/* use l_exc_copy here to return correct sum on fault */ 632/* use .Ll_exc_copy here to return correct sum on fault */
633#define COPY_BYTE(N) \ 633#define COPY_BYTE(N) \
634EXC( lbu t0, N(src), l_exc_copy); \ 634EXC( lbu t0, N(src), .Ll_exc_copy); \
635 SUB len, len, 1; \ 635 SUB len, len, 1; \
636EXC( sb t0, N(dst), s_exc); \ 636EXC( sb t0, N(dst), .Ls_exc); \
637 SLLV t0, t0, t3; \ 637 SLLV t0, t0, t3; \
638 addu t3, SHIFT_INC; \ 638 addu t3, SHIFT_INC; \
639 beqz len, copy_bytes_done; \ 639 beqz len, .Lcopy_bytes_done; \
640 or t2, t0 640 or t2, t0
641 641
642 COPY_BYTE(0) 642 COPY_BYTE(0)
@@ -647,14 +647,14 @@ EXC( sb t0, N(dst), s_exc); \
647 COPY_BYTE(4) 647 COPY_BYTE(4)
648 COPY_BYTE(5) 648 COPY_BYTE(5)
649#endif 649#endif
650EXC( lbu t0, NBYTES-2(src), l_exc_copy) 650EXC( lbu t0, NBYTES-2(src), .Ll_exc_copy)
651 SUB len, len, 1 651 SUB len, len, 1
652EXC( sb t0, NBYTES-2(dst), s_exc) 652EXC( sb t0, NBYTES-2(dst), .Ls_exc)
653 SLLV t0, t0, t3 653 SLLV t0, t0, t3
654 or t2, t0 654 or t2, t0
655copy_bytes_done: 655.Lcopy_bytes_done:
656 ADDC(sum, t2) 656 ADDC(sum, t2)
657done: 657.Ldone:
658 /* fold checksum */ 658 /* fold checksum */
659 .set push 659 .set push
660 .set noat 660 .set noat
@@ -685,7 +685,7 @@ done:
685 jr ra 685 jr ra
686 .set noreorder 686 .set noreorder
687 687
688l_exc_copy: 688.Ll_exc_copy:
689 /* 689 /*
690 * Copy bytes from src until faulting load address (or until a 690 * Copy bytes from src until faulting load address (or until a
691 * lb faults) 691 * lb faults)
@@ -700,7 +700,7 @@ l_exc_copy:
700 li t2, SHIFT_START 700 li t2, SHIFT_START
701 LOAD t0, THREAD_BUADDR(t0) 701 LOAD t0, THREAD_BUADDR(t0)
702 1: 702 1:
703EXC( lbu t1, 0(src), l_exc) 703EXC( lbu t1, 0(src), .Ll_exc)
704 ADD src, src, 1 704 ADD src, src, 1
705 sb t1, 0(dst) # can't fault -- we're copy_from_user 705 sb t1, 0(dst) # can't fault -- we're copy_from_user
706 SLLV t1, t1, t2 706 SLLV t1, t1, t2
@@ -710,7 +710,7 @@ EXC( lbu t1, 0(src), l_exc)
710 ADD dst, dst, 1 710 ADD dst, dst, 1
711 bne src, t0, 1b 711 bne src, t0, 1b
712 .set noreorder 712 .set noreorder
713l_exc: 713.Ll_exc:
714 LOAD t0, TI_TASK($28) 714 LOAD t0, TI_TASK($28)
715 nop 715 nop
716 LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address 716 LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address
@@ -729,7 +729,7 @@ l_exc:
729 */ 729 */
730 .set reorder /* DADDI_WAR */ 730 .set reorder /* DADDI_WAR */
731 SUB src, len, 1 731 SUB src, len, 1
732 beqz len, done 732 beqz len, .Ldone
733 .set noreorder 733 .set noreorder
734 1: sb zero, 0(dst) 734 1: sb zero, 0(dst)
735 ADD dst, dst, 1 735 ADD dst, dst, 1
@@ -744,10 +744,10 @@ l_exc:
744 SUB src, src, v1 744 SUB src, src, v1
745#endif 745#endif
746 li v1, -EFAULT 746 li v1, -EFAULT
747 b done 747 b .Ldone
748 sw v1, (errptr) 748 sw v1, (errptr)
749 749
750s_exc: 750.Ls_exc:
751 li v0, -1 /* invalid checksum */ 751 li v0, -1 /* invalid checksum */
752 li v1, -EFAULT 752 li v1, -EFAULT
753 jr ra 753 jr ra
diff --git a/arch/mips/lib/memcpy-inatomic.S b/arch/mips/lib/memcpy-inatomic.S
index d1b08f5d6860..736d0fb56a94 100644
--- a/arch/mips/lib/memcpy-inatomic.S
+++ b/arch/mips/lib/memcpy-inatomic.S
@@ -209,36 +209,36 @@ LEAF(__copy_user_inatomic)
209 and t1, dst, ADDRMASK 209 and t1, dst, ADDRMASK
210 PREF( 0, 1*32(src) ) 210 PREF( 0, 1*32(src) )
211 PREF( 1, 1*32(dst) ) 211 PREF( 1, 1*32(dst) )
212 bnez t2, copy_bytes_checklen 212 bnez t2, .Lcopy_bytes_checklen
213 and t0, src, ADDRMASK 213 and t0, src, ADDRMASK
214 PREF( 0, 2*32(src) ) 214 PREF( 0, 2*32(src) )
215 PREF( 1, 2*32(dst) ) 215 PREF( 1, 2*32(dst) )
216 bnez t1, dst_unaligned 216 bnez t1, .Ldst_unaligned
217 nop 217 nop
218 bnez t0, src_unaligned_dst_aligned 218 bnez t0, .Lsrc_unaligned_dst_aligned
219 /* 219 /*
220 * use delay slot for fall-through 220 * use delay slot for fall-through
221 * src and dst are aligned; need to compute rem 221 * src and dst are aligned; need to compute rem
222 */ 222 */
223both_aligned: 223.Lboth_aligned:
224 SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter 224 SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter
225 beqz t0, cleanup_both_aligned # len < 8*NBYTES 225 beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES
226 and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES) 226 and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES)
227 PREF( 0, 3*32(src) ) 227 PREF( 0, 3*32(src) )
228 PREF( 1, 3*32(dst) ) 228 PREF( 1, 3*32(dst) )
229 .align 4 229 .align 4
230 1: 230 1:
231EXC( LOAD t0, UNIT(0)(src), l_exc) 231EXC( LOAD t0, UNIT(0)(src), .Ll_exc)
232EXC( LOAD t1, UNIT(1)(src), l_exc_copy) 232EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy)
233EXC( LOAD t2, UNIT(2)(src), l_exc_copy) 233EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy)
234EXC( LOAD t3, UNIT(3)(src), l_exc_copy) 234EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy)
235 SUB len, len, 8*NBYTES 235 SUB len, len, 8*NBYTES
236EXC( LOAD t4, UNIT(4)(src), l_exc_copy) 236EXC( LOAD t4, UNIT(4)(src), .Ll_exc_copy)
237EXC( LOAD t7, UNIT(5)(src), l_exc_copy) 237EXC( LOAD t7, UNIT(5)(src), .Ll_exc_copy)
238 STORE t0, UNIT(0)(dst) 238 STORE t0, UNIT(0)(dst)
239 STORE t1, UNIT(1)(dst) 239 STORE t1, UNIT(1)(dst)
240EXC( LOAD t0, UNIT(6)(src), l_exc_copy) 240EXC( LOAD t0, UNIT(6)(src), .Ll_exc_copy)
241EXC( LOAD t1, UNIT(7)(src), l_exc_copy) 241EXC( LOAD t1, UNIT(7)(src), .Ll_exc_copy)
242 ADD src, src, 8*NBYTES 242 ADD src, src, 8*NBYTES
243 ADD dst, dst, 8*NBYTES 243 ADD dst, dst, 8*NBYTES
244 STORE t2, UNIT(-6)(dst) 244 STORE t2, UNIT(-6)(dst)
@@ -255,18 +255,18 @@ EXC( LOAD t1, UNIT(7)(src), l_exc_copy)
255 /* 255 /*
256 * len == rem == the number of bytes left to copy < 8*NBYTES 256 * len == rem == the number of bytes left to copy < 8*NBYTES
257 */ 257 */
258cleanup_both_aligned: 258.Lcleanup_both_aligned:
259 beqz len, done 259 beqz len, .Ldone
260 sltu t0, len, 4*NBYTES 260 sltu t0, len, 4*NBYTES
261 bnez t0, less_than_4units 261 bnez t0, .Lless_than_4units
262 and rem, len, (NBYTES-1) # rem = len % NBYTES 262 and rem, len, (NBYTES-1) # rem = len % NBYTES
263 /* 263 /*
264 * len >= 4*NBYTES 264 * len >= 4*NBYTES
265 */ 265 */
266EXC( LOAD t0, UNIT(0)(src), l_exc) 266EXC( LOAD t0, UNIT(0)(src), .Ll_exc)
267EXC( LOAD t1, UNIT(1)(src), l_exc_copy) 267EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy)
268EXC( LOAD t2, UNIT(2)(src), l_exc_copy) 268EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy)
269EXC( LOAD t3, UNIT(3)(src), l_exc_copy) 269EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy)
270 SUB len, len, 4*NBYTES 270 SUB len, len, 4*NBYTES
271 ADD src, src, 4*NBYTES 271 ADD src, src, 4*NBYTES
272 STORE t0, UNIT(0)(dst) 272 STORE t0, UNIT(0)(dst)
@@ -275,16 +275,16 @@ EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
275 STORE t3, UNIT(3)(dst) 275 STORE t3, UNIT(3)(dst)
276 .set reorder /* DADDI_WAR */ 276 .set reorder /* DADDI_WAR */
277 ADD dst, dst, 4*NBYTES 277 ADD dst, dst, 4*NBYTES
278 beqz len, done 278 beqz len, .Ldone
279 .set noreorder 279 .set noreorder
280less_than_4units: 280.Lless_than_4units:
281 /* 281 /*
282 * rem = len % NBYTES 282 * rem = len % NBYTES
283 */ 283 */
284 beq rem, len, copy_bytes 284 beq rem, len, .Lcopy_bytes
285 nop 285 nop
286 1: 286 1:
287EXC( LOAD t0, 0(src), l_exc) 287EXC( LOAD t0, 0(src), .Ll_exc)
288 ADD src, src, NBYTES 288 ADD src, src, NBYTES
289 SUB len, len, NBYTES 289 SUB len, len, NBYTES
290 STORE t0, 0(dst) 290 STORE t0, 0(dst)
@@ -305,17 +305,17 @@ EXC( LOAD t0, 0(src), l_exc)
305 * more instruction-level parallelism. 305 * more instruction-level parallelism.
306 */ 306 */
307#define bits t2 307#define bits t2
308 beqz len, done 308 beqz len, .Ldone
309 ADD t1, dst, len # t1 is just past last byte of dst 309 ADD t1, dst, len # t1 is just past last byte of dst
310 li bits, 8*NBYTES 310 li bits, 8*NBYTES
311 SLL rem, len, 3 # rem = number of bits to keep 311 SLL rem, len, 3 # rem = number of bits to keep
312EXC( LOAD t0, 0(src), l_exc) 312EXC( LOAD t0, 0(src), .Ll_exc)
313 SUB bits, bits, rem # bits = number of bits to discard 313 SUB bits, bits, rem # bits = number of bits to discard
314 SHIFT_DISCARD t0, t0, bits 314 SHIFT_DISCARD t0, t0, bits
315 STREST t0, -1(t1) 315 STREST t0, -1(t1)
316 jr ra 316 jr ra
317 move len, zero 317 move len, zero
318dst_unaligned: 318.Ldst_unaligned:
319 /* 319 /*
320 * dst is unaligned 320 * dst is unaligned
321 * t0 = src & ADDRMASK 321 * t0 = src & ADDRMASK
@@ -326,22 +326,22 @@ dst_unaligned:
326 * Set match = (src and dst have same alignment) 326 * Set match = (src and dst have same alignment)
327 */ 327 */
328#define match rem 328#define match rem
329EXC( LDFIRST t3, FIRST(0)(src), l_exc) 329EXC( LDFIRST t3, FIRST(0)(src), .Ll_exc)
330 ADD t2, zero, NBYTES 330 ADD t2, zero, NBYTES
331EXC( LDREST t3, REST(0)(src), l_exc_copy) 331EXC( LDREST t3, REST(0)(src), .Ll_exc_copy)
332 SUB t2, t2, t1 # t2 = number of bytes copied 332 SUB t2, t2, t1 # t2 = number of bytes copied
333 xor match, t0, t1 333 xor match, t0, t1
334 STFIRST t3, FIRST(0)(dst) 334 STFIRST t3, FIRST(0)(dst)
335 beq len, t2, done 335 beq len, t2, .Ldone
336 SUB len, len, t2 336 SUB len, len, t2
337 ADD dst, dst, t2 337 ADD dst, dst, t2
338 beqz match, both_aligned 338 beqz match, .Lboth_aligned
339 ADD src, src, t2 339 ADD src, src, t2
340 340
341src_unaligned_dst_aligned: 341.Lsrc_unaligned_dst_aligned:
342 SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter 342 SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter
343 PREF( 0, 3*32(src) ) 343 PREF( 0, 3*32(src) )
344 beqz t0, cleanup_src_unaligned 344 beqz t0, .Lcleanup_src_unaligned
345 and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES 345 and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES
346 PREF( 1, 3*32(dst) ) 346 PREF( 1, 3*32(dst) )
347 1: 347 1:
@@ -351,15 +351,15 @@ src_unaligned_dst_aligned:
351 * It's OK to load FIRST(N+1) before REST(N) because the two addresses 351 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
352 * are to the same unit (unless src is aligned, but it's not). 352 * are to the same unit (unless src is aligned, but it's not).
353 */ 353 */
354EXC( LDFIRST t0, FIRST(0)(src), l_exc) 354EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc)
355EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy) 355EXC( LDFIRST t1, FIRST(1)(src), .Ll_exc_copy)
356 SUB len, len, 4*NBYTES 356 SUB len, len, 4*NBYTES
357EXC( LDREST t0, REST(0)(src), l_exc_copy) 357EXC( LDREST t0, REST(0)(src), .Ll_exc_copy)
358EXC( LDREST t1, REST(1)(src), l_exc_copy) 358EXC( LDREST t1, REST(1)(src), .Ll_exc_copy)
359EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy) 359EXC( LDFIRST t2, FIRST(2)(src), .Ll_exc_copy)
360EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy) 360EXC( LDFIRST t3, FIRST(3)(src), .Ll_exc_copy)
361EXC( LDREST t2, REST(2)(src), l_exc_copy) 361EXC( LDREST t2, REST(2)(src), .Ll_exc_copy)
362EXC( LDREST t3, REST(3)(src), l_exc_copy) 362EXC( LDREST t3, REST(3)(src), .Ll_exc_copy)
363 PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed) 363 PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed)
364 ADD src, src, 4*NBYTES 364 ADD src, src, 4*NBYTES
365#ifdef CONFIG_CPU_SB1 365#ifdef CONFIG_CPU_SB1
@@ -375,14 +375,14 @@ EXC( LDREST t3, REST(3)(src), l_exc_copy)
375 bne len, rem, 1b 375 bne len, rem, 1b
376 .set noreorder 376 .set noreorder
377 377
378cleanup_src_unaligned: 378.Lcleanup_src_unaligned:
379 beqz len, done 379 beqz len, .Ldone
380 and rem, len, NBYTES-1 # rem = len % NBYTES 380 and rem, len, NBYTES-1 # rem = len % NBYTES
381 beq rem, len, copy_bytes 381 beq rem, len, .Lcopy_bytes
382 nop 382 nop
383 1: 383 1:
384EXC( LDFIRST t0, FIRST(0)(src), l_exc) 384EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc)
385EXC( LDREST t0, REST(0)(src), l_exc_copy) 385EXC( LDREST t0, REST(0)(src), .Ll_exc_copy)
386 ADD src, src, NBYTES 386 ADD src, src, NBYTES
387 SUB len, len, NBYTES 387 SUB len, len, NBYTES
388 STORE t0, 0(dst) 388 STORE t0, 0(dst)
@@ -391,15 +391,15 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy)
391 bne len, rem, 1b 391 bne len, rem, 1b
392 .set noreorder 392 .set noreorder
393 393
394copy_bytes_checklen: 394.Lcopy_bytes_checklen:
395 beqz len, done 395 beqz len, .Ldone
396 nop 396 nop
397copy_bytes: 397.Lcopy_bytes:
398 /* 0 < len < NBYTES */ 398 /* 0 < len < NBYTES */
399#define COPY_BYTE(N) \ 399#define COPY_BYTE(N) \
400EXC( lb t0, N(src), l_exc); \ 400EXC( lb t0, N(src), .Ll_exc); \
401 SUB len, len, 1; \ 401 SUB len, len, 1; \
402 beqz len, done; \ 402 beqz len, .Ldone; \
403 sb t0, N(dst) 403 sb t0, N(dst)
404 404
405 COPY_BYTE(0) 405 COPY_BYTE(0)
@@ -410,16 +410,16 @@ EXC( lb t0, N(src), l_exc); \
410 COPY_BYTE(4) 410 COPY_BYTE(4)
411 COPY_BYTE(5) 411 COPY_BYTE(5)
412#endif 412#endif
413EXC( lb t0, NBYTES-2(src), l_exc) 413EXC( lb t0, NBYTES-2(src), .Ll_exc)
414 SUB len, len, 1 414 SUB len, len, 1
415 jr ra 415 jr ra
416 sb t0, NBYTES-2(dst) 416 sb t0, NBYTES-2(dst)
417done: 417.Ldone:
418 jr ra 418 jr ra
419 nop 419 nop
420 END(__copy_user_inatomic) 420 END(__copy_user_inatomic)
421 421
422l_exc_copy: 422.Ll_exc_copy:
423 /* 423 /*
424 * Copy bytes from src until faulting load address (or until a 424 * Copy bytes from src until faulting load address (or until a
425 * lb faults) 425 * lb faults)
@@ -434,14 +434,14 @@ l_exc_copy:
434 nop 434 nop
435 LOAD t0, THREAD_BUADDR(t0) 435 LOAD t0, THREAD_BUADDR(t0)
436 1: 436 1:
437EXC( lb t1, 0(src), l_exc) 437EXC( lb t1, 0(src), .Ll_exc)
438 ADD src, src, 1 438 ADD src, src, 1
439 sb t1, 0(dst) # can't fault -- we're copy_from_user 439 sb t1, 0(dst) # can't fault -- we're copy_from_user
440 .set reorder /* DADDI_WAR */ 440 .set reorder /* DADDI_WAR */
441 ADD dst, dst, 1 441 ADD dst, dst, 1
442 bne src, t0, 1b 442 bne src, t0, 1b
443 .set noreorder 443 .set noreorder
444l_exc: 444.Ll_exc:
445 LOAD t0, TI_TASK($28) 445 LOAD t0, TI_TASK($28)
446 nop 446 nop
447 LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address 447 LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index 01e450b1ebc9..c06cccf60bec 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -191,7 +191,7 @@
191 .align 5 191 .align 5
192LEAF(memcpy) /* a0=dst a1=src a2=len */ 192LEAF(memcpy) /* a0=dst a1=src a2=len */
193 move v0, dst /* return value */ 193 move v0, dst /* return value */
194__memcpy: 194.L__memcpy:
195FEXPORT(__copy_user) 195FEXPORT(__copy_user)
196 /* 196 /*
197 * Note: dst & src may be unaligned, len may be 0 197 * Note: dst & src may be unaligned, len may be 0
@@ -213,45 +213,45 @@ FEXPORT(__copy_user)
213 and t1, dst, ADDRMASK 213 and t1, dst, ADDRMASK
214 PREF( 0, 1*32(src) ) 214 PREF( 0, 1*32(src) )
215 PREF( 1, 1*32(dst) ) 215 PREF( 1, 1*32(dst) )
216 bnez t2, copy_bytes_checklen 216 bnez t2, .Lcopy_bytes_checklen
217 and t0, src, ADDRMASK 217 and t0, src, ADDRMASK
218 PREF( 0, 2*32(src) ) 218 PREF( 0, 2*32(src) )
219 PREF( 1, 2*32(dst) ) 219 PREF( 1, 2*32(dst) )
220 bnez t1, dst_unaligned 220 bnez t1, .Ldst_unaligned
221 nop 221 nop
222 bnez t0, src_unaligned_dst_aligned 222 bnez t0, .Lsrc_unaligned_dst_aligned
223 /* 223 /*
224 * use delay slot for fall-through 224 * use delay slot for fall-through
225 * src and dst are aligned; need to compute rem 225 * src and dst are aligned; need to compute rem
226 */ 226 */
227both_aligned: 227.Lboth_aligned:
228 SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter 228 SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter
229 beqz t0, cleanup_both_aligned # len < 8*NBYTES 229 beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES
230 and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES) 230 and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES)
231 PREF( 0, 3*32(src) ) 231 PREF( 0, 3*32(src) )
232 PREF( 1, 3*32(dst) ) 232 PREF( 1, 3*32(dst) )
233 .align 4 233 .align 4
2341: 2341:
235 R10KCBARRIER(0(ra)) 235 R10KCBARRIER(0(ra))
236EXC( LOAD t0, UNIT(0)(src), l_exc) 236EXC( LOAD t0, UNIT(0)(src), .Ll_exc)
237EXC( LOAD t1, UNIT(1)(src), l_exc_copy) 237EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy)
238EXC( LOAD t2, UNIT(2)(src), l_exc_copy) 238EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy)
239EXC( LOAD t3, UNIT(3)(src), l_exc_copy) 239EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy)
240 SUB len, len, 8*NBYTES 240 SUB len, len, 8*NBYTES
241EXC( LOAD t4, UNIT(4)(src), l_exc_copy) 241EXC( LOAD t4, UNIT(4)(src), .Ll_exc_copy)
242EXC( LOAD t7, UNIT(5)(src), l_exc_copy) 242EXC( LOAD t7, UNIT(5)(src), .Ll_exc_copy)
243EXC( STORE t0, UNIT(0)(dst), s_exc_p8u) 243EXC( STORE t0, UNIT(0)(dst), .Ls_exc_p8u)
244EXC( STORE t1, UNIT(1)(dst), s_exc_p7u) 244EXC( STORE t1, UNIT(1)(dst), .Ls_exc_p7u)
245EXC( LOAD t0, UNIT(6)(src), l_exc_copy) 245EXC( LOAD t0, UNIT(6)(src), .Ll_exc_copy)
246EXC( LOAD t1, UNIT(7)(src), l_exc_copy) 246EXC( LOAD t1, UNIT(7)(src), .Ll_exc_copy)
247 ADD src, src, 8*NBYTES 247 ADD src, src, 8*NBYTES
248 ADD dst, dst, 8*NBYTES 248 ADD dst, dst, 8*NBYTES
249EXC( STORE t2, UNIT(-6)(dst), s_exc_p6u) 249EXC( STORE t2, UNIT(-6)(dst), .Ls_exc_p6u)
250EXC( STORE t3, UNIT(-5)(dst), s_exc_p5u) 250EXC( STORE t3, UNIT(-5)(dst), .Ls_exc_p5u)
251EXC( STORE t4, UNIT(-4)(dst), s_exc_p4u) 251EXC( STORE t4, UNIT(-4)(dst), .Ls_exc_p4u)
252EXC( STORE t7, UNIT(-3)(dst), s_exc_p3u) 252EXC( STORE t7, UNIT(-3)(dst), .Ls_exc_p3u)
253EXC( STORE t0, UNIT(-2)(dst), s_exc_p2u) 253EXC( STORE t0, UNIT(-2)(dst), .Ls_exc_p2u)
254EXC( STORE t1, UNIT(-1)(dst), s_exc_p1u) 254EXC( STORE t1, UNIT(-1)(dst), .Ls_exc_p1u)
255 PREF( 0, 8*32(src) ) 255 PREF( 0, 8*32(src) )
256 PREF( 1, 8*32(dst) ) 256 PREF( 1, 8*32(dst) )
257 bne len, rem, 1b 257 bne len, rem, 1b
@@ -260,41 +260,41 @@ EXC( STORE t1, UNIT(-1)(dst), s_exc_p1u)
260 /* 260 /*
261 * len == rem == the number of bytes left to copy < 8*NBYTES 261 * len == rem == the number of bytes left to copy < 8*NBYTES
262 */ 262 */
263cleanup_both_aligned: 263.Lcleanup_both_aligned:
264 beqz len, done 264 beqz len, .Ldone
265 sltu t0, len, 4*NBYTES 265 sltu t0, len, 4*NBYTES
266 bnez t0, less_than_4units 266 bnez t0, .Lless_than_4units
267 and rem, len, (NBYTES-1) # rem = len % NBYTES 267 and rem, len, (NBYTES-1) # rem = len % NBYTES
268 /* 268 /*
269 * len >= 4*NBYTES 269 * len >= 4*NBYTES
270 */ 270 */
271EXC( LOAD t0, UNIT(0)(src), l_exc) 271EXC( LOAD t0, UNIT(0)(src), .Ll_exc)
272EXC( LOAD t1, UNIT(1)(src), l_exc_copy) 272EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy)
273EXC( LOAD t2, UNIT(2)(src), l_exc_copy) 273EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy)
274EXC( LOAD t3, UNIT(3)(src), l_exc_copy) 274EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy)
275 SUB len, len, 4*NBYTES 275 SUB len, len, 4*NBYTES
276 ADD src, src, 4*NBYTES 276 ADD src, src, 4*NBYTES
277 R10KCBARRIER(0(ra)) 277 R10KCBARRIER(0(ra))
278EXC( STORE t0, UNIT(0)(dst), s_exc_p4u) 278EXC( STORE t0, UNIT(0)(dst), .Ls_exc_p4u)
279EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) 279EXC( STORE t1, UNIT(1)(dst), .Ls_exc_p3u)
280EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) 280EXC( STORE t2, UNIT(2)(dst), .Ls_exc_p2u)
281EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) 281EXC( STORE t3, UNIT(3)(dst), .Ls_exc_p1u)
282 .set reorder /* DADDI_WAR */ 282 .set reorder /* DADDI_WAR */
283 ADD dst, dst, 4*NBYTES 283 ADD dst, dst, 4*NBYTES
284 beqz len, done 284 beqz len, .Ldone
285 .set noreorder 285 .set noreorder
286less_than_4units: 286.Lless_than_4units:
287 /* 287 /*
288 * rem = len % NBYTES 288 * rem = len % NBYTES
289 */ 289 */
290 beq rem, len, copy_bytes 290 beq rem, len, .Lcopy_bytes
291 nop 291 nop
2921: 2921:
293 R10KCBARRIER(0(ra)) 293 R10KCBARRIER(0(ra))
294EXC( LOAD t0, 0(src), l_exc) 294EXC( LOAD t0, 0(src), .Ll_exc)
295 ADD src, src, NBYTES 295 ADD src, src, NBYTES
296 SUB len, len, NBYTES 296 SUB len, len, NBYTES
297EXC( STORE t0, 0(dst), s_exc_p1u) 297EXC( STORE t0, 0(dst), .Ls_exc_p1u)
298 .set reorder /* DADDI_WAR */ 298 .set reorder /* DADDI_WAR */
299 ADD dst, dst, NBYTES 299 ADD dst, dst, NBYTES
300 bne rem, len, 1b 300 bne rem, len, 1b
@@ -312,17 +312,17 @@ EXC( STORE t0, 0(dst), s_exc_p1u)
312 * more instruction-level parallelism. 312 * more instruction-level parallelism.
313 */ 313 */
314#define bits t2 314#define bits t2
315 beqz len, done 315 beqz len, .Ldone
316 ADD t1, dst, len # t1 is just past last byte of dst 316 ADD t1, dst, len # t1 is just past last byte of dst
317 li bits, 8*NBYTES 317 li bits, 8*NBYTES
318 SLL rem, len, 3 # rem = number of bits to keep 318 SLL rem, len, 3 # rem = number of bits to keep
319EXC( LOAD t0, 0(src), l_exc) 319EXC( LOAD t0, 0(src), .Ll_exc)
320 SUB bits, bits, rem # bits = number of bits to discard 320 SUB bits, bits, rem # bits = number of bits to discard
321 SHIFT_DISCARD t0, t0, bits 321 SHIFT_DISCARD t0, t0, bits
322EXC( STREST t0, -1(t1), s_exc) 322EXC( STREST t0, -1(t1), .Ls_exc)
323 jr ra 323 jr ra
324 move len, zero 324 move len, zero
325dst_unaligned: 325.Ldst_unaligned:
326 /* 326 /*
327 * dst is unaligned 327 * dst is unaligned
328 * t0 = src & ADDRMASK 328 * t0 = src & ADDRMASK
@@ -333,23 +333,23 @@ dst_unaligned:
333 * Set match = (src and dst have same alignment) 333 * Set match = (src and dst have same alignment)
334 */ 334 */
335#define match rem 335#define match rem
336EXC( LDFIRST t3, FIRST(0)(src), l_exc) 336EXC( LDFIRST t3, FIRST(0)(src), .Ll_exc)
337 ADD t2, zero, NBYTES 337 ADD t2, zero, NBYTES
338EXC( LDREST t3, REST(0)(src), l_exc_copy) 338EXC( LDREST t3, REST(0)(src), .Ll_exc_copy)
339 SUB t2, t2, t1 # t2 = number of bytes copied 339 SUB t2, t2, t1 # t2 = number of bytes copied
340 xor match, t0, t1 340 xor match, t0, t1
341 R10KCBARRIER(0(ra)) 341 R10KCBARRIER(0(ra))
342EXC( STFIRST t3, FIRST(0)(dst), s_exc) 342EXC( STFIRST t3, FIRST(0)(dst), .Ls_exc)
343 beq len, t2, done 343 beq len, t2, .Ldone
344 SUB len, len, t2 344 SUB len, len, t2
345 ADD dst, dst, t2 345 ADD dst, dst, t2
346 beqz match, both_aligned 346 beqz match, .Lboth_aligned
347 ADD src, src, t2 347 ADD src, src, t2
348 348
349src_unaligned_dst_aligned: 349.Lsrc_unaligned_dst_aligned:
350 SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter 350 SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter
351 PREF( 0, 3*32(src) ) 351 PREF( 0, 3*32(src) )
352 beqz t0, cleanup_src_unaligned 352 beqz t0, .Lcleanup_src_unaligned
353 and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES 353 and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES
354 PREF( 1, 3*32(dst) ) 354 PREF( 1, 3*32(dst) )
3551: 3551:
@@ -360,58 +360,58 @@ src_unaligned_dst_aligned:
360 * are to the same unit (unless src is aligned, but it's not). 360 * are to the same unit (unless src is aligned, but it's not).
361 */ 361 */
362 R10KCBARRIER(0(ra)) 362 R10KCBARRIER(0(ra))
363EXC( LDFIRST t0, FIRST(0)(src), l_exc) 363EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc)
364EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy) 364EXC( LDFIRST t1, FIRST(1)(src), .Ll_exc_copy)
365 SUB len, len, 4*NBYTES 365 SUB len, len, 4*NBYTES
366EXC( LDREST t0, REST(0)(src), l_exc_copy) 366EXC( LDREST t0, REST(0)(src), .Ll_exc_copy)
367EXC( LDREST t1, REST(1)(src), l_exc_copy) 367EXC( LDREST t1, REST(1)(src), .Ll_exc_copy)
368EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy) 368EXC( LDFIRST t2, FIRST(2)(src), .Ll_exc_copy)
369EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy) 369EXC( LDFIRST t3, FIRST(3)(src), .Ll_exc_copy)
370EXC( LDREST t2, REST(2)(src), l_exc_copy) 370EXC( LDREST t2, REST(2)(src), .Ll_exc_copy)
371EXC( LDREST t3, REST(3)(src), l_exc_copy) 371EXC( LDREST t3, REST(3)(src), .Ll_exc_copy)
372 PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed) 372 PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed)
373 ADD src, src, 4*NBYTES 373 ADD src, src, 4*NBYTES
374#ifdef CONFIG_CPU_SB1 374#ifdef CONFIG_CPU_SB1
375 nop # improves slotting 375 nop # improves slotting
376#endif 376#endif
377EXC( STORE t0, UNIT(0)(dst), s_exc_p4u) 377EXC( STORE t0, UNIT(0)(dst), .Ls_exc_p4u)
378EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) 378EXC( STORE t1, UNIT(1)(dst), .Ls_exc_p3u)
379EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) 379EXC( STORE t2, UNIT(2)(dst), .Ls_exc_p2u)
380EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) 380EXC( STORE t3, UNIT(3)(dst), .Ls_exc_p1u)
381 PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed) 381 PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed)
382 .set reorder /* DADDI_WAR */ 382 .set reorder /* DADDI_WAR */
383 ADD dst, dst, 4*NBYTES 383 ADD dst, dst, 4*NBYTES
384 bne len, rem, 1b 384 bne len, rem, 1b
385 .set noreorder 385 .set noreorder
386 386
387cleanup_src_unaligned: 387.Lcleanup_src_unaligned:
388 beqz len, done 388 beqz len, .Ldone
389 and rem, len, NBYTES-1 # rem = len % NBYTES 389 and rem, len, NBYTES-1 # rem = len % NBYTES
390 beq rem, len, copy_bytes 390 beq rem, len, .Lcopy_bytes
391 nop 391 nop
3921: 3921:
393 R10KCBARRIER(0(ra)) 393 R10KCBARRIER(0(ra))
394EXC( LDFIRST t0, FIRST(0)(src), l_exc) 394EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc)
395EXC( LDREST t0, REST(0)(src), l_exc_copy) 395EXC( LDREST t0, REST(0)(src), .Ll_exc_copy)
396 ADD src, src, NBYTES 396 ADD src, src, NBYTES
397 SUB len, len, NBYTES 397 SUB len, len, NBYTES
398EXC( STORE t0, 0(dst), s_exc_p1u) 398EXC( STORE t0, 0(dst), .Ls_exc_p1u)
399 .set reorder /* DADDI_WAR */ 399 .set reorder /* DADDI_WAR */
400 ADD dst, dst, NBYTES 400 ADD dst, dst, NBYTES
401 bne len, rem, 1b 401 bne len, rem, 1b
402 .set noreorder 402 .set noreorder
403 403
404copy_bytes_checklen: 404.Lcopy_bytes_checklen:
405 beqz len, done 405 beqz len, .Ldone
406 nop 406 nop
407copy_bytes: 407.Lcopy_bytes:
408 /* 0 < len < NBYTES */ 408 /* 0 < len < NBYTES */
409 R10KCBARRIER(0(ra)) 409 R10KCBARRIER(0(ra))
410#define COPY_BYTE(N) \ 410#define COPY_BYTE(N) \
411EXC( lb t0, N(src), l_exc); \ 411EXC( lb t0, N(src), .Ll_exc); \
412 SUB len, len, 1; \ 412 SUB len, len, 1; \
413 beqz len, done; \ 413 beqz len, .Ldone; \
414EXC( sb t0, N(dst), s_exc_p1) 414EXC( sb t0, N(dst), .Ls_exc_p1)
415 415
416 COPY_BYTE(0) 416 COPY_BYTE(0)
417 COPY_BYTE(1) 417 COPY_BYTE(1)
@@ -421,16 +421,16 @@ EXC( sb t0, N(dst), s_exc_p1)
421 COPY_BYTE(4) 421 COPY_BYTE(4)
422 COPY_BYTE(5) 422 COPY_BYTE(5)
423#endif 423#endif
424EXC( lb t0, NBYTES-2(src), l_exc) 424EXC( lb t0, NBYTES-2(src), .Ll_exc)
425 SUB len, len, 1 425 SUB len, len, 1
426 jr ra 426 jr ra
427EXC( sb t0, NBYTES-2(dst), s_exc_p1) 427EXC( sb t0, NBYTES-2(dst), .Ls_exc_p1)
428done: 428.Ldone:
429 jr ra 429 jr ra
430 nop 430 nop
431 END(memcpy) 431 END(memcpy)
432 432
433l_exc_copy: 433.Ll_exc_copy:
434 /* 434 /*
435 * Copy bytes from src until faulting load address (or until a 435 * Copy bytes from src until faulting load address (or until a
436 * lb faults) 436 * lb faults)
@@ -445,14 +445,14 @@ l_exc_copy:
445 nop 445 nop
446 LOAD t0, THREAD_BUADDR(t0) 446 LOAD t0, THREAD_BUADDR(t0)
4471: 4471:
448EXC( lb t1, 0(src), l_exc) 448EXC( lb t1, 0(src), .Ll_exc)
449 ADD src, src, 1 449 ADD src, src, 1
450 sb t1, 0(dst) # can't fault -- we're copy_from_user 450 sb t1, 0(dst) # can't fault -- we're copy_from_user
451 .set reorder /* DADDI_WAR */ 451 .set reorder /* DADDI_WAR */
452 ADD dst, dst, 1 452 ADD dst, dst, 1
453 bne src, t0, 1b 453 bne src, t0, 1b
454 .set noreorder 454 .set noreorder
455l_exc: 455.Ll_exc:
456 LOAD t0, TI_TASK($28) 456 LOAD t0, TI_TASK($28)
457 nop 457 nop
458 LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address 458 LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address
@@ -471,7 +471,7 @@ l_exc:
471 */ 471 */
472 .set reorder /* DADDI_WAR */ 472 .set reorder /* DADDI_WAR */
473 SUB src, len, 1 473 SUB src, len, 1
474 beqz len, done 474 beqz len, .Ldone
475 .set noreorder 475 .set noreorder
4761: sb zero, 0(dst) 4761: sb zero, 0(dst)
477 ADD dst, dst, 1 477 ADD dst, dst, 1
@@ -492,7 +492,7 @@ l_exc:
492 492
493#define SEXC(n) \ 493#define SEXC(n) \
494 .set reorder; /* DADDI_WAR */ \ 494 .set reorder; /* DADDI_WAR */ \
495s_exc_p ## n ## u: \ 495.Ls_exc_p ## n ## u: \
496 ADD len, len, n*NBYTES; \ 496 ADD len, len, n*NBYTES; \
497 jr ra; \ 497 jr ra; \
498 .set noreorder 498 .set noreorder
@@ -506,12 +506,12 @@ SEXC(3)
506SEXC(2) 506SEXC(2)
507SEXC(1) 507SEXC(1)
508 508
509s_exc_p1: 509.Ls_exc_p1:
510 .set reorder /* DADDI_WAR */ 510 .set reorder /* DADDI_WAR */
511 ADD len, len, 1 511 ADD len, len, 1
512 jr ra 512 jr ra
513 .set noreorder 513 .set noreorder
514s_exc: 514.Ls_exc:
515 jr ra 515 jr ra
516 nop 516 nop
517 517
@@ -522,20 +522,20 @@ LEAF(memmove)
522 sltu t0, a1, t0 # dst + len <= src -> memcpy 522 sltu t0, a1, t0 # dst + len <= src -> memcpy
523 sltu t1, a0, t1 # dst >= src + len -> memcpy 523 sltu t1, a0, t1 # dst >= src + len -> memcpy
524 and t0, t1 524 and t0, t1
525 beqz t0, __memcpy 525 beqz t0, .L__memcpy
526 move v0, a0 /* return value */ 526 move v0, a0 /* return value */
527 beqz a2, r_out 527 beqz a2, .Lr_out
528 END(memmove) 528 END(memmove)
529 529
530 /* fall through to __rmemcpy */ 530 /* fall through to __rmemcpy */
531LEAF(__rmemcpy) /* a0=dst a1=src a2=len */ 531LEAF(__rmemcpy) /* a0=dst a1=src a2=len */
532 sltu t0, a1, a0 532 sltu t0, a1, a0
533 beqz t0, r_end_bytes_up # src >= dst 533 beqz t0, .Lr_end_bytes_up # src >= dst
534 nop 534 nop
535 ADD a0, a2 # dst = dst + len 535 ADD a0, a2 # dst = dst + len
536 ADD a1, a2 # src = src + len 536 ADD a1, a2 # src = src + len
537 537
538r_end_bytes: 538.Lr_end_bytes:
539 R10KCBARRIER(0(ra)) 539 R10KCBARRIER(0(ra))
540 lb t0, -1(a1) 540 lb t0, -1(a1)
541 SUB a2, a2, 0x1 541 SUB a2, a2, 0x1
@@ -543,14 +543,14 @@ r_end_bytes:
543 SUB a1, a1, 0x1 543 SUB a1, a1, 0x1
544 .set reorder /* DADDI_WAR */ 544 .set reorder /* DADDI_WAR */
545 SUB a0, a0, 0x1 545 SUB a0, a0, 0x1
546 bnez a2, r_end_bytes 546 bnez a2, .Lr_end_bytes
547 .set noreorder 547 .set noreorder
548 548
549r_out: 549.Lr_out:
550 jr ra 550 jr ra
551 move a2, zero 551 move a2, zero
552 552
553r_end_bytes_up: 553.Lr_end_bytes_up:
554 R10KCBARRIER(0(ra)) 554 R10KCBARRIER(0(ra))
555 lb t0, (a1) 555 lb t0, (a1)
556 SUB a2, a2, 0x1 556 SUB a2, a2, 0x1
@@ -558,7 +558,7 @@ r_end_bytes_up:
558 ADD a1, a1, 0x1 558 ADD a1, a1, 0x1
559 .set reorder /* DADDI_WAR */ 559 .set reorder /* DADDI_WAR */
560 ADD a0, a0, 0x1 560 ADD a0, a0, 0x1
561 bnez a2, r_end_bytes_up 561 bnez a2, .Lr_end_bytes_up
562 .set noreorder 562 .set noreorder
563 563
564 jr ra 564 jr ra
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index c018a4721693..77dc3b20110a 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -72,7 +72,7 @@ LEAF(memset)
72 72
73FEXPORT(__bzero) 73FEXPORT(__bzero)
74 sltiu t0, a2, LONGSIZE /* very small region? */ 74 sltiu t0, a2, LONGSIZE /* very small region? */
75 bnez t0, small_memset 75 bnez t0, .Lsmall_memset
76 andi t0, a0, LONGMASK /* aligned? */ 76 andi t0, a0, LONGMASK /* aligned? */
77 77
78#ifndef CONFIG_CPU_DADDI_WORKAROUNDS 78#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
@@ -88,28 +88,28 @@ FEXPORT(__bzero)
88 88
89 R10KCBARRIER(0(ra)) 89 R10KCBARRIER(0(ra))
90#ifdef __MIPSEB__ 90#ifdef __MIPSEB__
91 EX(LONG_S_L, a1, (a0), first_fixup) /* make word/dword aligned */ 91 EX(LONG_S_L, a1, (a0), .Lfirst_fixup) /* make word/dword aligned */
92#endif 92#endif
93#ifdef __MIPSEL__ 93#ifdef __MIPSEL__
94 EX(LONG_S_R, a1, (a0), first_fixup) /* make word/dword aligned */ 94 EX(LONG_S_R, a1, (a0), .Lfirst_fixup) /* make word/dword aligned */
95#endif 95#endif
96 PTR_SUBU a0, t0 /* long align ptr */ 96 PTR_SUBU a0, t0 /* long align ptr */
97 PTR_ADDU a2, t0 /* correct size */ 97 PTR_ADDU a2, t0 /* correct size */
98 98
991: ori t1, a2, 0x3f /* # of full blocks */ 991: ori t1, a2, 0x3f /* # of full blocks */
100 xori t1, 0x3f 100 xori t1, 0x3f
101 beqz t1, memset_partial /* no block to fill */ 101 beqz t1, .Lmemset_partial /* no block to fill */
102 andi t0, a2, 0x40-LONGSIZE 102 andi t0, a2, 0x40-LONGSIZE
103 103
104 PTR_ADDU t1, a0 /* end address */ 104 PTR_ADDU t1, a0 /* end address */
105 .set reorder 105 .set reorder
1061: PTR_ADDIU a0, 64 1061: PTR_ADDIU a0, 64
107 R10KCBARRIER(0(ra)) 107 R10KCBARRIER(0(ra))
108 f_fill64 a0, -64, a1, fwd_fixup 108 f_fill64 a0, -64, a1, .Lfwd_fixup
109 bne t1, a0, 1b 109 bne t1, a0, 1b
110 .set noreorder 110 .set noreorder
111 111
112memset_partial: 112.Lmemset_partial:
113 R10KCBARRIER(0(ra)) 113 R10KCBARRIER(0(ra))
114 PTR_LA t1, 2f /* where to start */ 114 PTR_LA t1, 2f /* where to start */
115#if LONGSIZE == 4 115#if LONGSIZE == 4
@@ -126,7 +126,7 @@ memset_partial:
126 .set push 126 .set push
127 .set noreorder 127 .set noreorder
128 .set nomacro 128 .set nomacro
129 f_fill64 a0, -64, a1, partial_fixup /* ... but first do longs ... */ 129 f_fill64 a0, -64, a1, .Lpartial_fixup /* ... but first do longs ... */
1302: .set pop 1302: .set pop
131 andi a2, LONGMASK /* At most one long to go */ 131 andi a2, LONGMASK /* At most one long to go */
132 132
@@ -134,15 +134,15 @@ memset_partial:
134 PTR_ADDU a0, a2 /* What's left */ 134 PTR_ADDU a0, a2 /* What's left */
135 R10KCBARRIER(0(ra)) 135 R10KCBARRIER(0(ra))
136#ifdef __MIPSEB__ 136#ifdef __MIPSEB__
137 EX(LONG_S_R, a1, -1(a0), last_fixup) 137 EX(LONG_S_R, a1, -1(a0), .Llast_fixup)
138#endif 138#endif
139#ifdef __MIPSEL__ 139#ifdef __MIPSEL__
140 EX(LONG_S_L, a1, -1(a0), last_fixup) 140 EX(LONG_S_L, a1, -1(a0), .Llast_fixup)
141#endif 141#endif
1421: jr ra 1421: jr ra
143 move a2, zero 143 move a2, zero
144 144
145small_memset: 145.Lsmall_memset:
146 beqz a2, 2f 146 beqz a2, 2f
147 PTR_ADDU t1, a0, a2 147 PTR_ADDU t1, a0, a2
148 148
@@ -155,11 +155,11 @@ small_memset:
155 move a2, zero 155 move a2, zero
156 END(memset) 156 END(memset)
157 157
158first_fixup: 158.Lfirst_fixup:
159 jr ra 159 jr ra
160 nop 160 nop
161 161
162fwd_fixup: 162.Lfwd_fixup:
163 PTR_L t0, TI_TASK($28) 163 PTR_L t0, TI_TASK($28)
164 LONG_L t0, THREAD_BUADDR(t0) 164 LONG_L t0, THREAD_BUADDR(t0)
165 andi a2, 0x3f 165 andi a2, 0x3f
@@ -167,7 +167,7 @@ fwd_fixup:
167 jr ra 167 jr ra
168 LONG_SUBU a2, t0 168 LONG_SUBU a2, t0
169 169
170partial_fixup: 170.Lpartial_fixup:
171 PTR_L t0, TI_TASK($28) 171 PTR_L t0, TI_TASK($28)
172 LONG_L t0, THREAD_BUADDR(t0) 172 LONG_L t0, THREAD_BUADDR(t0)
173 andi a2, LONGMASK 173 andi a2, LONGMASK
@@ -175,6 +175,6 @@ partial_fixup:
175 jr ra 175 jr ra
176 LONG_SUBU a2, t0 176 LONG_SUBU a2, t0
177 177
178last_fixup: 178.Llast_fixup:
179 jr ra 179 jr ra
180 andi v1, a2, LONGMASK 180 andi v1, a2, LONGMASK
diff --git a/arch/mips/lib/strlen_user.S b/arch/mips/lib/strlen_user.S
index eca558d83a37..fdbb970f670d 100644
--- a/arch/mips/lib/strlen_user.S
+++ b/arch/mips/lib/strlen_user.S
@@ -24,16 +24,16 @@
24LEAF(__strlen_user_asm) 24LEAF(__strlen_user_asm)
25 LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? 25 LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok?
26 and v0, a0 26 and v0, a0
27 bnez v0, fault 27 bnez v0, .Lfault
28 28
29FEXPORT(__strlen_user_nocheck_asm) 29FEXPORT(__strlen_user_nocheck_asm)
30 move v0, a0 30 move v0, a0
311: EX(lb, t0, (v0), fault) 311: EX(lb, t0, (v0), .Lfault)
32 PTR_ADDIU v0, 1 32 PTR_ADDIU v0, 1
33 bnez t0, 1b 33 bnez t0, 1b
34 PTR_SUBU v0, a0 34 PTR_SUBU v0, a0
35 jr ra 35 jr ra
36 END(__strlen_user_asm) 36 END(__strlen_user_asm)
37 37
38fault: move v0, zero 38.Lfault: move v0, zero
39 jr ra 39 jr ra
diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S
index 8a63f72b81d3..7201b2ff08c8 100644
--- a/arch/mips/lib/strncpy_user.S
+++ b/arch/mips/lib/strncpy_user.S
@@ -30,13 +30,13 @@
30LEAF(__strncpy_from_user_asm) 30LEAF(__strncpy_from_user_asm)
31 LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? 31 LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok?
32 and v0, a1 32 and v0, a1
33 bnez v0, fault 33 bnez v0, .Lfault
34 34
35FEXPORT(__strncpy_from_user_nocheck_asm) 35FEXPORT(__strncpy_from_user_nocheck_asm)
36 move v0, zero 36 move v0, zero
37 move v1, a1 37 move v1, a1
38 .set noreorder 38 .set noreorder
391: EX(lbu, t0, (v1), fault) 391: EX(lbu, t0, (v1), .Lfault)
40 PTR_ADDIU v1, 1 40 PTR_ADDIU v1, 1
41 R10KCBARRIER(0(ra)) 41 R10KCBARRIER(0(ra))
42 beqz t0, 2f 42 beqz t0, 2f
@@ -47,13 +47,13 @@ FEXPORT(__strncpy_from_user_nocheck_asm)
47 bne v0, a2, 1b 47 bne v0, a2, 1b
482: PTR_ADDU t0, a1, v0 482: PTR_ADDU t0, a1, v0
49 xor t0, a1 49 xor t0, a1
50 bltz t0, fault 50 bltz t0, .Lfault
51 jr ra # return n 51 jr ra # return n
52 END(__strncpy_from_user_asm) 52 END(__strncpy_from_user_asm)
53 53
54fault: li v0, -EFAULT 54.Lfault: li v0, -EFAULT
55 jr ra 55 jr ra
56 56
57 .section __ex_table,"a" 57 .section __ex_table,"a"
58 PTR 1b, fault 58 PTR 1b, .Lfault
59 .previous 59 .previous
diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S
index c0ea15194a0e..c768e3000616 100644
--- a/arch/mips/lib/strnlen_user.S
+++ b/arch/mips/lib/strnlen_user.S
@@ -28,18 +28,19 @@
28LEAF(__strnlen_user_asm) 28LEAF(__strnlen_user_asm)
29 LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok? 29 LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok?
30 and v0, a0 30 and v0, a0
31 bnez v0, fault 31 bnez v0, .Lfault
32 32
33FEXPORT(__strnlen_user_nocheck_asm) 33FEXPORT(__strnlen_user_nocheck_asm)
34 move v0, a0 34 move v0, a0
35 PTR_ADDU a1, a0 # stop pointer 35 PTR_ADDU a1, a0 # stop pointer
361: beq v0, a1, 1f # limit reached? 361: beq v0, a1, 1f # limit reached?
37 EX(lb, t0, (v0), fault) 37 EX(lb, t0, (v0), .Lfault)
38 PTR_ADDU v0, 1 38 PTR_ADDU v0, 1
39 bnez t0, 1b 39 bnez t0, 1b
401: PTR_SUBU v0, a0 401: PTR_SUBU v0, a0
41 jr ra 41 jr ra
42 END(__strnlen_user_asm) 42 END(__strnlen_user_asm)
43 43
44fault: move v0, zero 44.Lfault:
45 move v0, zero
45 jr ra 46 jr ra