Diffstat (limited to 'arch/mips/lib/csum_partial.S')
-rw-r--r--  arch/mips/lib/csum_partial.S | 214
1 file changed, 107 insertions(+), 107 deletions(-)
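The change is mechanical: every internal label in csum_partial() and csum_partial_copy_nocheck() gains a .L prefix. GNU as treats .L-prefixed symbols as assembler-local, so they are no longer emitted into the object's symbol table; addresses inside these routines then resolve to the exported function names in backtraces and profiles, rather than to whichever internal jump target happens to precede them.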
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 957a82484e3e..8d7784122c14 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -96,13 +96,13 @@ LEAF(csum_partial)
 	move	t7, zero

 	sltiu	t8, a1, 0x8
-	bnez	t8, small_csumcpy		/* < 8 bytes to copy */
+	bnez	t8, .Lsmall_csumcpy		/* < 8 bytes to copy */
 	move	t2, a1

 	andi	t7, src, 0x1			/* odd buffer? */

-hword_align:
-	beqz	t7, word_align
+.Lhword_align:
+	beqz	t7, .Lword_align
 	andi	t8, src, 0x2

 	lbu	t0, (src)
@@ -114,8 +114,8 @@ hword_align:
 	PTR_ADDU	src, src, 0x1
 	andi	t8, src, 0x2

-word_align:
-	beqz	t8, dword_align
+.Lword_align:
+	beqz	t8, .Ldword_align
 	sltiu	t8, a1, 56

 	lhu	t0, (src)
@@ -124,12 +124,12 @@ word_align:
 	sltiu	t8, a1, 56
 	PTR_ADDU	src, src, 0x2

-dword_align:
-	bnez	t8, do_end_words
+.Ldword_align:
+	bnez	t8, .Ldo_end_words
 	move	t8, a1

 	andi	t8, src, 0x4
-	beqz	t8, qword_align
+	beqz	t8, .Lqword_align
 	andi	t8, src, 0x8

 	lw	t0, 0x00(src)
@@ -138,8 +138,8 @@ dword_align:
 	PTR_ADDU	src, src, 0x4
 	andi	t8, src, 0x8

-qword_align:
-	beqz	t8, oword_align
+.Lqword_align:
+	beqz	t8, .Loword_align
 	andi	t8, src, 0x10

 #ifdef USE_DOUBLE
@@ -156,8 +156,8 @@ qword_align:
 	PTR_ADDU	src, src, 0x8
 	andi	t8, src, 0x10

-oword_align:
-	beqz	t8, begin_movement
+.Loword_align:
+	beqz	t8, .Lbegin_movement
 	LONG_SRL	t8, a1, 0x7

 #ifdef USE_DOUBLE
@@ -172,11 +172,11 @@ oword_align:
 	PTR_ADDU	src, src, 0x10
 	LONG_SRL	t8, a1, 0x7

-begin_movement:
+.Lbegin_movement:
 	beqz	t8, 1f
 	andi	t2, a1, 0x40

-move_128bytes:
+.Lmove_128bytes:
 	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
 	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
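The labels above form csum_partial()'s head-alignment cascade: each stage retires just enough leading bytes to reach the next power-of-two boundary before the wide loops run. A standalone C sketch of that shape, with a hypothetical consume() helper; the short-length bailouts to .Ldo_end_words are omitted, and the real code also records an odd starting address in t7 so the final sum can be rotated back:

    #include <stdint.h>
    #include <stddef.h>

    /* Hypothetical sketch, not the kernel's code: retire 1-, 2-, 4-, 8-
     * and 16-byte head fragments, in the order of the .Lhword_align ..
     * .Loword_align stages, until src sits on a 32-byte boundary. */
    static const uint8_t *align_to_32(const uint8_t *src, size_t *len,
                                      void (*consume)(const uint8_t *p, size_t n))
    {
        for (size_t step = 1; step <= 16; step <<= 1) {
            if (((uintptr_t)src & step) && *len >= step) {
                consume(src, step);     /* fold these bytes into the sum */
                src += step;
                *len -= step;
            }
        }
        return src;
    }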
@@ -184,43 +184,43 @@ move_128bytes:
 	LONG_SUBU	t8, t8, 0x01
 	.set	reorder				/* DADDI_WAR */
 	PTR_ADDU	src, src, 0x80
-	bnez	t8, move_128bytes
+	bnez	t8, .Lmove_128bytes
 	.set	noreorder

 1:
 	beqz	t2, 1f
 	andi	t2, a1, 0x20

-move_64bytes:
+.Lmove_64bytes:
 	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
 	PTR_ADDU	src, src, 0x40

 1:
-	beqz	t2, do_end_words
+	beqz	t2, .Ldo_end_words
 	andi	t8, a1, 0x1c

-move_32bytes:
+.Lmove_32bytes:
 	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 	andi	t8, a1, 0x1c
 	PTR_ADDU	src, src, 0x20

-do_end_words:
-	beqz	t8, small_csumcpy
+.Ldo_end_words:
+	beqz	t8, .Lsmall_csumcpy
 	andi	t2, a1, 0x3
 	LONG_SRL	t8, t8, 0x2

-end_words:
+.Lend_words:
 	lw	t0, (src)
 	LONG_SUBU	t8, t8, 0x1
 	ADDC(sum, t0)
 	.set	reorder				/* DADDI_WAR */
 	PTR_ADDU	src, src, 0x4
-	bnez	t8, end_words
+	bnez	t8, .Lend_words
 	.set	noreorder

 	/* unknown src alignment and < 8 bytes to go */
-small_csumcpy:
+.Lsmall_csumcpy:
 	move	a1, t2

 	andi	t0, a1, 4
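Throughout these loops every word passes through ADDC(sum, reg), which on MIPS is typically an addu followed by an sltu to recover the lost carry. A C model of the idea, reused by the later sketches:

    #include <stdint.h>

    /* C model of ADDC(sum, v): one's-complement accumulation with
     * end-around carry, assuming the usual addu/sltu/addu idiom. */
    static inline uint32_t addc(uint32_t sum, uint32_t v)
    {
        sum += v;
        if (sum < v)    /* unsigned wrap means a carry fell off the top */
            sum += 1;   /* feed it back around */
        return sum;
    }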
@@ -413,48 +413,48 @@ FEXPORT(csum_partial_copy_nocheck)
 	 */
 	sltu	t2, len, NBYTES
 	and	t1, dst, ADDRMASK
-	bnez	t2, copy_bytes_checklen
+	bnez	t2, .Lcopy_bytes_checklen
 	and	t0, src, ADDRMASK
 	andi	odd, dst, 0x1			/* odd buffer? */
-	bnez	t1, dst_unaligned
+	bnez	t1, .Ldst_unaligned
 	nop
-	bnez	t0, src_unaligned_dst_aligned
+	bnez	t0, .Lsrc_unaligned_dst_aligned
 	/*
 	 * use delay slot for fall-through
 	 * src and dst are aligned; need to compute rem
 	 */
-both_aligned:
+.Lboth_aligned:
 	SRL	t0, len, LOG_NBYTES+3		# +3 for 8 units/iter
-	beqz	t0, cleanup_both_aligned	# len < 8*NBYTES
+	beqz	t0, .Lcleanup_both_aligned	# len < 8*NBYTES
 	nop
 	SUB	len, 8*NBYTES			# subtract here for bgez loop
 	.align	4
 1:
-EXC(	LOAD	t0, UNIT(0)(src), l_exc)
-EXC(	LOAD	t1, UNIT(1)(src), l_exc_copy)
-EXC(	LOAD	t2, UNIT(2)(src), l_exc_copy)
-EXC(	LOAD	t3, UNIT(3)(src), l_exc_copy)
-EXC(	LOAD	t4, UNIT(4)(src), l_exc_copy)
-EXC(	LOAD	t5, UNIT(5)(src), l_exc_copy)
-EXC(	LOAD	t6, UNIT(6)(src), l_exc_copy)
-EXC(	LOAD	t7, UNIT(7)(src), l_exc_copy)
+EXC(	LOAD	t0, UNIT(0)(src), .Ll_exc)
+EXC(	LOAD	t1, UNIT(1)(src), .Ll_exc_copy)
+EXC(	LOAD	t2, UNIT(2)(src), .Ll_exc_copy)
+EXC(	LOAD	t3, UNIT(3)(src), .Ll_exc_copy)
+EXC(	LOAD	t4, UNIT(4)(src), .Ll_exc_copy)
+EXC(	LOAD	t5, UNIT(5)(src), .Ll_exc_copy)
+EXC(	LOAD	t6, UNIT(6)(src), .Ll_exc_copy)
+EXC(	LOAD	t7, UNIT(7)(src), .Ll_exc_copy)
 	SUB	len, len, 8*NBYTES
 	ADD	src, src, 8*NBYTES
-EXC(	STORE	t0, UNIT(0)(dst), s_exc)
+EXC(	STORE	t0, UNIT(0)(dst), .Ls_exc)
 	ADDC(sum, t0)
-EXC(	STORE	t1, UNIT(1)(dst), s_exc)
+EXC(	STORE	t1, UNIT(1)(dst), .Ls_exc)
 	ADDC(sum, t1)
-EXC(	STORE	t2, UNIT(2)(dst), s_exc)
+EXC(	STORE	t2, UNIT(2)(dst), .Ls_exc)
 	ADDC(sum, t2)
-EXC(	STORE	t3, UNIT(3)(dst), s_exc)
+EXC(	STORE	t3, UNIT(3)(dst), .Ls_exc)
 	ADDC(sum, t3)
-EXC(	STORE	t4, UNIT(4)(dst), s_exc)
+EXC(	STORE	t4, UNIT(4)(dst), .Ls_exc)
 	ADDC(sum, t4)
-EXC(	STORE	t5, UNIT(5)(dst), s_exc)
+EXC(	STORE	t5, UNIT(5)(dst), .Ls_exc)
 	ADDC(sum, t5)
-EXC(	STORE	t6, UNIT(6)(dst), s_exc)
+EXC(	STORE	t6, UNIT(6)(dst), .Ls_exc)
 	ADDC(sum, t6)
-EXC(	STORE	t7, UNIT(7)(dst), s_exc)
+EXC(	STORE	t7, UNIT(7)(dst), .Ls_exc)
 	ADDC(sum, t7)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 8*NBYTES
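In csum_partial_copy_nocheck() every user access goes through EXC(), which emits the instruction plus a fixup entry in __ex_table: a faulting load lands on .Ll_exc or .Ll_exc_copy, a faulting store on .Ls_exc. The rename is safe here because the exception table records the handler's resolved address rather than a symbol name, and an assembler-local .L label resolves at assembly time just the same.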
@@ -465,44 +465,44 @@ EXC( STORE t7, UNIT(7)(dst), s_exc)
 	/*
 	 * len == the number of bytes left to copy < 8*NBYTES
 	 */
-cleanup_both_aligned:
+.Lcleanup_both_aligned:
 #define rem t7
-	beqz	len, done
+	beqz	len, .Ldone
 	sltu	t0, len, 4*NBYTES
-	bnez	t0, less_than_4units
+	bnez	t0, .Lless_than_4units
 	and	rem, len, (NBYTES-1)	# rem = len % NBYTES
 	/*
 	 * len >= 4*NBYTES
 	 */
-EXC(	LOAD	t0, UNIT(0)(src), l_exc)
-EXC(	LOAD	t1, UNIT(1)(src), l_exc_copy)
-EXC(	LOAD	t2, UNIT(2)(src), l_exc_copy)
-EXC(	LOAD	t3, UNIT(3)(src), l_exc_copy)
+EXC(	LOAD	t0, UNIT(0)(src), .Ll_exc)
+EXC(	LOAD	t1, UNIT(1)(src), .Ll_exc_copy)
+EXC(	LOAD	t2, UNIT(2)(src), .Ll_exc_copy)
+EXC(	LOAD	t3, UNIT(3)(src), .Ll_exc_copy)
 	SUB	len, len, 4*NBYTES
 	ADD	src, src, 4*NBYTES
-EXC(	STORE	t0, UNIT(0)(dst), s_exc)
+EXC(	STORE	t0, UNIT(0)(dst), .Ls_exc)
 	ADDC(sum, t0)
-EXC(	STORE	t1, UNIT(1)(dst), s_exc)
+EXC(	STORE	t1, UNIT(1)(dst), .Ls_exc)
 	ADDC(sum, t1)
-EXC(	STORE	t2, UNIT(2)(dst), s_exc)
+EXC(	STORE	t2, UNIT(2)(dst), .Ls_exc)
 	ADDC(sum, t2)
-EXC(	STORE	t3, UNIT(3)(dst), s_exc)
+EXC(	STORE	t3, UNIT(3)(dst), .Ls_exc)
 	ADDC(sum, t3)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
-	beqz	len, done
+	beqz	len, .Ldone
 	.set	noreorder
-less_than_4units:
+.Lless_than_4units:
 	/*
 	 * rem = len % NBYTES
 	 */
-	beq	rem, len, copy_bytes
+	beq	rem, len, .Lcopy_bytes
 	nop
 1:
-EXC(	LOAD	t0, 0(src), l_exc)
+EXC(	LOAD	t0, 0(src), .Ll_exc)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-EXC(	STORE	t0, 0(dst), s_exc)
+EXC(	STORE	t0, 0(dst), .Ls_exc)
 	ADDC(sum, t0)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
@@ -521,20 +521,20 @@ EXC( STORE t0, 0(dst), s_exc)
 	 * more instruction-level parallelism.
 	 */
 #define bits t2
-	beqz	len, done
+	beqz	len, .Ldone
 	ADD	t1, dst, len	# t1 is just past last byte of dst
 	li	bits, 8*NBYTES
 	SLL	rem, len, 3	# rem = number of bits to keep
-EXC(	LOAD	t0, 0(src), l_exc)
+EXC(	LOAD	t0, 0(src), .Ll_exc)
 	SUB	bits, bits, rem	# bits = number of bits to discard
 	SHIFT_DISCARD	t0, t0, bits
-EXC(	STREST	t0, -1(t1), s_exc)
+EXC(	STREST	t0, -1(t1), .Ls_exc)
 	SHIFT_DISCARD_REVERT	t0, t0, bits
 	.set	reorder
 	ADDC(sum, t0)
-	b	done
+	b	.Ldone
 	.set	noreorder
-dst_unaligned:
+.Ldst_unaligned:
 	/*
 	 * dst is unaligned
 	 * t0 = src & ADDRMASK
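The block just above finishes an aligned copy whose tail is shorter than one word: STREST stores only the remaining bytes, and the SHIFT_DISCARD / SHIFT_DISCARD_REVERT pair shifts the out-of-range bytes away and back so that only len bytes enter the sum. A standalone sketch, assuming a little-endian CPU and NBYTES == 4 (the kernel picks the shift directions per endianness):

    #include <stdint.h>

    /* Hypothetical little-endian sketch, NBYTES == 4: strip the bytes
     * past the end of the buffer from the final word before summing. */
    static uint32_t tail_word(uint32_t word, unsigned len /* 1..3 */)
    {
        unsigned discard = 8 * (4 - len);   /* bits past the buffer end */
        word <<= discard;                   /* SHIFT_DISCARD            */
        word >>= discard;                   /* SHIFT_DISCARD_REVERT     */
        return word;                        /* then addc(sum, word)     */
    }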
@@ -545,25 +545,25 @@ dst_unaligned:
 	 * Set match = (src and dst have same alignment)
 	 */
 #define match rem
-EXC(	LDFIRST	t3, FIRST(0)(src), l_exc)
+EXC(	LDFIRST	t3, FIRST(0)(src), .Ll_exc)
 	ADD	t2, zero, NBYTES
-EXC(	LDREST	t3, REST(0)(src), l_exc_copy)
+EXC(	LDREST	t3, REST(0)(src), .Ll_exc_copy)
 	SUB	t2, t2, t1	# t2 = number of bytes copied
 	xor	match, t0, t1
-EXC(	STFIRST	t3, FIRST(0)(dst), s_exc)
+EXC(	STFIRST	t3, FIRST(0)(dst), .Ls_exc)
 	SLL	t4, t1, 3	# t4 = number of bits to discard
 	SHIFT_DISCARD	t3, t3, t4
 	/* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
 	ADDC(sum, t3)
-	beq	len, t2, done
+	beq	len, t2, .Ldone
 	SUB	len, len, t2
 	ADD	dst, dst, t2
-	beqz	match, both_aligned
+	beqz	match, .Lboth_aligned
 	ADD	src, src, t2

-src_unaligned_dst_aligned:
+.Lsrc_unaligned_dst_aligned:
 	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
-	beqz	t0, cleanup_src_unaligned
+	beqz	t0, .Lcleanup_src_unaligned
 	and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
 1:
 	/*
@@ -572,53 +572,53 @@ src_unaligned_dst_aligned:
 	 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 	 * are to the same unit (unless src is aligned, but it's not).
 	 */
-EXC(	LDFIRST	t0, FIRST(0)(src), l_exc)
-EXC(	LDFIRST	t1, FIRST(1)(src), l_exc_copy)
+EXC(	LDFIRST	t0, FIRST(0)(src), .Ll_exc)
+EXC(	LDFIRST	t1, FIRST(1)(src), .Ll_exc_copy)
 	SUB	len, len, 4*NBYTES
-EXC(	LDREST	t0, REST(0)(src), l_exc_copy)
-EXC(	LDREST	t1, REST(1)(src), l_exc_copy)
-EXC(	LDFIRST	t2, FIRST(2)(src), l_exc_copy)
-EXC(	LDFIRST	t3, FIRST(3)(src), l_exc_copy)
-EXC(	LDREST	t2, REST(2)(src), l_exc_copy)
-EXC(	LDREST	t3, REST(3)(src), l_exc_copy)
+EXC(	LDREST	t0, REST(0)(src), .Ll_exc_copy)
+EXC(	LDREST	t1, REST(1)(src), .Ll_exc_copy)
+EXC(	LDFIRST	t2, FIRST(2)(src), .Ll_exc_copy)
+EXC(	LDFIRST	t3, FIRST(3)(src), .Ll_exc_copy)
+EXC(	LDREST	t2, REST(2)(src), .Ll_exc_copy)
+EXC(	LDREST	t3, REST(3)(src), .Ll_exc_copy)
 	ADD	src, src, 4*NBYTES
 #ifdef CONFIG_CPU_SB1
 	nop				# improves slotting
 #endif
-EXC(	STORE	t0, UNIT(0)(dst), s_exc)
+EXC(	STORE	t0, UNIT(0)(dst), .Ls_exc)
 	ADDC(sum, t0)
-EXC(	STORE	t1, UNIT(1)(dst), s_exc)
+EXC(	STORE	t1, UNIT(1)(dst), .Ls_exc)
 	ADDC(sum, t1)
-EXC(	STORE	t2, UNIT(2)(dst), s_exc)
+EXC(	STORE	t2, UNIT(2)(dst), .Ls_exc)
 	ADDC(sum, t2)
-EXC(	STORE	t3, UNIT(3)(dst), s_exc)
+EXC(	STORE	t3, UNIT(3)(dst), .Ls_exc)
 	ADDC(sum, t3)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
 	bne	len, rem, 1b
 	.set	noreorder

-cleanup_src_unaligned:
-	beqz	len, done
+.Lcleanup_src_unaligned:
+	beqz	len, .Ldone
 	and	rem, len, NBYTES-1	# rem = len % NBYTES
-	beq	rem, len, copy_bytes
+	beq	rem, len, .Lcopy_bytes
 	nop
 1:
-EXC(	LDFIRST	t0, FIRST(0)(src), l_exc)
-EXC(	LDREST	t0, REST(0)(src), l_exc_copy)
+EXC(	LDFIRST	t0, FIRST(0)(src), .Ll_exc)
+EXC(	LDREST	t0, REST(0)(src), .Ll_exc_copy)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-EXC(	STORE	t0, 0(dst), s_exc)
+EXC(	STORE	t0, 0(dst), .Ls_exc)
 	ADDC(sum, t0)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
 	bne	len, rem, 1b
 	.set	noreorder

-copy_bytes_checklen:
-	beqz	len, done
+.Lcopy_bytes_checklen:
+	beqz	len, .Ldone
 	nop
-copy_bytes:
+.Lcopy_bytes:
 	/* 0 < len < NBYTES */
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
 #define SHIFT_START 0
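The loop above is the unaligned-source path: each LDFIRST/LDREST pair (lwl/lwr, or ldl/ldr when USE_DOUBLE is set) assembles one full word from an arbitrarily aligned address, after which the stores and checksumming proceed as in the aligned case. A portable stand-in, as a sketch:

    #include <stdint.h>
    #include <string.h>

    /* Portable analogue of the LDFIRST/LDREST pair: fetch one word
     * from an arbitrarily aligned address.  A sketch, not the
     * kernel's implementation; a MIPS compiler can lower the memcpy
     * to the same lwl/lwr pair. */
    static inline uint32_t load_word_unaligned(const void *p)
    {
        uint32_t w;
        memcpy(&w, p, sizeof(w));
        return w;
    }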
@@ -629,14 +629,14 @@ copy_bytes:
 #endif
 	move	t2, zero	# partial word
 	li	t3, SHIFT_START	# shift
-/* use l_exc_copy here to return correct sum on fault */
+/* use .Ll_exc_copy here to return correct sum on fault */
 #define COPY_BYTE(N)			\
-EXC(	lbu	t0, N(src), l_exc_copy);	\
+EXC(	lbu	t0, N(src), .Ll_exc_copy);	\
 	SUB	len, len, 1;		\
-EXC(	sb	t0, N(dst), s_exc);	\
+EXC(	sb	t0, N(dst), .Ls_exc);	\
 	SLLV	t0, t0, t3;		\
 	addu	t3, SHIFT_INC;		\
-	beqz	len, copy_bytes_done;	\
+	beqz	len, .Lcopy_bytes_done;	\
 	or	t2, t0

 COPY_BYTE(0)
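COPY_BYTE() handles the sub-word tail: each byte is copied and simultaneously packed into the partial word t2 at an endian-dependent shift, so a single ADDC at .Lcopy_bytes_done accounts for all of them. A standalone sketch, assuming little-endian (SHIFT_START 0 as in the #ifdef above, and SHIFT_INC taken to be +8):

    #include <stdint.h>
    #include <stddef.h>

    /* Little-endian sketch of the COPY_BYTE() idea: copy the trailing
     * bytes while packing them into one partial word, so the caller
     * can fold them into the sum with a single addc(). */
    static uint32_t pack_tail(const uint8_t *src, uint8_t *dst, size_t len)
    {
        uint32_t partial = 0;
        int shift = 0;                      /* SHIFT_START on LE */
        while (len--) {
            uint8_t b = *src++;
            *dst++ = b;                     /* the copy half of the job */
            partial |= (uint32_t)b << shift;
            shift += 8;                     /* SHIFT_INC on LE */
        }
        return partial;                     /* caller: sum = addc(sum, partial) */
    }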
@@ -647,14 +647,14 @@ EXC( sb t0, N(dst), s_exc); \
 COPY_BYTE(4)
 COPY_BYTE(5)
 #endif
-EXC(	lbu	t0, NBYTES-2(src), l_exc_copy)
+EXC(	lbu	t0, NBYTES-2(src), .Ll_exc_copy)
 	SUB	len, len, 1
-EXC(	sb	t0, NBYTES-2(dst), s_exc)
+EXC(	sb	t0, NBYTES-2(dst), .Ls_exc)
 	SLLV	t0, t0, t3
 	or	t2, t0
-copy_bytes_done:
+.Lcopy_bytes_done:
 	ADDC(sum, t2)
-done:
+.Ldone:
 	/* fold checksum */
 	.set	push
 	.set	noat
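.Ldone reduces the wide accumulator to a 16-bit checksum (it also applies the odd-buffer rotation recorded at entry, though the hunk elides that part). The fold is the classic Internet-checksum reduction; a sketch of the idea for a 32-bit accumulator, not the kernel's csum_fold:

    #include <stdint.h>

    /* Fold a 32-bit one's-complement accumulator to 16 bits; the
     * second round absorbs any carry produced by the first. */
    static inline uint16_t fold32(uint32_t sum)
    {
        sum = (sum & 0xffff) + (sum >> 16);
        sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)sum;
    }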
@@ -685,7 +685,7 @@ done:
 	jr	ra
 	.set	noreorder

-l_exc_copy:
+.Ll_exc_copy:
 	/*
 	 * Copy bytes from src until faulting load address (or until a
 	 * lb faults)
@@ -700,7 +700,7 @@ l_exc_copy:
 	li	t2, SHIFT_START
 	LOAD	t0, THREAD_BUADDR(t0)
 1:
-EXC(	lbu	t1, 0(src), l_exc)
+EXC(	lbu	t1, 0(src), .Ll_exc)
 	ADD	src, src, 1
 	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
 	SLLV	t1, t1, t2
@@ -710,7 +710,7 @@ EXC( lbu t1, 0(src), l_exc)
 	ADD	dst, dst, 1
 	bne	src, t0, 1b
 	.set	noreorder
-l_exc:
+.Ll_exc:
 	LOAD	t0, TI_TASK($28)
 	nop
 	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
@@ -729,7 +729,7 @@ l_exc:
 	 */
 	.set	reorder				/* DADDI_WAR */
 	SUB	src, len, 1
-	beqz	len, done
+	beqz	len, .Ldone
 	.set	noreorder
 1:	sb	zero, 0(dst)
 	ADD	dst, dst, 1
@@ -744,10 +744,10 @@ l_exc:
 	SUB	src, src, v1
 #endif
 	li	v1, -EFAULT
-	b	done
+	b	.Ldone
 	sw	v1, (errptr)

-s_exc:
+.Ls_exc:
 	li	v0, -1	/* invalid checksum */
 	li	v1, -EFAULT
 	jr	ra
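The three fixup paths keep their behavior across the rename: .Ll_exc_copy copies byte by byte up to the faulting load address so the partial sum stays correct, .Ll_exc zeroes the untouched remainder of the destination and writes -EFAULT through errptr, and .Ls_exc, taken when a store to the destination faults, returns -1 as an explicitly invalid checksum alongside -EFAULT.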