Diffstat (limited to 'arch/mips/lib/csum_partial.S')
 -rw-r--r--  arch/mips/lib/csum_partial.S  42
 1 file changed, 21 insertions, 21 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 6b876ca299ee..507147aebd41 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -67,8 +67,8 @@
67  #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
68  	LOAD	_t0, (offset + UNIT(0))(src);	\
69  	LOAD	_t1, (offset + UNIT(1))(src);	\
70  	LOAD	_t2, (offset + UNIT(2))(src);	\
71  	LOAD	_t3, (offset + UNIT(3))(src);	\
72  	ADDC(sum, _t0);	\
73  	ADDC(sum, _t1);	\
74  	ADDC(sum, _t2);	\
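CSUM_BIGCHUNK1 streams four words per expansion and folds each into the running sum with ADDC, which adds with an end-around carry so no carry bit is ever lost. A minimal C sketch of what ADDC computes per word (the name addc is illustrative, not from the kernel source):

    static inline unsigned long addc(unsigned long sum, unsigned long v)
    {
            sum += v;
            if (sum < v)        /* unsigned wraparound => a carry occurred */
                    sum += 1;   /* end-around carry keeps the sum exact */
            return sum;
    }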
@@ -285,7 +285,7 @@ LEAF(csum_partial)
285  1:
286  #endif
287  	.set	reorder
288  	/* Add the passed partial csum. */
289  	ADDC32(sum, a2)
290  	jr	ra
291  	.set	noreorder
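Because the one's-complement sum is commutative and associative, the caller's running checksum (passed in a2) can be folded in once, here at the very end. A hypothetical caller, assuming the usual csum_partial() prototype from <asm/checksum.h> and a len of at least 40 bytes:

    /* sketch only; buf and len are placeholders */
    static __wsum checksum_two_chunks(const void *buf, int len)
    {
            __wsum s = csum_partial(buf, 40, 0);         /* first 40 bytes */
            return csum_partial(buf + 40, len - 40, s);  /* rest, resuming */
    }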
@@ -298,7 +298,7 @@ LEAF(csum_partial)
298   * csum_partial_copy_nocheck(src, dst, len, sum)
299   * __csum_partial_copy_user(src, dst, len, sum, errp)
300   *
301   * See "Spec" in memcpy.S for details. Unlike __copy_user, all
302   * function in this file use the standard calling convention.
303   */
304
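For reference, the C-level declarations these entry points correspond to look roughly like the following; this is reconstructed from memory of this era's <asm/checksum.h>, so treat it as a sketch rather than a quote:

    __wsum csum_partial_copy_nocheck(const void *src, void *dst,
                                     int len, __wsum sum);
    /* *errp is set to -EFAULT if a user access faults mid-copy */
    __wsum __csum_partial_copy_user(const void *src, void *dst,
                                    int len, __wsum sum, int *errp);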
@@ -371,16 +371,16 @@ LEAF(csum_partial)
371
372  #ifdef CONFIG_CPU_LITTLE_ENDIAN
373  #define LDFIRST LOADR
374  #define LDREST  LOADL
375  #define STFIRST STORER
376  #define STREST  STOREL
377  #define SHIFT_DISCARD SLLV
378  #define SHIFT_DISCARD_REVERT SRLV
379  #else
380  #define LDFIRST LOADL
381  #define LDREST  LOADR
382  #define STFIRST STOREL
383  #define STREST  STORER
384  #define SHIFT_DISCARD SRLV
385  #define SHIFT_DISCARD_REVERT SLLV
386  #endif
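The SHIFT_DISCARD/SHIFT_DISCARD_REVERT pair keeps only the leading bytes of a word in memory order: on little-endian those are the low-order bits, so the discard is a left shift and the revert a logical right shift; big-endian is the mirror image. A hedged C model of the pair's net effect (keep_first_bytes is an illustrative name, and 0 < keep <= sizeof(w) is assumed):

    static unsigned long keep_first_bytes(unsigned long w, unsigned keep)
    {
            unsigned discard = 8 * (sizeof(w) - keep);  /* bits to drop */
    #ifdef __MIPSEL__
            w <<= discard;      /* SHIFT_DISCARD = SLLV on little-endian */
            w >>= discard;      /* SHIFT_DISCARD_REVERT = SRLV */
    #else
            w >>= discard;      /* SHIFT_DISCARD = SRLV on big-endian */
            w <<= discard;      /* SHIFT_DISCARD_REVERT = SLLV */
    #endif
            return w;           /* trailing bytes zeroed, rest intact */
    }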
@@ -430,7 +430,7 @@ FEXPORT(csum_partial_copy_nocheck)
430   * src and dst are aligned; need to compute rem
431   */
432  .Lboth_aligned:
433  	SRL	t0, len, LOG_NBYTES+3	# +3 for 8 units/iter
434  	beqz	t0, .Lcleanup_both_aligned	# len < 8*NBYTES
435  	nop
436  	SUB	len, 8*NBYTES	# subtract here for bgez loop
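The shift by LOG_NBYTES+3 is just a division: each LOAD moves NBYTES = 1 << LOG_NBYTES bytes, and the main loop is unrolled eight loads deep. In C terms (the two defines are stated here only for illustration, assuming 32-bit units):

    #define LOG_NBYTES 2            /* assumption: 4-byte load units */
    #define NBYTES (1 << LOG_NBYTES)

    /* full 8-word iterations; any remainder falls through to the
     * .Lcleanup_both_aligned path */
    static unsigned long big_iters(unsigned long len)
    {
            return len >> (LOG_NBYTES + 3);     /* len / (8 * NBYTES) */
    }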
@@ -518,7 +518,7 @@ EXC(	STORE	t0, 0(dst),	.Ls_exc)
518  /*
519   * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
520   * A loop would do only a byte at a time with possible branch
521   * mispredicts. Can't do an explicit LOAD dst,mask,or,STORE
522   * because can't assume read-access to dst. Instead, use
523   * STREST dst, which doesn't require read access to dst.
524   *
@@ -532,7 +532,7 @@ EXC(	STORE	t0, 0(dst),	.Ls_exc)
532  	li	bits, 8*NBYTES
533  	SLL	rem, len, 3	# rem = number of bits to keep
534  EXC(	LOAD	t0, 0(src),	.Ll_exc)
535  	SUB	bits, bits, rem	# bits = number of bits to discard
536  	SHIFT_DISCARD t0, t0, bits
537  EXC(	STREST	t0, -1(t1),	.Ls_exc)
538  	SHIFT_DISCARD_REVERT t0, t0, bits
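Putting the two preceding hunks together: the last rem bytes are loaded as one word, the unwanted bits are shifted away, the result is written with STREST (which never reads dst, so write-only user pages are fine), and the shift is reverted so exactly the stored bytes enter the checksum. A little-endian C sketch, modeling the partial store with memcpy() and assuming 0 < len < sizeof(w):

    #include <string.h>

    static unsigned long store_tail(unsigned long w, void *dst, unsigned len)
    {
            unsigned bits = 8 * (sizeof(w) - len);  /* bits to discard */
            w <<= bits;             /* SHIFT_DISCARD: drop trailing bytes */
            w >>= bits;             /* SHIFT_DISCARD_REVERT: restore place */
            memcpy(dst, &w, len);   /* STREST-like: writes len bytes only */
            return w;               /* caller folds this in with ADDC() */
    }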
@@ -551,7 +551,7 @@ EXC(	STREST	t0, -1(t1),	.Ls_exc)
551   * Set match = (src and dst have same alignment)
552   */
553  #define match rem
554  EXC(	LDFIRST	t3, FIRST(0)(src),	.Ll_exc)
555  	ADD	t2, zero, NBYTES
556  EXC(	LDREST	t3, REST(0)(src),	.Ll_exc_copy)
557  	SUB	t2, t2, t1	# t2 = number of bytes copied
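Here t2 ends up holding how far the first, misaligned store advances: NBYTES minus the part of the boundary already behind dst. A hedged C equivalent, assuming (as in memcpy.S) that t1 holds dst's misalignment at this point:

    /* bytes consumed by the first partial word, i.e. the distance from
     * dst up to the next NBYTES boundary (dst assumed not yet aligned) */
    static unsigned first_chunk(unsigned long dst)
    {
            return NBYTES - (dst & (NBYTES - 1));   /* t2 = NBYTES - t1 */
    }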
@@ -568,9 +568,9 @@ EXC(	STFIRST t3, FIRST(0)(dst),	.Ls_exc)
568  	ADD	src, src, t2
569
570  .Lsrc_unaligned_dst_aligned:
571  	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
572  	beqz	t0, .Lcleanup_src_unaligned
573  	and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
574  1:
575  /*
576   * Avoid consecutive LD*'s to the same register since some mips
@@ -578,13 +578,13 @@ EXC(	STFIRST t3, FIRST(0)(dst),	.Ls_exc)
578   * It's OK to load FIRST(N+1) before REST(N) because the two addresses
579   * are to the same unit (unless src is aligned, but it's not).
580   */
581  EXC(	LDFIRST	t0, FIRST(0)(src),	.Ll_exc)
582  EXC(	LDFIRST	t1, FIRST(1)(src),	.Ll_exc_copy)
583  	SUB	len, len, 4*NBYTES
584  EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy)
585  EXC(	LDREST	t1, REST(1)(src),	.Ll_exc_copy)
586  EXC(	LDFIRST	t2, FIRST(2)(src),	.Ll_exc_copy)
587  EXC(	LDFIRST	t3, FIRST(3)(src),	.Ll_exc_copy)
588  EXC(	LDREST	t2, REST(2)(src),	.Ll_exc_copy)
589  EXC(	LDREST	t3, REST(3)(src),	.Ll_exc_copy)
590  	ADD	src, src, 4*NBYTES
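Each LDFIRST/LDREST pair assembles one unaligned word from two partial loads (lwl/lwr or their 64-bit forms); the pairs are interleaved across t0..t3 only so that back-to-back loads never target the same register and stall. In C the whole pair collapses to a single unaligned read, sketched here with memcpy() standing in for the two instructions:

    #include <string.h>

    static unsigned long load_unaligned(const unsigned char *src)
    {
            unsigned long w;
            memcpy(&w, src, sizeof(w));  /* compiler emits lwl/lwr-style code */
            return w;
    }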
@@ -634,7 +634,7 @@ EXC(	STORE	t0, 0(dst),	.Ls_exc)
634  #define SHIFT_INC -8
635  #endif
636  	move	t2, zero	# partial word
637  	li	t3, SHIFT_START	# shift
638  /* use .Ll_exc_copy here to return correct sum on fault */
639  #define COPY_BYTE(N)	\
640  EXC(	lbu	t0, N(src), .Ll_exc_copy);	\
@@ -642,7 +642,7 @@ EXC(	lbu	t0, N(src), .Ll_exc_copy);	\
642  EXC(	sb	t0, N(dst), .Ls_exc);	\
643  	SLLV	t0, t0, t3;	\
644  	addu	t3, SHIFT_INC;	\
645  	beqz	len, .Lcopy_bytes_done;	\
646  	or	t2, t0
647
648  	COPY_BYTE(0)
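COPY_BYTE(N) copies one byte and at the same time rebuilds the word those bytes would have formed in a word-sized load, so the tail contributes to the checksum exactly like the word path: each byte is shifted to its in-word position (the direction depends on endianness, hence SHIFT_START/SHIFT_INC) and OR-ed into the partial word in t2. A hedged C rendering, with the two macros fixed for little-endian and len < NBYTES assumed:

    #define SHIFT_START 0       /* little-endian assumption */
    #define SHIFT_INC   8

    static unsigned long copy_bytes(const unsigned char *src,
                                    unsigned char *dst, unsigned len)
    {
            unsigned long partial = 0;      /* t2: partial word */
            int shift = SHIFT_START;        /* t3: next byte's bit position */

            while (len--) {
                    unsigned long b = *src++;     /* lbu  t0, N(src) */
                    *dst++ = (unsigned char)b;    /* sb   t0, N(dst) */
                    partial |= b << shift;        /* SLLV; or t2, t0 */
                    shift += SHIFT_INC;           /* addu t3, SHIFT_INC */
            }
            return partial;     /* folded into sum via ADDC(sum, t2) */
    }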