author     Markos Chandras <markos.chandras@imgtec.com>   2014-01-17 05:48:46 -0500
committer  Ralf Baechle <ralf@linux-mips.org>              2014-03-26 18:09:17 -0400
commit     e89fb56c8bcf5514cfe7abd7a3dda9e6007b7238 (patch)
tree       e71494a33e6dad52b0f02b477e9f8aeada7af4e1 /arch/mips/lib
parent     2ab82e66483798670e129c48c05d7fc8a39ea996 (diff)
MIPS: lib: csum_partial: Add macro to build csum_partial symbols
In preparation for EVA support, we use a macro to build the
__csum_partial_copy_user main code so it can be shared across
multiple implementations. EVA uses the same code but replaces
the load/store/prefetch instructions with the EVA-specific ones,
so using a macro avoids unnecessary code duplication.
Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Diffstat (limited to 'arch/mips/lib')
-rw-r--r--  arch/mips/lib/csum_partial.S  200
1 file changed, 108 insertions, 92 deletions
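
For readers unfamiliar with the assembler-macro technique the message describes, here is a minimal illustrative sketch (not part of the patch): a GAS .macro whose mode argument selects between normal and EVA load/store instructions, with \@ (the assembler's per-expansion counter) keeping internal labels unique in every expansion. The copy_words name, registers, and .equ constants are hypothetical; the kernel file defines LEGACY_MODE/EVA_MODE with cpp #define instead, and the EVA arm assumes an EVA-capable CPU/assembler.

	.set	noreorder
	.equ	LEGACY_MODE, 1		/* the patch uses cpp #define for these */
	.equ	EVA_MODE,    2

	/*
	 * Hypothetical macro: one loop body, assembled twice.  The mode
	 * argument picks normal vs. EVA load/store instructions, and \@
	 * makes .Lloop unique in each expansion.
	 */
	.macro	copy_words mode
.Lloop\@:
	.if	\mode == LEGACY_MODE
	lw	$t0, 0($a0)		/* normal kernel-segment load/store */
	sw	$t0, 0($a1)
	.else
	lwe	$t0, 0($a0)		/* EVA load/store: user address view */
	swe	$t0, 0($a1)
	.endif
	addiu	$a0, $a0, 4
	addiu	$a1, $a1, 4
	addiu	$a2, $a2, -1
	bnez	$a2, .Lloop\@
	 nop				/* branch delay slot */
	.endm

	/* Two independent expansions of the same body: */
copy_words_legacy:
	copy_words LEGACY_MODE
	jr	$ra
	 nop

copy_words_eva:
	copy_words EVA_MODE
	jr	$ra
	 nop

In the patch itself this is exactly what allows the LEGACY_MODE expansion of __BUILD_CSUM_PARTIAL_COPY_USER (and, later, an EVA one) to coexist in a single object file without duplicate-label errors, which is why every local label below gains a \@ suffix.
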
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index bff5167b59a6..62c8768a59ce 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -331,6 +331,10 @@ LEAF(csum_partial)
 /* Instruction type */
 #define LD_INSN 1
 #define ST_INSN 2
+#define LEGACY_MODE 1
+#define EVA_MODE 2
+#define USEROP 1
+#define KERNELOP 2

 /*
  * Wrapper to add an entry in the exception table
@@ -343,10 +347,12 @@ LEAF(csum_partial)
  * handler : Exception handler
  */
 #define EXC(insn, type, reg, addr, handler) \
-9: insn reg, addr; \
-.section __ex_table,"a"; \
-PTR 9b, handler; \
-.previous
+.if \mode == LEGACY_MODE; \
+9: insn reg, addr; \
+.section __ex_table,"a"; \
+PTR 9b, handler; \
+.previous; \
+.endif

 #undef LOAD

@@ -419,16 +425,20 @@ LEAF(csum_partial)
 .set at=v1
 #endif

-LEAF(__csum_partial_copy_kernel)
-FEXPORT(__csum_partial_copy_to_user)
-FEXPORT(__csum_partial_copy_from_user)
+.macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to, __nocheck
+
 PTR_ADDU AT, src, len /* See (1) above. */
+/* initialize __nocheck if this is the first time we execute this
+ * macro
+ */
 #ifdef CONFIG_64BIT
 move errptr, a4
 #else
 lw errptr, 16(sp)
 #endif
-FEXPORT(csum_partial_copy_nocheck)
+.if \__nocheck == 1
+FEXPORT(csum_partial_copy_nocheck)
+.endif
 move sum, zero
 move odd, zero
 /*
@@ -444,48 +454,48 @@ FEXPORT(csum_partial_copy_nocheck)
 */
 sltu t2, len, NBYTES
 and t1, dst, ADDRMASK
-bnez t2, .Lcopy_bytes_checklen
+bnez t2, .Lcopy_bytes_checklen\@
 and t0, src, ADDRMASK
 andi odd, dst, 0x1 /* odd buffer? */
-bnez t1, .Ldst_unaligned
+bnez t1, .Ldst_unaligned\@
 nop
-bnez t0, .Lsrc_unaligned_dst_aligned
+bnez t0, .Lsrc_unaligned_dst_aligned\@
 /*
 * use delay slot for fall-through
 * src and dst are aligned; need to compute rem
 */
-.Lboth_aligned:
+.Lboth_aligned\@:
 SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter
-beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES
+beqz t0, .Lcleanup_both_aligned\@ # len < 8*NBYTES
 nop
 SUB len, 8*NBYTES # subtract here for bgez loop
 .align 4
 1:
-LOAD(t0, UNIT(0)(src), .Ll_exc)
-LOAD(t1, UNIT(1)(src), .Ll_exc_copy)
-LOAD(t2, UNIT(2)(src), .Ll_exc_copy)
-LOAD(t3, UNIT(3)(src), .Ll_exc_copy)
-LOAD(t4, UNIT(4)(src), .Ll_exc_copy)
-LOAD(t5, UNIT(5)(src), .Ll_exc_copy)
-LOAD(t6, UNIT(6)(src), .Ll_exc_copy)
-LOAD(t7, UNIT(7)(src), .Ll_exc_copy)
+LOAD(t0, UNIT(0)(src), .Ll_exc\@)
+LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
+LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
+LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
+LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@)
+LOAD(t5, UNIT(5)(src), .Ll_exc_copy\@)
+LOAD(t6, UNIT(6)(src), .Ll_exc_copy\@)
+LOAD(t7, UNIT(7)(src), .Ll_exc_copy\@)
 SUB len, len, 8*NBYTES
 ADD src, src, 8*NBYTES
-STORE(t0, UNIT(0)(dst), .Ls_exc)
+STORE(t0, UNIT(0)(dst), .Ls_exc\@)
 ADDC(sum, t0)
-STORE(t1, UNIT(1)(dst), .Ls_exc)
+STORE(t1, UNIT(1)(dst), .Ls_exc\@)
 ADDC(sum, t1)
-STORE(t2, UNIT(2)(dst), .Ls_exc)
+STORE(t2, UNIT(2)(dst), .Ls_exc\@)
 ADDC(sum, t2)
-STORE(t3, UNIT(3)(dst), .Ls_exc)
+STORE(t3, UNIT(3)(dst), .Ls_exc\@)
 ADDC(sum, t3)
-STORE(t4, UNIT(4)(dst), .Ls_exc)
+STORE(t4, UNIT(4)(dst), .Ls_exc\@)
 ADDC(sum, t4)
-STORE(t5, UNIT(5)(dst), .Ls_exc)
+STORE(t5, UNIT(5)(dst), .Ls_exc\@)
 ADDC(sum, t5)
-STORE(t6, UNIT(6)(dst), .Ls_exc)
+STORE(t6, UNIT(6)(dst), .Ls_exc\@)
 ADDC(sum, t6)
-STORE(t7, UNIT(7)(dst), .Ls_exc)
+STORE(t7, UNIT(7)(dst), .Ls_exc\@)
 ADDC(sum, t7)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, 8*NBYTES
@@ -496,44 +506,44 @@ FEXPORT(csum_partial_copy_nocheck)
 /*
 * len == the number of bytes left to copy < 8*NBYTES
 */
-.Lcleanup_both_aligned:
+.Lcleanup_both_aligned\@:
 #define rem t7
-beqz len, .Ldone
+beqz len, .Ldone\@
 sltu t0, len, 4*NBYTES
-bnez t0, .Lless_than_4units
+bnez t0, .Lless_than_4units\@
 and rem, len, (NBYTES-1) # rem = len % NBYTES
 /*
 * len >= 4*NBYTES
 */
-LOAD(t0, UNIT(0)(src), .Ll_exc)
-LOAD(t1, UNIT(1)(src), .Ll_exc_copy)
-LOAD(t2, UNIT(2)(src), .Ll_exc_copy)
-LOAD(t3, UNIT(3)(src), .Ll_exc_copy)
+LOAD(t0, UNIT(0)(src), .Ll_exc\@)
+LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
+LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
+LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
 SUB len, len, 4*NBYTES
 ADD src, src, 4*NBYTES
-STORE(t0, UNIT(0)(dst), .Ls_exc)
+STORE(t0, UNIT(0)(dst), .Ls_exc\@)
 ADDC(sum, t0)
-STORE(t1, UNIT(1)(dst), .Ls_exc)
+STORE(t1, UNIT(1)(dst), .Ls_exc\@)
 ADDC(sum, t1)
-STORE(t2, UNIT(2)(dst), .Ls_exc)
+STORE(t2, UNIT(2)(dst), .Ls_exc\@)
 ADDC(sum, t2)
-STORE(t3, UNIT(3)(dst), .Ls_exc)
+STORE(t3, UNIT(3)(dst), .Ls_exc\@)
 ADDC(sum, t3)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, 4*NBYTES
-beqz len, .Ldone
+beqz len, .Ldone\@
 .set noreorder
-.Lless_than_4units:
+.Lless_than_4units\@:
 /*
 * rem = len % NBYTES
 */
-beq rem, len, .Lcopy_bytes
+beq rem, len, .Lcopy_bytes\@
 nop
 1:
-LOAD(t0, 0(src), .Ll_exc)
+LOAD(t0, 0(src), .Ll_exc\@)
 ADD src, src, NBYTES
 SUB len, len, NBYTES
-STORE(t0, 0(dst), .Ls_exc)
+STORE(t0, 0(dst), .Ls_exc\@)
 ADDC(sum, t0)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, NBYTES
@@ -552,20 +562,20 @@ FEXPORT(csum_partial_copy_nocheck)
 * more instruction-level parallelism.
 */
 #define bits t2
-beqz len, .Ldone
+beqz len, .Ldone\@
 ADD t1, dst, len # t1 is just past last byte of dst
 li bits, 8*NBYTES
 SLL rem, len, 3 # rem = number of bits to keep
-LOAD(t0, 0(src), .Ll_exc)
+LOAD(t0, 0(src), .Ll_exc\@)
 SUB bits, bits, rem # bits = number of bits to discard
 SHIFT_DISCARD t0, t0, bits
-STREST(t0, -1(t1), .Ls_exc)
+STREST(t0, -1(t1), .Ls_exc\@)
 SHIFT_DISCARD_REVERT t0, t0, bits
 .set reorder
 ADDC(sum, t0)
-b .Ldone
+b .Ldone\@
 .set noreorder
-.Ldst_unaligned:
+.Ldst_unaligned\@:
 /*
 * dst is unaligned
 * t0 = src & ADDRMASK
@@ -576,25 +586,25 @@ FEXPORT(csum_partial_copy_nocheck)
 * Set match = (src and dst have same alignment)
 */
 #define match rem
-LDFIRST(t3, FIRST(0)(src), .Ll_exc)
+LDFIRST(t3, FIRST(0)(src), .Ll_exc\@)
 ADD t2, zero, NBYTES
-LDREST(t3, REST(0)(src), .Ll_exc_copy)
+LDREST(t3, REST(0)(src), .Ll_exc_copy\@)
 SUB t2, t2, t1 # t2 = number of bytes copied
 xor match, t0, t1
-STFIRST(t3, FIRST(0)(dst), .Ls_exc)
+STFIRST(t3, FIRST(0)(dst), .Ls_exc\@)
 SLL t4, t1, 3 # t4 = number of bits to discard
 SHIFT_DISCARD t3, t3, t4
 /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
 ADDC(sum, t3)
-beq len, t2, .Ldone
+beq len, t2, .Ldone\@
 SUB len, len, t2
 ADD dst, dst, t2
-beqz match, .Lboth_aligned
+beqz match, .Lboth_aligned\@
 ADD src, src, t2

-.Lsrc_unaligned_dst_aligned:
+.Lsrc_unaligned_dst_aligned\@:
 SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter
-beqz t0, .Lcleanup_src_unaligned
+beqz t0, .Lcleanup_src_unaligned\@
 and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES
 1:
 /*
@@ -603,53 +613,53 @@ FEXPORT(csum_partial_copy_nocheck)
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
-LDFIRST(t0, FIRST(0)(src), .Ll_exc)
-LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy)
+LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
+LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@)
 SUB len, len, 4*NBYTES
-LDREST(t0, REST(0)(src), .Ll_exc_copy)
-LDREST(t1, REST(1)(src), .Ll_exc_copy)
-LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy)
-LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy)
-LDREST(t2, REST(2)(src), .Ll_exc_copy)
-LDREST(t3, REST(3)(src), .Ll_exc_copy)
+LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
+LDREST(t1, REST(1)(src), .Ll_exc_copy\@)
+LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@)
+LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@)
+LDREST(t2, REST(2)(src), .Ll_exc_copy\@)
+LDREST(t3, REST(3)(src), .Ll_exc_copy\@)
 ADD src, src, 4*NBYTES
 #ifdef CONFIG_CPU_SB1
 nop # improves slotting
 #endif
-STORE(t0, UNIT(0)(dst), .Ls_exc)
+STORE(t0, UNIT(0)(dst), .Ls_exc\@)
 ADDC(sum, t0)
-STORE(t1, UNIT(1)(dst), .Ls_exc)
+STORE(t1, UNIT(1)(dst), .Ls_exc\@)
 ADDC(sum, t1)
-STORE(t2, UNIT(2)(dst), .Ls_exc)
+STORE(t2, UNIT(2)(dst), .Ls_exc\@)
 ADDC(sum, t2)
-STORE(t3, UNIT(3)(dst), .Ls_exc)
+STORE(t3, UNIT(3)(dst), .Ls_exc\@)
 ADDC(sum, t3)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, 4*NBYTES
 bne len, rem, 1b
 .set noreorder

-.Lcleanup_src_unaligned:
-beqz len, .Ldone
+.Lcleanup_src_unaligned\@:
+beqz len, .Ldone\@
 and rem, len, NBYTES-1 # rem = len % NBYTES
-beq rem, len, .Lcopy_bytes
+beq rem, len, .Lcopy_bytes\@
 nop
 1:
-LDFIRST(t0, FIRST(0)(src), .Ll_exc)
-LDREST(t0, REST(0)(src), .Ll_exc_copy)
+LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
+LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
 ADD src, src, NBYTES
 SUB len, len, NBYTES
-STORE(t0, 0(dst), .Ls_exc)
+STORE(t0, 0(dst), .Ls_exc\@)
 ADDC(sum, t0)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, NBYTES
 bne len, rem, 1b
 .set noreorder

-.Lcopy_bytes_checklen:
-beqz len, .Ldone
+.Lcopy_bytes_checklen\@:
+beqz len, .Ldone\@
 nop
-.Lcopy_bytes:
+.Lcopy_bytes\@:
 /* 0 < len < NBYTES */
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
 #define SHIFT_START 0
@@ -662,12 +672,12 @@ FEXPORT(csum_partial_copy_nocheck)
 li t3, SHIFT_START # shift
 /* use .Ll_exc_copy here to return correct sum on fault */
 #define COPY_BYTE(N) \
-LOADBU(t0, N(src), .Ll_exc_copy); \
+LOADBU(t0, N(src), .Ll_exc_copy\@); \
 SUB len, len, 1; \
-STOREB(t0, N(dst), .Ls_exc); \
+STOREB(t0, N(dst), .Ls_exc\@); \
 SLLV t0, t0, t3; \
 addu t3, SHIFT_INC; \
-beqz len, .Lcopy_bytes_done; \
+beqz len, .Lcopy_bytes_done\@; \
 or t2, t0

 COPY_BYTE(0)
@@ -678,14 +688,14 @@ FEXPORT(csum_partial_copy_nocheck)
 COPY_BYTE(4)
 COPY_BYTE(5)
 #endif
-LOADBU(t0, NBYTES-2(src), .Ll_exc_copy)
+LOADBU(t0, NBYTES-2(src), .Ll_exc_copy\@)
 SUB len, len, 1
-STOREB(t0, NBYTES-2(dst), .Ls_exc)
+STOREB(t0, NBYTES-2(dst), .Ls_exc\@)
 SLLV t0, t0, t3
 or t2, t0
-.Lcopy_bytes_done:
+.Lcopy_bytes_done\@:
 ADDC(sum, t2)
-.Ldone:
+.Ldone\@:
 /* fold checksum */
 #ifdef USE_DOUBLE
 dsll32 v1, sum, 0
@@ -714,7 +724,7 @@ FEXPORT(csum_partial_copy_nocheck)
 jr ra
 .set noreorder

-.Ll_exc_copy:
+.Ll_exc_copy\@:
 /*
 * Copy bytes from src until faulting load address (or until a
 * lb faults)
@@ -729,7 +739,7 @@ FEXPORT(csum_partial_copy_nocheck)
 li t2, SHIFT_START
 LOADK t0, THREAD_BUADDR(t0)
 1:
-LOADBU(t1, 0(src), .Ll_exc)
+LOADBU(t1, 0(src), .Ll_exc\@)
 ADD src, src, 1
 sb t1, 0(dst) # can't fault -- we're copy_from_user
 SLLV t1, t1, t2
@@ -739,7 +749,7 @@ FEXPORT(csum_partial_copy_nocheck)
 ADD dst, dst, 1
 bne src, t0, 1b
 .set noreorder
-.Ll_exc:
+.Ll_exc\@:
 LOADK t0, TI_TASK($28)
 nop
 LOADK t0, THREAD_BUADDR(t0) # t0 is just past last good address
@@ -758,7 +768,7 @@ FEXPORT(csum_partial_copy_nocheck)
 */
 .set reorder /* DADDI_WAR */
 SUB src, len, 1
-beqz len, .Ldone
+beqz len, .Ldone\@
 .set noreorder
 1: sb zero, 0(dst)
 ADD dst, dst, 1
@@ -773,13 +783,19 @@ FEXPORT(csum_partial_copy_nocheck)
 SUB src, src, v1
 #endif
 li v1, -EFAULT
-b .Ldone
+b .Ldone\@
 sw v1, (errptr)

-.Ls_exc:
+.Ls_exc\@:
 li v0, -1 /* invalid checksum */
 li v1, -EFAULT
 jr ra
 sw v1, (errptr)
 .set pop
-END(__csum_partial_copy_kernel)
+.endm
+
+LEAF(__csum_partial_copy_kernel)
+FEXPORT(__csum_partial_copy_to_user)
+FEXPORT(__csum_partial_copy_from_user)
+__BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP 1
+END(__csum_partial_copy_kernel)