diff options
| author | Markos Chandras <markos.chandras@imgtec.com> | 2014-01-17 05:48:46 -0500 |
|---|---|---|
| committer | Ralf Baechle <ralf@linux-mips.org> | 2014-03-26 18:09:17 -0400 |
| commit | e89fb56c8bcf5514cfe7abd7a3dda9e6007b7238 (patch) | |
| tree | e71494a33e6dad52b0f02b477e9f8aeada7af4e1 /arch/mips/lib | |
| parent | 2ab82e66483798670e129c48c05d7fc8a39ea996 (diff) | |
MIPS: lib: csum_partial: Add macro to build csum_partial symbols
In preparation for EVA support, we use a macro to build the
__csum_partial_copy_user main code so it can be shared across
multiple implementations. EVA uses the same code but it replaces
the load/store/prefetch instructions with the EVA specific ones
therefore using a macro avoids unnecessary code duplication.
Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Diffstat (limited to 'arch/mips/lib')
| -rw-r--r-- | arch/mips/lib/csum_partial.S | 200 |
1 file changed, 108 insertions, 92 deletions
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index bff5167b59a6..62c8768a59ce 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S | |||
| @@ -331,6 +331,10 @@ LEAF(csum_partial) | |||
| 331 | /* Instruction type */ | 331 | /* Instruction type */ |
| 332 | #define LD_INSN 1 | 332 | #define LD_INSN 1 |
| 333 | #define ST_INSN 2 | 333 | #define ST_INSN 2 |
| 334 | #define LEGACY_MODE 1 | ||
| 335 | #define EVA_MODE 2 | ||
| 336 | #define USEROP 1 | ||
| 337 | #define KERNELOP 2 | ||
| 334 | 338 | ||
| 335 | /* | 339 | /* |
| 336 | * Wrapper to add an entry in the exception table | 340 | * Wrapper to add an entry in the exception table |
| @@ -343,10 +347,12 @@ LEAF(csum_partial) | |||
| 343 | * handler : Exception handler | 347 | * handler : Exception handler |
| 344 | */ | 348 | */ |
| 345 | #define EXC(insn, type, reg, addr, handler) \ | 349 | #define EXC(insn, type, reg, addr, handler) \ |
| 346 | 9: insn reg, addr; \ | 350 | .if \mode == LEGACY_MODE; \ |
| 347 | .section __ex_table,"a"; \ | 351 | 9: insn reg, addr; \ |
| 348 | PTR 9b, handler; \ | 352 | .section __ex_table,"a"; \ |
| 349 | .previous | 353 | PTR 9b, handler; \ |
| 354 | .previous; \ | ||
| 355 | .endif | ||
| 350 | 356 | ||
| 351 | #undef LOAD | 357 | #undef LOAD |
| 352 | 358 | ||
| @@ -419,16 +425,20 @@ LEAF(csum_partial) | |||
| 419 | .set at=v1 | 425 | .set at=v1 |
| 420 | #endif | 426 | #endif |
| 421 | 427 | ||
| 422 | LEAF(__csum_partial_copy_kernel) | 428 | .macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to, __nocheck |
| 423 | FEXPORT(__csum_partial_copy_to_user) | 429 | |
| 424 | FEXPORT(__csum_partial_copy_from_user) | ||
| 425 | PTR_ADDU AT, src, len /* See (1) above. */ | 430 | PTR_ADDU AT, src, len /* See (1) above. */ |
| 431 | /* initialize __nocheck if this is the first time we execute this | ||
| 432 | * macro | ||
| 433 | */ | ||
| 426 | #ifdef CONFIG_64BIT | 434 | #ifdef CONFIG_64BIT |
| 427 | move errptr, a4 | 435 | move errptr, a4 |
| 428 | #else | 436 | #else |
| 429 | lw errptr, 16(sp) | 437 | lw errptr, 16(sp) |
| 430 | #endif | 438 | #endif |
| 431 | FEXPORT(csum_partial_copy_nocheck) | 439 | .if \__nocheck == 1 |
| 440 | FEXPORT(csum_partial_copy_nocheck) | ||
| 441 | .endif | ||
| 432 | move sum, zero | 442 | move sum, zero |
| 433 | move odd, zero | 443 | move odd, zero |
| 434 | /* | 444 | /* |
| @@ -444,48 +454,48 @@ FEXPORT(csum_partial_copy_nocheck) | |||
| 444 | */ | 454 | */ |
| 445 | sltu t2, len, NBYTES | 455 | sltu t2, len, NBYTES |
| 446 | and t1, dst, ADDRMASK | 456 | and t1, dst, ADDRMASK |
| 447 | bnez t2, .Lcopy_bytes_checklen | 457 | bnez t2, .Lcopy_bytes_checklen\@ |
| 448 | and t0, src, ADDRMASK | 458 | and t0, src, ADDRMASK |
| 449 | andi odd, dst, 0x1 /* odd buffer? */ | 459 | andi odd, dst, 0x1 /* odd buffer? */ |
| 450 | bnez t1, .Ldst_unaligned | 460 | bnez t1, .Ldst_unaligned\@ |
| 451 | nop | 461 | nop |
| 452 | bnez t0, .Lsrc_unaligned_dst_aligned | 462 | bnez t0, .Lsrc_unaligned_dst_aligned\@ |
| 453 | /* | 463 | /* |
| 454 | * use delay slot for fall-through | 464 | * use delay slot for fall-through |
| 455 | * src and dst are aligned; need to compute rem | 465 | * src and dst are aligned; need to compute rem |
| 456 | */ | 466 | */ |
| 457 | .Lboth_aligned: | 467 | .Lboth_aligned\@: |
| 458 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter | 468 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter |
| 459 | beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES | 469 | beqz t0, .Lcleanup_both_aligned\@ # len < 8*NBYTES |
| 460 | nop | 470 | nop |
| 461 | SUB len, 8*NBYTES # subtract here for bgez loop | 471 | SUB len, 8*NBYTES # subtract here for bgez loop |
| 462 | .align 4 | 472 | .align 4 |
| 463 | 1: | 473 | 1: |
| 464 | LOAD(t0, UNIT(0)(src), .Ll_exc) | 474 | LOAD(t0, UNIT(0)(src), .Ll_exc\@) |
| 465 | LOAD(t1, UNIT(1)(src), .Ll_exc_copy) | 475 | LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@) |
| 466 | LOAD(t2, UNIT(2)(src), .Ll_exc_copy) | 476 | LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@) |
| 467 | LOAD(t3, UNIT(3)(src), .Ll_exc_copy) | 477 | LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@) |
| 468 | LOAD(t4, UNIT(4)(src), .Ll_exc_copy) | 478 | LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@) |
| 469 | LOAD(t5, UNIT(5)(src), .Ll_exc_copy) | 479 | LOAD(t5, UNIT(5)(src), .Ll_exc_copy\@) |
| 470 | LOAD(t6, UNIT(6)(src), .Ll_exc_copy) | 480 | LOAD(t6, UNIT(6)(src), .Ll_exc_copy\@) |
| 471 | LOAD(t7, UNIT(7)(src), .Ll_exc_copy) | 481 | LOAD(t7, UNIT(7)(src), .Ll_exc_copy\@) |
| 472 | SUB len, len, 8*NBYTES | 482 | SUB len, len, 8*NBYTES |
| 473 | ADD src, src, 8*NBYTES | 483 | ADD src, src, 8*NBYTES |
| 474 | STORE(t0, UNIT(0)(dst), .Ls_exc) | 484 | STORE(t0, UNIT(0)(dst), .Ls_exc\@) |
| 475 | ADDC(sum, t0) | 485 | ADDC(sum, t0) |
| 476 | STORE(t1, UNIT(1)(dst), .Ls_exc) | 486 | STORE(t1, UNIT(1)(dst), .Ls_exc\@) |
| 477 | ADDC(sum, t1) | 487 | ADDC(sum, t1) |
| 478 | STORE(t2, UNIT(2)(dst), .Ls_exc) | 488 | STORE(t2, UNIT(2)(dst), .Ls_exc\@) |
| 479 | ADDC(sum, t2) | 489 | ADDC(sum, t2) |
| 480 | STORE(t3, UNIT(3)(dst), .Ls_exc) | 490 | STORE(t3, UNIT(3)(dst), .Ls_exc\@) |
| 481 | ADDC(sum, t3) | 491 | ADDC(sum, t3) |
| 482 | STORE(t4, UNIT(4)(dst), .Ls_exc) | 492 | STORE(t4, UNIT(4)(dst), .Ls_exc\@) |
| 483 | ADDC(sum, t4) | 493 | ADDC(sum, t4) |
| 484 | STORE(t5, UNIT(5)(dst), .Ls_exc) | 494 | STORE(t5, UNIT(5)(dst), .Ls_exc\@) |
| 485 | ADDC(sum, t5) | 495 | ADDC(sum, t5) |
| 486 | STORE(t6, UNIT(6)(dst), .Ls_exc) | 496 | STORE(t6, UNIT(6)(dst), .Ls_exc\@) |
| 487 | ADDC(sum, t6) | 497 | ADDC(sum, t6) |
| 488 | STORE(t7, UNIT(7)(dst), .Ls_exc) | 498 | STORE(t7, UNIT(7)(dst), .Ls_exc\@) |
| 489 | ADDC(sum, t7) | 499 | ADDC(sum, t7) |
| 490 | .set reorder /* DADDI_WAR */ | 500 | .set reorder /* DADDI_WAR */ |
| 491 | ADD dst, dst, 8*NBYTES | 501 | ADD dst, dst, 8*NBYTES |
| @@ -496,44 +506,44 @@ FEXPORT(csum_partial_copy_nocheck) | |||
| 496 | /* | 506 | /* |
| 497 | * len == the number of bytes left to copy < 8*NBYTES | 507 | * len == the number of bytes left to copy < 8*NBYTES |
| 498 | */ | 508 | */ |
| 499 | .Lcleanup_both_aligned: | 509 | .Lcleanup_both_aligned\@: |
| 500 | #define rem t7 | 510 | #define rem t7 |
| 501 | beqz len, .Ldone | 511 | beqz len, .Ldone\@ |
| 502 | sltu t0, len, 4*NBYTES | 512 | sltu t0, len, 4*NBYTES |
| 503 | bnez t0, .Lless_than_4units | 513 | bnez t0, .Lless_than_4units\@ |
| 504 | and rem, len, (NBYTES-1) # rem = len % NBYTES | 514 | and rem, len, (NBYTES-1) # rem = len % NBYTES |
| 505 | /* | 515 | /* |
| 506 | * len >= 4*NBYTES | 516 | * len >= 4*NBYTES |
| 507 | */ | 517 | */ |
| 508 | LOAD(t0, UNIT(0)(src), .Ll_exc) | 518 | LOAD(t0, UNIT(0)(src), .Ll_exc\@) |
| 509 | LOAD(t1, UNIT(1)(src), .Ll_exc_copy) | 519 | LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@) |
| 510 | LOAD(t2, UNIT(2)(src), .Ll_exc_copy) | 520 | LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@) |
| 511 | LOAD(t3, UNIT(3)(src), .Ll_exc_copy) | 521 | LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@) |
| 512 | SUB len, len, 4*NBYTES | 522 | SUB len, len, 4*NBYTES |
| 513 | ADD src, src, 4*NBYTES | 523 | ADD src, src, 4*NBYTES |
| 514 | STORE(t0, UNIT(0)(dst), .Ls_exc) | 524 | STORE(t0, UNIT(0)(dst), .Ls_exc\@) |
| 515 | ADDC(sum, t0) | 525 | ADDC(sum, t0) |
| 516 | STORE(t1, UNIT(1)(dst), .Ls_exc) | 526 | STORE(t1, UNIT(1)(dst), .Ls_exc\@) |
| 517 | ADDC(sum, t1) | 527 | ADDC(sum, t1) |
| 518 | STORE(t2, UNIT(2)(dst), .Ls_exc) | 528 | STORE(t2, UNIT(2)(dst), .Ls_exc\@) |
| 519 | ADDC(sum, t2) | 529 | ADDC(sum, t2) |
| 520 | STORE(t3, UNIT(3)(dst), .Ls_exc) | 530 | STORE(t3, UNIT(3)(dst), .Ls_exc\@) |
| 521 | ADDC(sum, t3) | 531 | ADDC(sum, t3) |
| 522 | .set reorder /* DADDI_WAR */ | 532 | .set reorder /* DADDI_WAR */ |
| 523 | ADD dst, dst, 4*NBYTES | 533 | ADD dst, dst, 4*NBYTES |
| 524 | beqz len, .Ldone | 534 | beqz len, .Ldone\@ |
| 525 | .set noreorder | 535 | .set noreorder |
| 526 | .Lless_than_4units: | 536 | .Lless_than_4units\@: |
| 527 | /* | 537 | /* |
| 528 | * rem = len % NBYTES | 538 | * rem = len % NBYTES |
| 529 | */ | 539 | */ |
| 530 | beq rem, len, .Lcopy_bytes | 540 | beq rem, len, .Lcopy_bytes\@ |
| 531 | nop | 541 | nop |
| 532 | 1: | 542 | 1: |
| 533 | LOAD(t0, 0(src), .Ll_exc) | 543 | LOAD(t0, 0(src), .Ll_exc\@) |
| 534 | ADD src, src, NBYTES | 544 | ADD src, src, NBYTES |
| 535 | SUB len, len, NBYTES | 545 | SUB len, len, NBYTES |
| 536 | STORE(t0, 0(dst), .Ls_exc) | 546 | STORE(t0, 0(dst), .Ls_exc\@) |
| 537 | ADDC(sum, t0) | 547 | ADDC(sum, t0) |
| 538 | .set reorder /* DADDI_WAR */ | 548 | .set reorder /* DADDI_WAR */ |
| 539 | ADD dst, dst, NBYTES | 549 | ADD dst, dst, NBYTES |
| @@ -552,20 +562,20 @@ FEXPORT(csum_partial_copy_nocheck) | |||
| 552 | * more instruction-level parallelism. | 562 | * more instruction-level parallelism. |
| 553 | */ | 563 | */ |
| 554 | #define bits t2 | 564 | #define bits t2 |
| 555 | beqz len, .Ldone | 565 | beqz len, .Ldone\@ |
| 556 | ADD t1, dst, len # t1 is just past last byte of dst | 566 | ADD t1, dst, len # t1 is just past last byte of dst |
| 557 | li bits, 8*NBYTES | 567 | li bits, 8*NBYTES |
| 558 | SLL rem, len, 3 # rem = number of bits to keep | 568 | SLL rem, len, 3 # rem = number of bits to keep |
| 559 | LOAD(t0, 0(src), .Ll_exc) | 569 | LOAD(t0, 0(src), .Ll_exc\@) |
| 560 | SUB bits, bits, rem # bits = number of bits to discard | 570 | SUB bits, bits, rem # bits = number of bits to discard |
| 561 | SHIFT_DISCARD t0, t0, bits | 571 | SHIFT_DISCARD t0, t0, bits |
| 562 | STREST(t0, -1(t1), .Ls_exc) | 572 | STREST(t0, -1(t1), .Ls_exc\@) |
| 563 | SHIFT_DISCARD_REVERT t0, t0, bits | 573 | SHIFT_DISCARD_REVERT t0, t0, bits |
| 564 | .set reorder | 574 | .set reorder |
| 565 | ADDC(sum, t0) | 575 | ADDC(sum, t0) |
| 566 | b .Ldone | 576 | b .Ldone\@ |
| 567 | .set noreorder | 577 | .set noreorder |
| 568 | .Ldst_unaligned: | 578 | .Ldst_unaligned\@: |
| 569 | /* | 579 | /* |
| 570 | * dst is unaligned | 580 | * dst is unaligned |
| 571 | * t0 = src & ADDRMASK | 581 | * t0 = src & ADDRMASK |
| @@ -576,25 +586,25 @@ FEXPORT(csum_partial_copy_nocheck) | |||
| 576 | * Set match = (src and dst have same alignment) | 586 | * Set match = (src and dst have same alignment) |
| 577 | */ | 587 | */ |
| 578 | #define match rem | 588 | #define match rem |
| 579 | LDFIRST(t3, FIRST(0)(src), .Ll_exc) | 589 | LDFIRST(t3, FIRST(0)(src), .Ll_exc\@) |
| 580 | ADD t2, zero, NBYTES | 590 | ADD t2, zero, NBYTES |
| 581 | LDREST(t3, REST(0)(src), .Ll_exc_copy) | 591 | LDREST(t3, REST(0)(src), .Ll_exc_copy\@) |
| 582 | SUB t2, t2, t1 # t2 = number of bytes copied | 592 | SUB t2, t2, t1 # t2 = number of bytes copied |
| 583 | xor match, t0, t1 | 593 | xor match, t0, t1 |
| 584 | STFIRST(t3, FIRST(0)(dst), .Ls_exc) | 594 | STFIRST(t3, FIRST(0)(dst), .Ls_exc\@) |
| 585 | SLL t4, t1, 3 # t4 = number of bits to discard | 595 | SLL t4, t1, 3 # t4 = number of bits to discard |
| 586 | SHIFT_DISCARD t3, t3, t4 | 596 | SHIFT_DISCARD t3, t3, t4 |
| 587 | /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */ | 597 | /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */ |
| 588 | ADDC(sum, t3) | 598 | ADDC(sum, t3) |
| 589 | beq len, t2, .Ldone | 599 | beq len, t2, .Ldone\@ |
| 590 | SUB len, len, t2 | 600 | SUB len, len, t2 |
| 591 | ADD dst, dst, t2 | 601 | ADD dst, dst, t2 |
| 592 | beqz match, .Lboth_aligned | 602 | beqz match, .Lboth_aligned\@ |
| 593 | ADD src, src, t2 | 603 | ADD src, src, t2 |
| 594 | 604 | ||
| 595 | .Lsrc_unaligned_dst_aligned: | 605 | .Lsrc_unaligned_dst_aligned\@: |
| 596 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter | 606 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter |
| 597 | beqz t0, .Lcleanup_src_unaligned | 607 | beqz t0, .Lcleanup_src_unaligned\@ |
| 598 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES | 608 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES |
| 599 | 1: | 609 | 1: |
| 600 | /* | 610 | /* |
| @@ -603,53 +613,53 @@ FEXPORT(csum_partial_copy_nocheck) | |||
| 603 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses | 613 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses |
| 604 | * are to the same unit (unless src is aligned, but it's not). | 614 | * are to the same unit (unless src is aligned, but it's not). |
| 605 | */ | 615 | */ |
| 606 | LDFIRST(t0, FIRST(0)(src), .Ll_exc) | 616 | LDFIRST(t0, FIRST(0)(src), .Ll_exc\@) |
| 607 | LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy) | 617 | LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@) |
| 608 | SUB len, len, 4*NBYTES | 618 | SUB len, len, 4*NBYTES |
| 609 | LDREST(t0, REST(0)(src), .Ll_exc_copy) | 619 | LDREST(t0, REST(0)(src), .Ll_exc_copy\@) |
| 610 | LDREST(t1, REST(1)(src), .Ll_exc_copy) | 620 | LDREST(t1, REST(1)(src), .Ll_exc_copy\@) |
| 611 | LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy) | 621 | LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@) |
| 612 | LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy) | 622 | LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@) |
| 613 | LDREST(t2, REST(2)(src), .Ll_exc_copy) | 623 | LDREST(t2, REST(2)(src), .Ll_exc_copy\@) |
| 614 | LDREST(t3, REST(3)(src), .Ll_exc_copy) | 624 | LDREST(t3, REST(3)(src), .Ll_exc_copy\@) |
| 615 | ADD src, src, 4*NBYTES | 625 | ADD src, src, 4*NBYTES |
| 616 | #ifdef CONFIG_CPU_SB1 | 626 | #ifdef CONFIG_CPU_SB1 |
| 617 | nop # improves slotting | 627 | nop # improves slotting |
| 618 | #endif | 628 | #endif |
| 619 | STORE(t0, UNIT(0)(dst), .Ls_exc) | 629 | STORE(t0, UNIT(0)(dst), .Ls_exc\@) |
| 620 | ADDC(sum, t0) | 630 | ADDC(sum, t0) |
| 621 | STORE(t1, UNIT(1)(dst), .Ls_exc) | 631 | STORE(t1, UNIT(1)(dst), .Ls_exc\@) |
| 622 | ADDC(sum, t1) | 632 | ADDC(sum, t1) |
| 623 | STORE(t2, UNIT(2)(dst), .Ls_exc) | 633 | STORE(t2, UNIT(2)(dst), .Ls_exc\@) |
| 624 | ADDC(sum, t2) | 634 | ADDC(sum, t2) |
| 625 | STORE(t3, UNIT(3)(dst), .Ls_exc) | 635 | STORE(t3, UNIT(3)(dst), .Ls_exc\@) |
| 626 | ADDC(sum, t3) | 636 | ADDC(sum, t3) |
| 627 | .set reorder /* DADDI_WAR */ | 637 | .set reorder /* DADDI_WAR */ |
| 628 | ADD dst, dst, 4*NBYTES | 638 | ADD dst, dst, 4*NBYTES |
| 629 | bne len, rem, 1b | 639 | bne len, rem, 1b |
| 630 | .set noreorder | 640 | .set noreorder |
| 631 | 641 | ||
| 632 | .Lcleanup_src_unaligned: | 642 | .Lcleanup_src_unaligned\@: |
| 633 | beqz len, .Ldone | 643 | beqz len, .Ldone\@ |
| 634 | and rem, len, NBYTES-1 # rem = len % NBYTES | 644 | and rem, len, NBYTES-1 # rem = len % NBYTES |
| 635 | beq rem, len, .Lcopy_bytes | 645 | beq rem, len, .Lcopy_bytes\@ |
| 636 | nop | 646 | nop |
| 637 | 1: | 647 | 1: |
| 638 | LDFIRST(t0, FIRST(0)(src), .Ll_exc) | 648 | LDFIRST(t0, FIRST(0)(src), .Ll_exc\@) |
| 639 | LDREST(t0, REST(0)(src), .Ll_exc_copy) | 649 | LDREST(t0, REST(0)(src), .Ll_exc_copy\@) |
| 640 | ADD src, src, NBYTES | 650 | ADD src, src, NBYTES |
| 641 | SUB len, len, NBYTES | 651 | SUB len, len, NBYTES |
| 642 | STORE(t0, 0(dst), .Ls_exc) | 652 | STORE(t0, 0(dst), .Ls_exc\@) |
| 643 | ADDC(sum, t0) | 653 | ADDC(sum, t0) |
| 644 | .set reorder /* DADDI_WAR */ | 654 | .set reorder /* DADDI_WAR */ |
| 645 | ADD dst, dst, NBYTES | 655 | ADD dst, dst, NBYTES |
| 646 | bne len, rem, 1b | 656 | bne len, rem, 1b |
| 647 | .set noreorder | 657 | .set noreorder |
| 648 | 658 | ||
| 649 | .Lcopy_bytes_checklen: | 659 | .Lcopy_bytes_checklen\@: |
| 650 | beqz len, .Ldone | 660 | beqz len, .Ldone\@ |
| 651 | nop | 661 | nop |
| 652 | .Lcopy_bytes: | 662 | .Lcopy_bytes\@: |
| 653 | /* 0 < len < NBYTES */ | 663 | /* 0 < len < NBYTES */ |
| 654 | #ifdef CONFIG_CPU_LITTLE_ENDIAN | 664 | #ifdef CONFIG_CPU_LITTLE_ENDIAN |
| 655 | #define SHIFT_START 0 | 665 | #define SHIFT_START 0 |
| @@ -662,12 +672,12 @@ FEXPORT(csum_partial_copy_nocheck) | |||
| 662 | li t3, SHIFT_START # shift | 672 | li t3, SHIFT_START # shift |
| 663 | /* use .Ll_exc_copy here to return correct sum on fault */ | 673 | /* use .Ll_exc_copy here to return correct sum on fault */ |
| 664 | #define COPY_BYTE(N) \ | 674 | #define COPY_BYTE(N) \ |
| 665 | LOADBU(t0, N(src), .Ll_exc_copy); \ | 675 | LOADBU(t0, N(src), .Ll_exc_copy\@); \ |
| 666 | SUB len, len, 1; \ | 676 | SUB len, len, 1; \ |
| 667 | STOREB(t0, N(dst), .Ls_exc); \ | 677 | STOREB(t0, N(dst), .Ls_exc\@); \ |
| 668 | SLLV t0, t0, t3; \ | 678 | SLLV t0, t0, t3; \ |
| 669 | addu t3, SHIFT_INC; \ | 679 | addu t3, SHIFT_INC; \ |
| 670 | beqz len, .Lcopy_bytes_done; \ | 680 | beqz len, .Lcopy_bytes_done\@; \ |
| 671 | or t2, t0 | 681 | or t2, t0 |
| 672 | 682 | ||
| 673 | COPY_BYTE(0) | 683 | COPY_BYTE(0) |
| @@ -678,14 +688,14 @@ FEXPORT(csum_partial_copy_nocheck) | |||
| 678 | COPY_BYTE(4) | 688 | COPY_BYTE(4) |
| 679 | COPY_BYTE(5) | 689 | COPY_BYTE(5) |
| 680 | #endif | 690 | #endif |
| 681 | LOADBU(t0, NBYTES-2(src), .Ll_exc_copy) | 691 | LOADBU(t0, NBYTES-2(src), .Ll_exc_copy\@) |
| 682 | SUB len, len, 1 | 692 | SUB len, len, 1 |
| 683 | STOREB(t0, NBYTES-2(dst), .Ls_exc) | 693 | STOREB(t0, NBYTES-2(dst), .Ls_exc\@) |
| 684 | SLLV t0, t0, t3 | 694 | SLLV t0, t0, t3 |
| 685 | or t2, t0 | 695 | or t2, t0 |
| 686 | .Lcopy_bytes_done: | 696 | .Lcopy_bytes_done\@: |
| 687 | ADDC(sum, t2) | 697 | ADDC(sum, t2) |
| 688 | .Ldone: | 698 | .Ldone\@: |
| 689 | /* fold checksum */ | 699 | /* fold checksum */ |
| 690 | #ifdef USE_DOUBLE | 700 | #ifdef USE_DOUBLE |
| 691 | dsll32 v1, sum, 0 | 701 | dsll32 v1, sum, 0 |
| @@ -714,7 +724,7 @@ FEXPORT(csum_partial_copy_nocheck) | |||
| 714 | jr ra | 724 | jr ra |
| 715 | .set noreorder | 725 | .set noreorder |
| 716 | 726 | ||
| 717 | .Ll_exc_copy: | 727 | .Ll_exc_copy\@: |
| 718 | /* | 728 | /* |
| 719 | * Copy bytes from src until faulting load address (or until a | 729 | * Copy bytes from src until faulting load address (or until a |
| 720 | * lb faults) | 730 | * lb faults) |
| @@ -729,7 +739,7 @@ FEXPORT(csum_partial_copy_nocheck) | |||
| 729 | li t2, SHIFT_START | 739 | li t2, SHIFT_START |
| 730 | LOADK t0, THREAD_BUADDR(t0) | 740 | LOADK t0, THREAD_BUADDR(t0) |
| 731 | 1: | 741 | 1: |
| 732 | LOADBU(t1, 0(src), .Ll_exc) | 742 | LOADBU(t1, 0(src), .Ll_exc\@) |
| 733 | ADD src, src, 1 | 743 | ADD src, src, 1 |
| 734 | sb t1, 0(dst) # can't fault -- we're copy_from_user | 744 | sb t1, 0(dst) # can't fault -- we're copy_from_user |
| 735 | SLLV t1, t1, t2 | 745 | SLLV t1, t1, t2 |
| @@ -739,7 +749,7 @@ FEXPORT(csum_partial_copy_nocheck) | |||
| 739 | ADD dst, dst, 1 | 749 | ADD dst, dst, 1 |
| 740 | bne src, t0, 1b | 750 | bne src, t0, 1b |
| 741 | .set noreorder | 751 | .set noreorder |
| 742 | .Ll_exc: | 752 | .Ll_exc\@: |
| 743 | LOADK t0, TI_TASK($28) | 753 | LOADK t0, TI_TASK($28) |
| 744 | nop | 754 | nop |
| 745 | LOADK t0, THREAD_BUADDR(t0) # t0 is just past last good address | 755 | LOADK t0, THREAD_BUADDR(t0) # t0 is just past last good address |
| @@ -758,7 +768,7 @@ FEXPORT(csum_partial_copy_nocheck) | |||
| 758 | */ | 768 | */ |
| 759 | .set reorder /* DADDI_WAR */ | 769 | .set reorder /* DADDI_WAR */ |
| 760 | SUB src, len, 1 | 770 | SUB src, len, 1 |
| 761 | beqz len, .Ldone | 771 | beqz len, .Ldone\@ |
| 762 | .set noreorder | 772 | .set noreorder |
| 763 | 1: sb zero, 0(dst) | 773 | 1: sb zero, 0(dst) |
| 764 | ADD dst, dst, 1 | 774 | ADD dst, dst, 1 |
| @@ -773,13 +783,19 @@ FEXPORT(csum_partial_copy_nocheck) | |||
| 773 | SUB src, src, v1 | 783 | SUB src, src, v1 |
| 774 | #endif | 784 | #endif |
| 775 | li v1, -EFAULT | 785 | li v1, -EFAULT |
| 776 | b .Ldone | 786 | b .Ldone\@ |
| 777 | sw v1, (errptr) | 787 | sw v1, (errptr) |
| 778 | 788 | ||
| 779 | .Ls_exc: | 789 | .Ls_exc\@: |
| 780 | li v0, -1 /* invalid checksum */ | 790 | li v0, -1 /* invalid checksum */ |
| 781 | li v1, -EFAULT | 791 | li v1, -EFAULT |
| 782 | jr ra | 792 | jr ra |
| 783 | sw v1, (errptr) | 793 | sw v1, (errptr) |
| 784 | .set pop | 794 | .set pop |
| 785 | END(__csum_partial_copy_kernel) | 795 | .endm |
| 796 | |||
| 797 | LEAF(__csum_partial_copy_kernel) | ||
| 798 | FEXPORT(__csum_partial_copy_to_user) | ||
| 799 | FEXPORT(__csum_partial_copy_from_user) | ||
| 800 | __BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP 1 | ||
| 801 | END(__csum_partial_copy_kernel) | ||
