-rw-r--r--	arch/powerpc/lib/copyuser_power7.S	54
-rw-r--r--	arch/powerpc/lib/memcpy_power7.S	55
2 files changed, 63 insertions(+), 46 deletions(-)
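The patch makes the POWER7 unaligned-copy loops endian-safe by hiding lvsl/vperm behind LVS/VPERM macros. Both variants compute the same thing: given the two aligned 16-byte loads that straddle an unaligned source pointer, extract the 16 source bytes starting at offset sh = src & 15. A plain C model of that step (a sketch for illustration only; realign_model is a hypothetical name, not kernel code):

#include <stdint.h>
#include <string.h>

/* Model of one LVS/VPERM realignment step: pick the 16 bytes starting
 * at offset sh from the 32-byte window formed by two aligned loads. */
static void realign_model(const uint8_t prev[16], const uint8_t next[16],
			  unsigned sh, uint8_t out[16])
{
	uint8_t window[32];

	memcpy(window, prev, 16);
	memcpy(window + 16, next, 16);

	/* lvsl yields the control vector {sh, sh+1, ..., sh+15}; vperm
	 * then selects exactly those bytes from prev:next. */
	for (unsigned i = 0; i < 16; i++)
		out[i] = window[sh + i];
}

On big endian this is lvsl plus vperm VRT,VRA,VRB,VRC directly. On little endian, lvx and stvx byte-reverse the vector register contents while vperm's indexing does not change, so the same memory-order result is obtained with lvsr (control vector {16-sh, ..., 31-sh}) and the two vperm source operands swapped, which is exactly what the #else branch of the macros below encodes.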
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index d1f11795a7ad..e8e9c36dc784 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -19,6 +19,14 @@
  */
 #include <asm/ppc_asm.h>
 
+#ifdef __BIG_ENDIAN__
+#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
+#else
+#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
+#endif
+
 	.macro err1
 100:
 	.section __ex_table,"a"
@@ -552,13 +560,13 @@ err3; stw r7,4(r3)
 	li	r10,32
 	li	r11,48
 
-	lvsl	vr16,0,r4	/* Setup permute control vector */
+	LVS(vr16,0,r4)		/* Setup permute control vector */
 err3;	lvx	vr0,0,r4
 	addi	r4,r4,16
 
 	bf	cr7*4+3,5f
 err3;	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 	addi	r4,r4,16
 err3;	stvx	vr8,r0,r3
 	addi	r3,r3,16
@@ -566,9 +574,9 @@ err3; stvx vr8,r0,r3
 
 5:	bf	cr7*4+2,6f
 err3;	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 err3;	lvx	vr0,r4,r9
-	vperm	vr9,vr1,vr0,vr16
+	VPERM(vr9,vr1,vr0,vr16)
 	addi	r4,r4,32
 err3;	stvx	vr8,r0,r3
 err3;	stvx	vr9,r3,r9
@@ -576,13 +584,13 @@ err3; stvx vr9,r3,r9
 
 6:	bf	cr7*4+1,7f
 err3;	lvx	vr3,r0,r4
-	vperm	vr8,vr0,vr3,vr16
+	VPERM(vr8,vr0,vr3,vr16)
 err3;	lvx	vr2,r4,r9
-	vperm	vr9,vr3,vr2,vr16
+	VPERM(vr9,vr3,vr2,vr16)
 err3;	lvx	vr1,r4,r10
-	vperm	vr10,vr2,vr1,vr16
+	VPERM(vr10,vr2,vr1,vr16)
 err3;	lvx	vr0,r4,r11
-	vperm	vr11,vr1,vr0,vr16
+	VPERM(vr11,vr1,vr0,vr16)
 	addi	r4,r4,64
 err3;	stvx	vr8,r0,r3
 err3;	stvx	vr9,r3,r9
@@ -611,21 +619,21 @@ err3; stvx vr11,r3,r11
 	.align	5
 8:
 err4;	lvx	vr7,r0,r4
-	vperm	vr8,vr0,vr7,vr16
+	VPERM(vr8,vr0,vr7,vr16)
 err4;	lvx	vr6,r4,r9
-	vperm	vr9,vr7,vr6,vr16
+	VPERM(vr9,vr7,vr6,vr16)
 err4;	lvx	vr5,r4,r10
-	vperm	vr10,vr6,vr5,vr16
+	VPERM(vr10,vr6,vr5,vr16)
 err4;	lvx	vr4,r4,r11
-	vperm	vr11,vr5,vr4,vr16
+	VPERM(vr11,vr5,vr4,vr16)
 err4;	lvx	vr3,r4,r12
-	vperm	vr12,vr4,vr3,vr16
+	VPERM(vr12,vr4,vr3,vr16)
 err4;	lvx	vr2,r4,r14
-	vperm	vr13,vr3,vr2,vr16
+	VPERM(vr13,vr3,vr2,vr16)
 err4;	lvx	vr1,r4,r15
-	vperm	vr14,vr2,vr1,vr16
+	VPERM(vr14,vr2,vr1,vr16)
 err4;	lvx	vr0,r4,r16
-	vperm	vr15,vr1,vr0,vr16
+	VPERM(vr15,vr1,vr0,vr16)
 	addi	r4,r4,128
 err4;	stvx	vr8,r0,r3
 err4;	stvx	vr9,r3,r9
@@ -649,13 +657,13 @@ err4; stvx vr15,r3,r16
 
 	bf	cr7*4+1,9f
 err3;	lvx	vr3,r0,r4
-	vperm	vr8,vr0,vr3,vr16
+	VPERM(vr8,vr0,vr3,vr16)
 err3;	lvx	vr2,r4,r9
-	vperm	vr9,vr3,vr2,vr16
+	VPERM(vr9,vr3,vr2,vr16)
 err3;	lvx	vr1,r4,r10
-	vperm	vr10,vr2,vr1,vr16
+	VPERM(vr10,vr2,vr1,vr16)
 err3;	lvx	vr0,r4,r11
-	vperm	vr11,vr1,vr0,vr16
+	VPERM(vr11,vr1,vr0,vr16)
 	addi	r4,r4,64
 err3;	stvx	vr8,r0,r3
 err3;	stvx	vr9,r3,r9
@@ -665,9 +673,9 @@ err3; stvx vr11,r3,r11
 
 9:	bf	cr7*4+2,10f
 err3;	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 err3;	lvx	vr0,r4,r9
-	vperm	vr9,vr1,vr0,vr16
+	VPERM(vr9,vr1,vr0,vr16)
 	addi	r4,r4,32
 err3;	stvx	vr8,r0,r3
 err3;	stvx	vr9,r3,r9
@@ -675,7 +683,7 @@ err3; stvx vr9,r3,r9
 
 10:	bf	cr7*4+3,11f
 err3;	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 	addi	r4,r4,16
 err3;	stvx	vr8,r0,r3
 	addi	r3,r3,16
diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S
index 0663630baf3b..e4177dbea6bd 100644
--- a/arch/powerpc/lib/memcpy_power7.S
+++ b/arch/powerpc/lib/memcpy_power7.S
@@ -20,6 +20,15 @@
 #include <asm/ppc_asm.h>
 
 _GLOBAL(memcpy_power7)
+
+#ifdef __BIG_ENDIAN__
+#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
+#else
+#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
+#endif
+
 #ifdef CONFIG_ALTIVEC
 	cmpldi	r5,16
 	cmpldi	cr1,r5,4096
@@ -485,13 +494,13 @@ _GLOBAL(memcpy_power7)
 	li	r10,32
 	li	r11,48
 
-	lvsl	vr16,0,r4	/* Setup permute control vector */
+	LVS(vr16,0,r4)		/* Setup permute control vector */
 	lvx	vr0,0,r4
 	addi	r4,r4,16
 
 	bf	cr7*4+3,5f
 	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 	addi	r4,r4,16
 	stvx	vr8,r0,r3
 	addi	r3,r3,16
@@ -499,9 +508,9 @@ _GLOBAL(memcpy_power7)
 
 5:	bf	cr7*4+2,6f
 	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 	lvx	vr0,r4,r9
-	vperm	vr9,vr1,vr0,vr16
+	VPERM(vr9,vr1,vr0,vr16)
 	addi	r4,r4,32
 	stvx	vr8,r0,r3
 	stvx	vr9,r3,r9
@@ -509,13 +518,13 @@ _GLOBAL(memcpy_power7)
 
 6:	bf	cr7*4+1,7f
 	lvx	vr3,r0,r4
-	vperm	vr8,vr0,vr3,vr16
+	VPERM(vr8,vr0,vr3,vr16)
 	lvx	vr2,r4,r9
-	vperm	vr9,vr3,vr2,vr16
+	VPERM(vr9,vr3,vr2,vr16)
 	lvx	vr1,r4,r10
-	vperm	vr10,vr2,vr1,vr16
+	VPERM(vr10,vr2,vr1,vr16)
 	lvx	vr0,r4,r11
-	vperm	vr11,vr1,vr0,vr16
+	VPERM(vr11,vr1,vr0,vr16)
 	addi	r4,r4,64
 	stvx	vr8,r0,r3
 	stvx	vr9,r3,r9
@@ -544,21 +553,21 @@ _GLOBAL(memcpy_power7)
 	.align	5
 8:
 	lvx	vr7,r0,r4
-	vperm	vr8,vr0,vr7,vr16
+	VPERM(vr8,vr0,vr7,vr16)
 	lvx	vr6,r4,r9
-	vperm	vr9,vr7,vr6,vr16
+	VPERM(vr9,vr7,vr6,vr16)
 	lvx	vr5,r4,r10
-	vperm	vr10,vr6,vr5,vr16
+	VPERM(vr10,vr6,vr5,vr16)
 	lvx	vr4,r4,r11
-	vperm	vr11,vr5,vr4,vr16
+	VPERM(vr11,vr5,vr4,vr16)
 	lvx	vr3,r4,r12
-	vperm	vr12,vr4,vr3,vr16
+	VPERM(vr12,vr4,vr3,vr16)
 	lvx	vr2,r4,r14
-	vperm	vr13,vr3,vr2,vr16
+	VPERM(vr13,vr3,vr2,vr16)
 	lvx	vr1,r4,r15
-	vperm	vr14,vr2,vr1,vr16
+	VPERM(vr14,vr2,vr1,vr16)
 	lvx	vr0,r4,r16
-	vperm	vr15,vr1,vr0,vr16
+	VPERM(vr15,vr1,vr0,vr16)
 	addi	r4,r4,128
 	stvx	vr8,r0,r3
 	stvx	vr9,r3,r9
@@ -582,13 +591,13 @@ _GLOBAL(memcpy_power7)
 
 	bf	cr7*4+1,9f
 	lvx	vr3,r0,r4
-	vperm	vr8,vr0,vr3,vr16
+	VPERM(vr8,vr0,vr3,vr16)
 	lvx	vr2,r4,r9
-	vperm	vr9,vr3,vr2,vr16
+	VPERM(vr9,vr3,vr2,vr16)
 	lvx	vr1,r4,r10
-	vperm	vr10,vr2,vr1,vr16
+	VPERM(vr10,vr2,vr1,vr16)
 	lvx	vr0,r4,r11
-	vperm	vr11,vr1,vr0,vr16
+	VPERM(vr11,vr1,vr0,vr16)
 	addi	r4,r4,64
 	stvx	vr8,r0,r3
 	stvx	vr9,r3,r9
@@ -598,9 +607,9 @@ _GLOBAL(memcpy_power7)
 
 9:	bf	cr7*4+2,10f
 	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 	lvx	vr0,r4,r9
-	vperm	vr9,vr1,vr0,vr16
+	VPERM(vr9,vr1,vr0,vr16)
 	addi	r4,r4,32
 	stvx	vr8,r0,r3
 	stvx	vr9,r3,r9
@@ -608,7 +617,7 @@ _GLOBAL(memcpy_power7)
 
 10:	bf	cr7*4+3,11f
 	lvx	vr1,r0,r4
-	vperm	vr8,vr0,vr1,vr16
+	VPERM(vr8,vr0,vr1,vr16)
 	addi	r4,r4,16
 	stvx	vr8,r0,r3
 	addi	r3,r3,16