diff options
author | David Mosberger-Tang <davidm@hpl.hp.com> | 2005-04-28 00:15:13 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2005-04-28 00:15:13 -0400 |
commit | 3c79c8b1d92a9ae3edf3cbcd2c43c553ee0f1d83 (patch) | |
tree | 50a6d59383a15d92aa054371a247112e3e539bc5 /arch/ia64 | |
parent | 9ec1a7ad434fa951ff845dbee3397cf6ad4f76df (diff) |
[IA64] Schedule fp-clearing insns at least 6 cycles after reading ar.bsp.
Decreases syscall overhead by approximately 6 cycles.
Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64')
-rw-r--r-- | arch/ia64/kernel/entry.S | 25 |
1 files changed, 15 insertions, 10 deletions
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index c0f28339d58b..0c84bed1bda9 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -705,15 +705,15 @@ ENTRY(ia64_leave_syscall) | |||
705 | // start restoring the state saved on the kernel stack (struct pt_regs): | 705 | // start restoring the state saved on the kernel stack (struct pt_regs): |
706 | ld8 r9=[r2],PT(CR_IPSR)-PT(R9) | 706 | ld8 r9=[r2],PT(CR_IPSR)-PT(R9) |
707 | ld8 r11=[r3],PT(CR_IIP)-PT(R11) | 707 | ld8 r11=[r3],PT(CR_IIP)-PT(R11) |
708 | mov f6=f0 // clear f6 | 708 | nop.i 0 |
709 | ;; | 709 | ;; |
710 | invala // M0|1 invalidate ALAT | 710 | invala // M0|1 invalidate ALAT |
711 | rsm psr.i | psr.ic // M2 initiate turning off of interrupt and interruption collection | 711 | rsm psr.i | psr.ic // M2 initiate turning off of interrupt and interruption collection |
712 | mov f9=f0 // clear f9 | 712 | nop.i 0 |
713 | 713 | ||
714 | ld8 r29=[r2],16 // load cr.ipsr | 714 | ld8 r29=[r2],16 // load cr.ipsr |
715 | ld8 r28=[r3],16 // load cr.iip | 715 | ld8 r28=[r3],16 // load cr.iip |
716 | mov f8=f0 // clear f8 | 716 | mov r22=r0 // clear r22 |
717 | ;; | 717 | ;; |
718 | ld8 r30=[r2],16 // M0|1 load cr.ifs | 718 | ld8 r30=[r2],16 // M0|1 load cr.ifs |
719 | ld8 r25=[r3],16 // M0|1 load ar.unat | 719 | ld8 r25=[r3],16 // M0|1 load ar.unat |
@@ -721,15 +721,15 @@ ENTRY(ia64_leave_syscall) | |||
721 | ;; | 721 | ;; |
722 | ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs | 722 | ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs |
723 | (pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled | 723 | (pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled |
724 | mov f10=f0 // clear f10 | 724 | (pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 |
725 | ;; | 725 | ;; |
726 | ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0 | 726 | ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0 |
727 | ld8 r27=[r3],PT(PR)-PT(AR_RSC) // load ar.rsc | 727 | ld8 r27=[r3],PT(PR)-PT(AR_RSC) // load ar.rsc |
728 | mov f11=f0 // clear f11 | 728 | mov f6=f0 // clear f6 |
729 | ;; | 729 | ;; |
730 | ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // load ar.rnat (may be garbage) | 730 | ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // load ar.rnat (may be garbage) |
731 | ld8 r31=[r3],PT(R1)-PT(PR) // load predicates | 731 | ld8 r31=[r3],PT(R1)-PT(PR) // load predicates |
732 | (pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 | 732 | mov f7=f0 // clear f7 |
733 | ;; | 733 | ;; |
734 | ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // load ar.fpsr | 734 | ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // load ar.fpsr |
735 | ld8.fill r1=[r3],16 // load r1 | 735 | ld8.fill r1=[r3],16 // load r1 |
@@ -737,24 +737,29 @@ ENTRY(ia64_leave_syscall) | |||
737 | ;; | 737 | ;; |
738 | srlz.d // M0 ensure interruption collection is off | 738 | srlz.d // M0 ensure interruption collection is off |
739 | ld8.fill r13=[r3],16 | 739 | ld8.fill r13=[r3],16 |
740 | mov f7=f0 // clear f7 | 740 | mov f8=f0 // clear f8 |
741 | ;; | 741 | ;; |
742 | ld8.fill r12=[r2] // restore r12 (sp) | 742 | ld8.fill r12=[r2] // restore r12 (sp) |
743 | mov.m ar.ssd=r0 // M2 clear ar.ssd | 743 | mov.m ar.ssd=r0 // M2 clear ar.ssd |
744 | mov r22=r0 // clear r22 | 744 | mov b6=r18 // I0 restore b6 |
745 | 745 | ||
746 | nop.m 0 | ||
747 | mov f9=f0 // clear f9 | ||
748 | shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition | ||
749 | ;; | ||
746 | ld8.fill r15=[r3] // restore r15 | 750 | ld8.fill r15=[r3] // restore r15 |
747 | (pUStk) st1 [r14]=r17 | 751 | (pUStk) st1 [r14]=r17 |
748 | addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0 | 752 | addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0 |
749 | ;; | 753 | ;; |
750 | (pUStk) ld4 r17=[r3] // r17 = cpu_data->phys_stacked_size_p8 | 754 | (pUStk) ld4 r17=[r3] // r17 = cpu_data->phys_stacked_size_p8 |
751 | mov.m ar.csd=r0 // M2 clear ar.csd | 755 | mov.m ar.csd=r0 // M2 clear ar.csd |
752 | mov b6=r18 // I0 restore b6 | 756 | mov f10=f0 // clear f10 |
753 | ;; | 757 | ;; |
754 | mov r14=r0 // clear r14 | 758 | mov r14=r0 // clear r14 |
755 | shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition | 759 | mov f11=f0 // clear f11 |
756 | (pKStk) br.cond.dpnt.many skip_rbs_switch | 760 | (pKStk) br.cond.dpnt.many skip_rbs_switch |
757 | 761 | ||
762 | |||
758 | mov.m ar.ccv=r0 // clear ar.ccv | 763 | mov.m ar.ccv=r0 // clear ar.ccv |
759 | (pNonSys) br.cond.dpnt.many dont_preserve_current_frame | 764 | (pNonSys) br.cond.dpnt.many dont_preserve_current_frame |
760 | br.cond.sptk.many rbs_switch | 765 | br.cond.sptk.many rbs_switch |