diff options
author | David Mosberger-Tang <davidm@hpl.hp.com> | 2005-04-25 16:03:16 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2005-04-25 16:03:16 -0400 |
commit | 30325d17715302a60c9afdaacaafaeb056b7e880 (patch) | |
tree | 8872d59f841a5338e04f15c1183b8fd2ad4bfbb3 /arch/ia64/kernel/entry.S | |
parent | e8d1cb2f280aa53e1c75c8b5fcbf80b3481d0caa (diff) |
[IA64] speed up syscall path a bit more
Recently I noticed that clearing ar.ssd/ar.csd right before srlz.d
causes significant stalling in the syscall path. The patch below
fixes that by moving those register writes to after the srlz.d. On a Madison,
this drops break-based getpid() from 241 to 226 cycles (-15 cycles).
Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/kernel/entry.S')
-rw-r--r-- | arch/ia64/kernel/entry.S | 12 |
1 file changed, 6 insertions, 6 deletions
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 0272c010a3b..73e23dafe8e 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S | |||
@@ -728,12 +728,8 @@ ENTRY(ia64_leave_syscall) | |||
728 | mov f8=f0 // clear f8 | 728 | mov f8=f0 // clear f8 |
729 | ;; | 729 | ;; |
730 | ld8 r30=[r2],16 // M0|1 load cr.ifs | 730 | ld8 r30=[r2],16 // M0|1 load cr.ifs |
731 | mov.m ar.ssd=r0 // M2 clear ar.ssd | ||
732 | cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs | ||
733 | ;; | ||
734 | ld8 r25=[r3],16 // M0|1 load ar.unat | 731 | ld8 r25=[r3],16 // M0|1 load ar.unat |
735 | mov.m ar.csd=r0 // M2 clear ar.csd | 732 | cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs |
736 | mov r22=r0 // clear r22 | ||
737 | ;; | 733 | ;; |
738 | ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs | 734 | ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs |
739 | (pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled | 735 | (pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled |
@@ -756,11 +752,15 @@ ENTRY(ia64_leave_syscall) | |||
756 | mov f7=f0 // clear f7 | 752 | mov f7=f0 // clear f7 |
757 | ;; | 753 | ;; |
758 | ld8.fill r12=[r2] // restore r12 (sp) | 754 | ld8.fill r12=[r2] // restore r12 (sp) |
755 | mov.m ar.ssd=r0 // M2 clear ar.ssd | ||
756 | mov r22=r0 // clear r22 | ||
757 | |||
759 | ld8.fill r15=[r3] // restore r15 | 758 | ld8.fill r15=[r3] // restore r15 |
759 | (pUStk) st1 [r14]=r17 | ||
760 | addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0 | 760 | addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0 |
761 | ;; | 761 | ;; |
762 | (pUStk) ld4 r3=[r3] // r3 = cpu_data->phys_stacked_size_p8 | 762 | (pUStk) ld4 r3=[r3] // r3 = cpu_data->phys_stacked_size_p8 |
763 | (pUStk) st1 [r14]=r17 | 763 | mov.m ar.csd=r0 // M2 clear ar.csd |
764 | mov b6=r18 // I0 restore b6 | 764 | mov b6=r18 // I0 restore b6 |
765 | ;; | 765 | ;; |
766 | mov r14=r0 // clear r14 | 766 | mov r14=r0 // clear r14 |