aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David Mosberger-Tang <davidm@hpl.hp.com> 2005-04-28 00:19:04 -0400
committer Tony Luck <tony.luck@intel.com> 2005-04-28 00:19:04 -0400
commit f8fa5448fc9b4a7806b1297a0b57808f12fe4d43 (patch)
tree b32f9939ee3b72010e4c3255d58278f85c547a7e
parent c03f058fbf685f2ff630095d2c1e98d331b81e82 (diff)
[IA64] Reschedule break_fault() for better performance.
This patch reorganizes break_fault() to optimistically assume that a system-call is being performed from user-space (which is almost always the case). If it turns out that (a) we're not being called due to a system call or (b) we're being called from within the kernel, we fix up the no-longer-valid assumptions in non_syscall() and .break_fixup(), respectively.

With this approach, there are 3 major phases:

- Phase 1: Read various control & application registers, in particular the current task pointer from AR.K6.
- Phase 2: Do all memory loads (load system-call entry, load current_thread_info()->flags, prefetch kernel register-backing store) and switch to kernel register-stack.
- Phase 3: Call ia64_syscall_setup() and invoke syscall-handler.

Good for 26-30 cycles of improvement on break-based syscall-path.

Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r-- arch/ia64/kernel/ivt.S 173
1 files changed, 106 insertions, 67 deletions
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index dc78c356ddd0..386087edabf2 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -687,82 +687,118 @@ ENTRY(break_fault)
687 * to prevent leaking bits from kernel to user level. 687 * to prevent leaking bits from kernel to user level.
688 */ 688 */
689 DBG_FAULT(11) 689 DBG_FAULT(11)
690 mov r16=IA64_KR(CURRENT) // r16 = current task; 12 cycle read lat. 690 mov.m r16=IA64_KR(CURRENT) // M2 r16 <- current task (12 cyc)
691 mov r17=cr.iim 691 mov r29=cr.ipsr // M2 (12 cyc)
692 mov r18=__IA64_BREAK_SYSCALL 692 mov r31=pr // I0 (2 cyc)
693 mov r21=ar.fpsr 693
694 mov r29=cr.ipsr 694 mov r17=cr.iim // M2 (2 cyc)
695 mov r19=b6 695 mov.m r27=ar.rsc // M2 (12 cyc)
696 mov r25=ar.unat 696 mov r18=__IA64_BREAK_SYSCALL // A
697 mov r27=ar.rsc 697
698 mov r26=ar.pfs 698 mov.m ar.rsc=0 // M2
699 mov r28=cr.iip 699 mov.m r21=ar.fpsr // M2 (12 cyc)
700 mov r31=pr // prepare to save predicates 700 mov r19=b6 // I0 (2 cyc)
701 mov r20=r1 701 ;;
702 ;; 702 mov.m r23=ar.bspstore // M2 (12 cyc)
703 mov.m r24=ar.rnat // M2 (5 cyc)
704 mov.i r26=ar.pfs // I0 (2 cyc)
705
706 invala // M0|1
707 nop.m 0 // M
708 mov r20=r1 // A save r1
709
710 nop.m 0
711 movl r30=sys_call_table // X
712
713 mov r28=cr.iip // M2 (2 cyc)
714 cmp.eq p0,p7=r18,r17 // I0 is this a system call?
715(p7) br.cond.spnt non_syscall // B no ->
716 //
717 // From this point on, we are definitely on the syscall-path
718 // and we can use (non-banked) scratch registers.
719 //
720///////////////////////////////////////////////////////////////////////
721 mov r1=r16 // A move task-pointer to "addl"-addressable reg
722 mov r2=r16 // A setup r2 for ia64_syscall_setup
723 add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // A r9 = &current_thread_info()->flags
724
703 adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 725 adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
704 cmp.eq p0,p7=r18,r17 // is this a system call? (p7 <- false, if so) 726 adds r15=-1024,r15 // A subtract 1024 from syscall number
705(p7) br.cond.spnt non_syscall 727 mov r3=NR_syscalls - 1
706 ;; 728 ;;
707 ld1 r17=[r16] // load current->thread.on_ustack flag 729 ld1.bias r17=[r16] // M0|1 r17 = current->thread.on_ustack flag
708 st1 [r16]=r0 // clear current->thread.on_ustack flag 730 ld4 r9=[r9] // M0|1 r9 = current_thread_info()->flags
709 add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT 731 extr.u r8=r29,41,2 // I0 extract ei field from cr.ipsr
732
733 shladd r30=r15,3,r30 // A r30 = sys_call_table + 8*(syscall-1024)
734 addl r22=IA64_RBS_OFFSET,r1 // A compute base of RBS
735 cmp.leu p6,p7=r15,r3 // A syscall number in range?
710 ;; 736 ;;
711 invala
712 737
713 /* adjust return address so we skip over the break instruction: */ 738 lfetch.fault.excl.nt1 [r22] // M0|1 prefetch RBS
739(p6) ld8 r30=[r30] // M0|1 load address of syscall entry point
740 tnat.nz.or p7,p0=r15 // I0 is syscall nr a NaT?
714 741
715 extr.u r8=r29,41,2 // extract ei field from cr.ipsr 742 mov.m ar.bspstore=r22 // M2 switch to kernel RBS
716 ;; 743 cmp.eq p8,p9=2,r8 // A isr.ei==2?
717 cmp.eq p6,p7=2,r8 // isr.ei==2?
718 mov r2=r1 // setup r2 for ia64_syscall_setup
719 ;;
720(p6) mov r8=0 // clear ei to 0
721(p6) adds r28=16,r28 // switch cr.iip to next bundle cr.ipsr.ei wrapped
722(p7) adds r8=1,r8 // increment ei to next slot
723 ;;
724 cmp.eq pKStk,pUStk=r0,r17 // are we in kernel mode already?
725 dep r29=r8,r29,41,2 // insert new ei into cr.ipsr
726 ;; 744 ;;
727 745
728 // switch from user to kernel RBS: 746(p8) mov r8=0 // A clear ei to 0
729 MINSTATE_START_SAVE_MIN_VIRT 747(p7) movl r30=sys_ni_syscall // X
730 br.call.sptk.many b7=ia64_syscall_setup
731 ;;
732 MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1
733 ssm psr.ic | PSR_DEFAULT_BITS
734 ;;
735 srlz.i // guarantee that interruption collection is on
736 mov r3=NR_syscalls - 1
737 ;;
738(p15) ssm psr.i // restore psr.i
739 // p10==true means out registers are more than 8 or r15's Nat is true
740(p10) br.cond.spnt.many ia64_ret_from_syscall
741 ;;
742 movl r16=sys_call_table
743 748
744 adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024 749(p8) adds r28=16,r28 // A switch cr.iip to next bundle
745 movl r2=ia64_ret_from_syscall 750(p9) adds r8=1,r8 // A increment ei to next slot
746 ;; 751 nop.i 0
747 shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024)
748 cmp.leu p6,p7=r15,r3 // (syscall > 0 && syscall < 1024 + NR_syscalls) ?
749 mov rp=r2 // set the real return addr
750 ;; 752 ;;
751(p6) ld8 r20=[r20] // load address of syscall entry point
752(p7) movl r20=sys_ni_syscall
753 753
754 add r2=TI_FLAGS+IA64_TASK_SIZE,r13 754 mov.m r25=ar.unat // M2 (5 cyc)
755 ;; 755 dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr
756 ld4 r2=[r2] // r2 = current_thread_info()->flags 756 adds r15=1024,r15 // A restore original syscall number
757 ;; 757 //
758 and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit 758 // If any of the above loads miss in L1D, we'll stall here until
759 // the data arrives.
760 //
761///////////////////////////////////////////////////////////////////////
762 st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
763 mov b6=r30 // I0 setup syscall handler branch reg early
764 cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already?
765
766 and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit
767 mov r18=ar.bsp // M2 (12 cyc)
768(pKStk) br.cond.spnt .break_fixup // B we're already in kernel-mode -- fix up RBS
769 ;;
770.back_from_break_fixup:
771(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A compute base of memory stack
772 cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited?
773 br.call.sptk.many b7=ia64_syscall_setup // B
7741:
775 mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
776 nop 0
777 bsw.1 // B (6 cyc) regs are saved, switch to bank 1
759 ;; 778 ;;
760 cmp.eq p8,p0=r2,r0 779
761 mov b6=r20 780 ssm psr.ic | PSR_DEFAULT_BITS // M2 now it's safe to re-enable intr.-collection
781 movl r3=ia64_ret_from_syscall // X
762 ;; 782 ;;
763(p8) br.call.sptk.many b6=b6 // ignore this return addr 783
764 br.cond.sptk ia64_trace_syscall 784 srlz.i // M0 ensure interruption collection is on
785 mov rp=r3 // I0 set the real return addr
786(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT
787
788(p15) ssm psr.i // M2 restore psr.i
789(p14) br.call.sptk.many b6=b6 // B invoke syscall-handler (ignore return addr)
790 br.cond.spnt.many ia64_trace_syscall // B do syscall-tracing thingamagic
765 // NOT REACHED 791 // NOT REACHED
792///////////////////////////////////////////////////////////////////////
793 // On entry, we optimistically assumed that we're coming from user-space.
794 // For the rare cases where a system-call is done from within the kernel,
795 // we fix things up at this point:
796.break_fixup:
797 add r1=-IA64_PT_REGS_SIZE,sp // A allocate space for pt_regs structure
798 mov ar.rnat=r24 // M2 restore kernel's AR.RNAT
799 ;;
800 mov ar.bspstore=r23 // M2 restore kernel's AR.BSPSTORE
801 br.cond.sptk .back_from_break_fixup
766END(break_fault) 802END(break_fault)
767 803
768 .org ia64_ivt+0x3000 804 .org ia64_ivt+0x3000
@@ -837,8 +873,6 @@ END(interrupt)
837 * - r31: saved pr 873 * - r31: saved pr
838 * - b0: original contents (to be saved) 874 * - b0: original contents (to be saved)
839 * On exit: 875 * On exit:
840 * - executing on bank 1 registers
841 * - psr.ic enabled, interrupts restored
842 * - p10: TRUE if syscall is invoked with more than 8 out 876 * - p10: TRUE if syscall is invoked with more than 8 out
843 * registers or r15's Nat is true 877 * registers or r15's Nat is true
844 * - r1: kernel's gp 878 * - r1: kernel's gp
@@ -846,8 +880,11 @@ END(interrupt)
846 * - r8: -EINVAL if p10 is true 880 * - r8: -EINVAL if p10 is true
847 * - r12: points to kernel stack 881 * - r12: points to kernel stack
848 * - r13: points to current task 882 * - r13: points to current task
883 * - r14: preserved (same as on entry)
884 * - p13: preserved
849 * - p15: TRUE if interrupts need to be re-enabled 885 * - p15: TRUE if interrupts need to be re-enabled
850 * - ar.fpsr: set to kernel settings 886 * - ar.fpsr: set to kernel settings
887 * - b6: preserved (same as on entry)
851 */ 888 */
852GLOBAL_ENTRY(ia64_syscall_setup) 889GLOBAL_ENTRY(ia64_syscall_setup)
853#if PT(B6) != 0 890#if PT(B6) != 0
@@ -915,7 +952,7 @@ GLOBAL_ENTRY(ia64_syscall_setup)
915(p13) mov in5=-1 952(p13) mov in5=-1
916 ;; 953 ;;
917 st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr 954 st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr
918 tnat.nz p14,p0=in6 955 tnat.nz p13,p0=in6
919 cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8 956 cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8
920 ;; 957 ;;
921 mov r8=1 958 mov r8=1
@@ -930,7 +967,7 @@ GLOBAL_ENTRY(ia64_syscall_setup)
930 movl r1=__gp // establish kernel global pointer 967 movl r1=__gp // establish kernel global pointer
931 ;; 968 ;;
932 st8 [r16]=r8 // ensure pt_regs.r8 != 0 (see handle_syscall_error) 969 st8 [r16]=r8 // ensure pt_regs.r8 != 0 (see handle_syscall_error)
933(p14) mov in6=-1 970(p13) mov in6=-1
934(p8) mov in7=-1 971(p8) mov in7=-1
935 972
936 cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 973 cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
@@ -1002,6 +1039,8 @@ END(dispatch_illegal_op_fault)
1002 FAULT(17) 1039 FAULT(17)
1003 1040
1004ENTRY(non_syscall) 1041ENTRY(non_syscall)
1042 mov ar.rsc=r27 // restore ar.rsc before SAVE_MIN_WITH_COVER
1043 ;;
1005 SAVE_MIN_WITH_COVER 1044 SAVE_MIN_WITH_COVER
1006 1045
1007 // There is no particular reason for this code to be here, other than that 1046 // There is no particular reason for this code to be here, other than that