aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David Mosberger-Tang <davidm@hpl.hp.com> 2005-04-28 00:20:51 -0400
committer Tony Luck <tony.luck@intel.com> 2005-04-28 00:20:51 -0400
commit 1ba7be7d691f6df2557d39c5b1a2e14c32e5dd20 (patch)
tree f6c805c01be475f21de0cdcada8f69c9076ea61e
parent 21bc4f9b34cc1eab3610955207f72c52495ae8ed (diff)
[IA64] Reschedule fsys_bubble_down().
Improvements come from eliminating srlz.i, not scheduling AR/CR-reads too early (while there are others still pending), scheduling the backing-store switch as well as possible, and splitting the BBB bundle into a MIB/MBB pair.

Why is it safe to eliminate the srlz.i? Observe that we used to clear bits ~PSR_PRESERVED_BITS in PSR.L. Since PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}. However,

  PSR.BE : is already turned off in __kernel_syscall_via_epc()
  PSR.AC : don't care (kernel normally turns PSR.AC on)
  PSR.I  : already turned off by the time fsys_bubble_down gets invoked
  PSR.DFL: always 0 (kernel never turns it on)
  PSR.DFH: don't care --- kernel never touches f32-f127 on its own initiative
  PSR.DI : always 0 (kernel never turns it on)
  PSR.SI : always 0 (kernel never turns it on)
  PSR.DB : don't care --- kernel never enables kernel-level breakpoints
  PSR.TB : must be 0 already; if it wasn't zero on entry to __kernel_syscall_via_epc, the branch to fsys_bubble_down will trigger a taken-branch trap; the taken-trap-handler then converts the syscall into a break-based system-call.

In other words: all the bits we're clearing are either 0 already or are don't cares! Thus, we don't have to write PSR.L at all and we don't have to do a srlz.i either. Good for another ~20 cycle improvement for EPC-based heavy-weight syscalls.

Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r-- arch/ia64/kernel/fsys.S 73
1 files changed, 33 insertions, 40 deletions
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 0d8650f7fce7..57c6556b1e06 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -549,9 +549,6 @@ GLOBAL_ENTRY(fsys_bubble_down)
549 * - r27: ar.rsc 549 * - r27: ar.rsc
550 * - r29: psr 550 * - r29: psr
551 */ 551 */
552# define PSR_PRESERVED_BITS (IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \
553 | IA64_PSR_DT | IA64_PSR_PP | IA64_PSR_SP | IA64_PSR_RT \
554 | IA64_PSR_IC)
555 /* 552 /*
556 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. The rest we have 553 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. The rest we have
557 * to synthesize. 554 * to synthesize.
@@ -560,62 +557,58 @@ GLOBAL_ENTRY(fsys_bubble_down)
560 | IA64_PSR_BN | IA64_PSR_I) 557 | IA64_PSR_BN | IA64_PSR_I)
561 558
562 invala 559 invala
563 movl r8=PSR_ONE_BITS 560 movl r14=ia64_ret_from_syscall
564 561
565 mov r25=ar.unat // save ar.unat (5 cyc) 562 nop.m 0
566 movl r9=PSR_PRESERVED_BITS
567
568 mov ar.rsc=0 // set enforced lazy mode, pl 0, little-endian, loadrs=0
569 movl r28=__kernel_syscall_via_break 563 movl r28=__kernel_syscall_via_break
570 ;; 564 ;;
571 mov r23=ar.bspstore // save ar.bspstore (12 cyc) 565
572 mov r31=pr // save pr (2 cyc)
573 mov r20=r1 // save caller's gp in r20
574 ;;
575 mov r2=r16 // copy current task addr to addl-addressable register 566 mov r2=r16 // copy current task addr to addl-addressable register
576 and r9=r9,r29 567 adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
577 mov r19=b6 // save b6 (2 cyc) 568 mov r31=pr // save pr (2 cyc)
578 ;; 569 ;;
579 mov psr.l=r9 // slam the door (17 cyc to srlz.i) 570 st1 [r16]=r0 // clear current->thread.on_ustack flag
580 or r29=r8,r29 // construct cr.ipsr value to save
581 addl r22=IA64_RBS_OFFSET,r2 // compute base of RBS 571 addl r22=IA64_RBS_OFFSET,r2 // compute base of RBS
572 add r3=TI_FLAGS+IA64_TASK_SIZE,r2
582 ;; 573 ;;
583 // GAS reports a spurious RAW hazard on the read of ar.rnat because it thinks 574 ld4 r3=[r3] // r2 = current_thread_info()->flags
584 // we may be reading ar.itc after writing to psr.l. Avoid that message with
585 // this directive:
586 dv_serialize_data
587 mov.m r24=ar.rnat // read ar.rnat (5 cyc lat)
588 lfetch.fault.excl.nt1 [r22] 575 lfetch.fault.excl.nt1 [r22]
589 adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r2 576 nop.i 0
590
591 // ensure previous insn group is issued before we stall for srlz.i:
592 ;; 577 ;;
593 srlz.i // ensure new psr.l has been established 578 mov ar.rsc=0 // set enforced lazy mode, pl 0, little-endian, loadrs=0
594 ///////////////////////////////////////////////////////////////////////////// 579 nop.m 0
595 ////////// from this point on, execution is not interruptible anymore 580 nop.i 0
596 ///////////////////////////////////////////////////////////////////////////// 581 ;;
597 addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // compute base of memory stack 582 mov r23=ar.bspstore // save ar.bspstore (12 cyc)
598 cmp.ne pKStk,pUStk=r0,r0 // set pKStk <- 0, pUStk <- 1 583 mov.m r24=ar.rnat // read ar.rnat (5 cyc lat)
584 nop.i 0
599 ;; 585 ;;
600 st1 [r16]=r0 // clear current->thread.on_ustack flag
601 mov ar.bspstore=r22 // switch to kernel RBS 586 mov ar.bspstore=r22 // switch to kernel RBS
602 mov b6=r18 // copy syscall entry-point to b6 (7 cyc) 587 movl r8=PSR_ONE_BITS // X
603 add r3=TI_FLAGS+IA64_TASK_SIZE,r2
604 ;; 588 ;;
605 ld4 r3=[r3] // r2 = current_thread_info()->flags 589 mov r25=ar.unat // save ar.unat (5 cyc)
590 mov r19=b6 // save b6 (2 cyc)
591 mov r20=r1 // save caller's gp in r20
592 ;;
593 or r29=r8,r29 // construct cr.ipsr value to save
594 mov b6=r18 // copy syscall entry-point to b6 (7 cyc)
595 addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // compute base of memory stack
596
606 mov r18=ar.bsp // save (kernel) ar.bsp (12 cyc) 597 mov r18=ar.bsp // save (kernel) ar.bsp (12 cyc)
607 mov ar.rsc=0x3 // set eager mode, pl 0, little-endian, loadrs=0 598 cmp.ne pKStk,pUStk=r0,r0 // set pKStk <- 0, pUStk <- 1
608 br.call.sptk.many b7=ia64_syscall_setup 599 br.call.sptk.many b7=ia64_syscall_setup
609 ;; 600 ;;
610 ssm psr.i 601 mov ar.rsc=0x3 // set eager mode, pl 0, little-endian, loadrs=0
611 movl r2=ia64_ret_from_syscall 602 mov rp=r14 // set the real return addr
603 nop.i 0
612 ;; 604 ;;
613 mov rp=r2 // set the real return addr 605 ssm psr.i
614 tbit.z p8,p0=r3,TIF_SYSCALL_TRACE 606 tbit.z p8,p0=r3,TIF_SYSCALL_TRACE
615 ;;
616(p10) br.cond.spnt.many ia64_ret_from_syscall // p10==true means out registers are more than 8 607(p10) br.cond.spnt.many ia64_ret_from_syscall // p10==true means out registers are more than 8
608
609 nop.m 0
617(p8) br.call.sptk.many b6=b6 // ignore this return addr 610(p8) br.call.sptk.many b6=b6 // ignore this return addr
618 br.cond.sptk ia64_trace_syscall 611 br.cond.spnt ia64_trace_syscall
619END(fsys_bubble_down) 612END(fsys_bubble_down)
620 613
621 .rodata 614 .rodata