diff options
author | David Mosberger-Tang <davidm@hpl.hp.com> | 2005-04-28 00:20:51 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2005-04-28 00:20:51 -0400 |
commit | 1ba7be7d691f6df2557d39c5b1a2e14c32e5dd20 (patch) | |
tree | f6c805c01be475f21de0cdcada8f69c9076ea61e /arch | |
parent | 21bc4f9b34cc1eab3610955207f72c52495ae8ed (diff) |
[IA64] Reschedule fsys_bubble_down().
Improvements come from eliminating srlz.i, not scheduling AR/CR-reads
too early (while there are others still pending), scheduling the
backing-store switch as well as possible, and splitting the BBB bundle
into a MIB/MBB pair.
Why is it safe to eliminate the srlz.i? Observe
that we used to clear bits ~PSR_PRESERVED_BITS in PSR.L. Since
PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}. However,
PSR.BE : already is turned off in __kernel_syscall_via_epc()
PSR.AC : don't care (kernel normally turns PSR.AC on)
PSR.I : already turned off by the time fsys_bubble_down gets invoked
PSR.DFL: always 0 (kernel never turns it on)
PSR.DFH: don't care --- kernel never touches f32-f127 on its own
initiative
PSR.DI : always 0 (kernel never turns it on)
PSR.SI : always 0 (kernel never turns it on)
PSR.DB : don't care --- kernel never enables kernel-level breakpoints
PSR.TB : must be 0 already; if it wasn't zero on entry to
__kernel_syscall_via_epc, the branch to fsys_bubble_down
will trigger a taken-branch trap; the taken-branch trap handler
then converts the syscall into a break-based system call.
In other words: all the bits we were clearing are either already 0 or
are don't-cares! Thus, we don't have to write PSR.L at all and we
don't have to do a srlz.i either.
Good for another ~20 cycle improvement for EPC-based heavy-weight
syscalls.
Signed-off-by: David Mosberger-Tang <davidm@hpl.hp.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/ia64/kernel/fsys.S | 73 |
1 files changed, 33 insertions, 40 deletions
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index 0d8650f7fce7..57c6556b1e06 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S | |||
@@ -549,9 +549,6 @@ GLOBAL_ENTRY(fsys_bubble_down) | |||
549 | * - r27: ar.rsc | 549 | * - r27: ar.rsc |
550 | * - r29: psr | 550 | * - r29: psr |
551 | */ | 551 | */ |
552 | # define PSR_PRESERVED_BITS (IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \ | ||
553 | | IA64_PSR_DT | IA64_PSR_PP | IA64_PSR_SP | IA64_PSR_RT \ | ||
554 | | IA64_PSR_IC) | ||
555 | /* | 552 | /* |
556 | * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. The rest we have | 553 | * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. The rest we have |
557 | * to synthesize. | 554 | * to synthesize. |
@@ -560,62 +557,58 @@ GLOBAL_ENTRY(fsys_bubble_down) | |||
560 | | IA64_PSR_BN | IA64_PSR_I) | 557 | | IA64_PSR_BN | IA64_PSR_I) |
561 | 558 | ||
562 | invala | 559 | invala |
563 | movl r8=PSR_ONE_BITS | 560 | movl r14=ia64_ret_from_syscall |
564 | 561 | ||
565 | mov r25=ar.unat // save ar.unat (5 cyc) | 562 | nop.m 0 |
566 | movl r9=PSR_PRESERVED_BITS | ||
567 | |||
568 | mov ar.rsc=0 // set enforced lazy mode, pl 0, little-endian, loadrs=0 | ||
569 | movl r28=__kernel_syscall_via_break | 563 | movl r28=__kernel_syscall_via_break |
570 | ;; | 564 | ;; |
571 | mov r23=ar.bspstore // save ar.bspstore (12 cyc) | 565 | |
572 | mov r31=pr // save pr (2 cyc) | ||
573 | mov r20=r1 // save caller's gp in r20 | ||
574 | ;; | ||
575 | mov r2=r16 // copy current task addr to addl-addressable register | 566 | mov r2=r16 // copy current task addr to addl-addressable register |
576 | and r9=r9,r29 | 567 | adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 |
577 | mov r19=b6 // save b6 (2 cyc) | 568 | mov r31=pr // save pr (2 cyc) |
578 | ;; | 569 | ;; |
579 | mov psr.l=r9 // slam the door (17 cyc to srlz.i) | 570 | st1 [r16]=r0 // clear current->thread.on_ustack flag |
580 | or r29=r8,r29 // construct cr.ipsr value to save | ||
581 | addl r22=IA64_RBS_OFFSET,r2 // compute base of RBS | 571 | addl r22=IA64_RBS_OFFSET,r2 // compute base of RBS |
572 | add r3=TI_FLAGS+IA64_TASK_SIZE,r2 | ||
582 | ;; | 573 | ;; |
583 | // GAS reports a spurious RAW hazard on the read of ar.rnat because it thinks | 574 | ld4 r3=[r3] // r2 = current_thread_info()->flags |
584 | // we may be reading ar.itc after writing to psr.l. Avoid that message with | ||
585 | // this directive: | ||
586 | dv_serialize_data | ||
587 | mov.m r24=ar.rnat // read ar.rnat (5 cyc lat) | ||
588 | lfetch.fault.excl.nt1 [r22] | 575 | lfetch.fault.excl.nt1 [r22] |
589 | adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r2 | 576 | nop.i 0 |
590 | |||
591 | // ensure previous insn group is issued before we stall for srlz.i: | ||
592 | ;; | 577 | ;; |
593 | srlz.i // ensure new psr.l has been established | 578 | mov ar.rsc=0 // set enforced lazy mode, pl 0, little-endian, loadrs=0 |
594 | ///////////////////////////////////////////////////////////////////////////// | 579 | nop.m 0 |
595 | ////////// from this point on, execution is not interruptible anymore | 580 | nop.i 0 |
596 | ///////////////////////////////////////////////////////////////////////////// | 581 | ;; |
597 | addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // compute base of memory stack | 582 | mov r23=ar.bspstore // save ar.bspstore (12 cyc) |
598 | cmp.ne pKStk,pUStk=r0,r0 // set pKStk <- 0, pUStk <- 1 | 583 | mov.m r24=ar.rnat // read ar.rnat (5 cyc lat) |
584 | nop.i 0 | ||
599 | ;; | 585 | ;; |
600 | st1 [r16]=r0 // clear current->thread.on_ustack flag | ||
601 | mov ar.bspstore=r22 // switch to kernel RBS | 586 | mov ar.bspstore=r22 // switch to kernel RBS |
602 | mov b6=r18 // copy syscall entry-point to b6 (7 cyc) | 587 | movl r8=PSR_ONE_BITS // X |
603 | add r3=TI_FLAGS+IA64_TASK_SIZE,r2 | ||
604 | ;; | 588 | ;; |
605 | ld4 r3=[r3] // r2 = current_thread_info()->flags | 589 | mov r25=ar.unat // save ar.unat (5 cyc) |
590 | mov r19=b6 // save b6 (2 cyc) | ||
591 | mov r20=r1 // save caller's gp in r20 | ||
592 | ;; | ||
593 | or r29=r8,r29 // construct cr.ipsr value to save | ||
594 | mov b6=r18 // copy syscall entry-point to b6 (7 cyc) | ||
595 | addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // compute base of memory stack | ||
596 | |||
606 | mov r18=ar.bsp // save (kernel) ar.bsp (12 cyc) | 597 | mov r18=ar.bsp // save (kernel) ar.bsp (12 cyc) |
607 | mov ar.rsc=0x3 // set eager mode, pl 0, little-endian, loadrs=0 | 598 | cmp.ne pKStk,pUStk=r0,r0 // set pKStk <- 0, pUStk <- 1 |
608 | br.call.sptk.many b7=ia64_syscall_setup | 599 | br.call.sptk.many b7=ia64_syscall_setup |
609 | ;; | 600 | ;; |
610 | ssm psr.i | 601 | mov ar.rsc=0x3 // set eager mode, pl 0, little-endian, loadrs=0 |
611 | movl r2=ia64_ret_from_syscall | 602 | mov rp=r14 // set the real return addr |
603 | nop.i 0 | ||
612 | ;; | 604 | ;; |
613 | mov rp=r2 // set the real return addr | 605 | ssm psr.i |
614 | tbit.z p8,p0=r3,TIF_SYSCALL_TRACE | 606 | tbit.z p8,p0=r3,TIF_SYSCALL_TRACE |
615 | ;; | ||
616 | (p10) br.cond.spnt.many ia64_ret_from_syscall // p10==true means out registers are more than 8 | 607 | (p10) br.cond.spnt.many ia64_ret_from_syscall // p10==true means out registers are more than 8 |
608 | |||
609 | nop.m 0 | ||
617 | (p8) br.call.sptk.many b6=b6 // ignore this return addr | 610 | (p8) br.call.sptk.many b6=b6 // ignore this return addr |
618 | br.cond.sptk ia64_trace_syscall | 611 | br.cond.spnt ia64_trace_syscall |
619 | END(fsys_bubble_down) | 612 | END(fsys_bubble_down) |
620 | 613 | ||
621 | .rodata | 614 | .rodata |