aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
author: Zachary Amsden <zach@vmware.com> 2007-02-13 07:26:21 -0500
committer: Andi Kleen <andi@basil.nowhere.org> 2007-02-13 07:26:21 -0500
commit: 9226d125d94c7e4964dd41cc5e9ca2ff84091d01 (patch)
tree: 935d6e80ff843e1d7b54e0fd9386ef2e0d31aa3d /arch
parent: c119ecce894120790903ef535dac3e105f3d6cde (diff)
[PATCH] i386: paravirt CPU hypercall batching mode
The VMI ROM has a mode where hypercalls can be queued and batched. This turns out to be a significant win during context switch, but must be done at a specific point before side effects to CPU state are visible to subsequent instructions. This is similar to the MMU batching hooks already provided. The same hooks could be used by the Xen backend to implement a context switch multicall.

To explain a bit more about lazy modes in the paravirt patches: the idea is that only one of lazy CPU or lazy MMU mode can be active at any given time. Lazy MMU mode is similar to this lazy CPU mode, and allows for batching of multiple PTE updates (say, inside a remap loop), but to avoid keeping some kind of state machine about when to flush CPU or MMU updates, we just allow one or the other to be active. Although there is no real reason a more comprehensive scheme could not be implemented, there is also no demonstrated need for this extra complexity.

Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@suse.de>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Diffstat (limited to 'arch')
-rw-r--r-- arch/i386/kernel/paravirt.c 1
-rw-r--r-- arch/i386/kernel/process.c 25
2 files changed, 18 insertions, 8 deletions
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index 7329ec9fcc99..4dfdac4550dd 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -545,6 +545,7 @@ struct paravirt_ops paravirt_ops = {
545 .apic_write_atomic = native_apic_write_atomic, 545 .apic_write_atomic = native_apic_write_atomic,
546 .apic_read = native_apic_read, 546 .apic_read = native_apic_read,
547#endif 547#endif
548 .set_lazy_mode = (void *)native_nop,
548 549
549 .flush_tlb_user = native_flush_tlb, 550 .flush_tlb_user = native_flush_tlb,
550 .flush_tlb_kernel = native_flush_tlb_global, 551 .flush_tlb_kernel = native_flush_tlb_global,
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 23ae198dbbc3..cfae587bf7d2 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -670,14 +670,6 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
670 load_TLS(next, cpu); 670 load_TLS(next, cpu);
671 671
672 /* 672 /*
673 * Restore %gs if needed (which is common)
674 */
675 if (prev->gs | next->gs)
676 loadsegment(gs, next->gs);
677
678 write_pda(pcurrent, next_p);
679
680 /*
681 * Now maybe handle debug registers and/or IO bitmaps 673 * Now maybe handle debug registers and/or IO bitmaps
682 */ 674 */
683 if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW) 675 if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)
@@ -686,6 +678,15 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
686 678
687 disable_tsc(prev_p, next_p); 679 disable_tsc(prev_p, next_p);
688 680
681 /*
682 * Leave lazy mode, flushing any hypercalls made here.
683 * This must be done before restoring TLS segments so
684 * the GDT and LDT are properly updated, and must be
685 * done before math_state_restore, so the TS bit is up
686 * to date.
687 */
688 arch_leave_lazy_cpu_mode();
689
689 /* If the task has used fpu the last 5 timeslices, just do a full 690 /* If the task has used fpu the last 5 timeslices, just do a full
690 * restore of the math state immediately to avoid the trap; the 691 * restore of the math state immediately to avoid the trap; the
691 * chances of needing FPU soon are obviously high now 692 * chances of needing FPU soon are obviously high now
@@ -693,6 +694,14 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
693 if (next_p->fpu_counter > 5) 694 if (next_p->fpu_counter > 5)
694 math_state_restore(); 695 math_state_restore();
695 696
697 /*
698 * Restore %gs if needed (which is common)
699 */
700 if (prev->gs | next->gs)
701 loadsegment(gs, next->gs);
702
703 write_pda(pcurrent, next_p);
704
696 return prev_p; 705 return prev_p;
697} 706}
698 707