diff options
author | Zachary Amsden <zach@vmware.com> | 2007-02-13 07:26:21 -0500 |
---|---|---|
committer | Andi Kleen <andi@basil.nowhere.org> | 2007-02-13 07:26:21 -0500 |
commit | 9226d125d94c7e4964dd41cc5e9ca2ff84091d01 (patch) | |
tree | 935d6e80ff843e1d7b54e0fd9386ef2e0d31aa3d /arch | |
parent | c119ecce894120790903ef535dac3e105f3d6cde (diff) |
[PATCH] i386: paravirt CPU hypercall batching mode
The VMI ROM has a mode where hypercalls can be queued and batched. This turns
out to be a significant win during context switch, but the queued hypercalls
must be flushed at a specific point, before side effects to CPU state are
visible to subsequent instructions. This is similar to the MMU batching hooks
already provided.
The same hooks could be used by the Xen backend to implement a context switch
multicall.
To explain a bit more about lazy modes in the paravirt patches: the basic
idea is that only one of lazy CPU mode or lazy MMU mode can be active at any
given time. Lazy MMU mode is similar to this lazy CPU mode, and allows for
batching of multiple PTE updates (say, inside a remap loop), but to avoid
keeping some kind of state machine about when to flush CPU or MMU updates, we
just allow one or the other to be active. Although there is no real reason a
more comprehensive scheme could not be implemented, there is also no
demonstrated need for this extra complexity.
Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@suse.de>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/i386/kernel/paravirt.c | 1 | ||||
-rw-r--r-- | arch/i386/kernel/process.c | 25 |
2 files changed, 18 insertions, 8 deletions
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index 7329ec9fcc99..4dfdac4550dd 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c | |||
@@ -545,6 +545,7 @@ struct paravirt_ops paravirt_ops = { | |||
545 | .apic_write_atomic = native_apic_write_atomic, | 545 | .apic_write_atomic = native_apic_write_atomic, |
546 | .apic_read = native_apic_read, | 546 | .apic_read = native_apic_read, |
547 | #endif | 547 | #endif |
548 | .set_lazy_mode = (void *)native_nop, | ||
548 | 549 | ||
549 | .flush_tlb_user = native_flush_tlb, | 550 | .flush_tlb_user = native_flush_tlb, |
550 | .flush_tlb_kernel = native_flush_tlb_global, | 551 | .flush_tlb_kernel = native_flush_tlb_global, |
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 23ae198dbbc3..cfae587bf7d2 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c | |||
@@ -670,14 +670,6 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
670 | load_TLS(next, cpu); | 670 | load_TLS(next, cpu); |
671 | 671 | ||
672 | /* | 672 | /* |
673 | * Restore %gs if needed (which is common) | ||
674 | */ | ||
675 | if (prev->gs | next->gs) | ||
676 | loadsegment(gs, next->gs); | ||
677 | |||
678 | write_pda(pcurrent, next_p); | ||
679 | |||
680 | /* | ||
681 | * Now maybe handle debug registers and/or IO bitmaps | 673 | * Now maybe handle debug registers and/or IO bitmaps |
682 | */ | 674 | */ |
683 | if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW) | 675 | if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW) |
@@ -686,6 +678,15 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
686 | 678 | ||
687 | disable_tsc(prev_p, next_p); | 679 | disable_tsc(prev_p, next_p); |
688 | 680 | ||
681 | /* | ||
682 | * Leave lazy mode, flushing any hypercalls made here. | ||
683 | * This must be done before restoring TLS segments so | ||
684 | * the GDT and LDT are properly updated, and must be | ||
685 | * done before math_state_restore, so the TS bit is up | ||
686 | * to date. | ||
687 | */ | ||
688 | arch_leave_lazy_cpu_mode(); | ||
689 | |||
689 | /* If the task has used fpu the last 5 timeslices, just do a full | 690 | /* If the task has used fpu the last 5 timeslices, just do a full |
690 | * restore of the math state immediately to avoid the trap; the | 691 | * restore of the math state immediately to avoid the trap; the |
691 | * chances of needing FPU soon are obviously high now | 692 | * chances of needing FPU soon are obviously high now |
@@ -693,6 +694,14 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
693 | if (next_p->fpu_counter > 5) | 694 | if (next_p->fpu_counter > 5) |
694 | math_state_restore(); | 695 | math_state_restore(); |
695 | 696 | ||
697 | /* | ||
698 | * Restore %gs if needed (which is common) | ||
699 | */ | ||
700 | if (prev->gs | next->gs) | ||
701 | loadsegment(gs, next->gs); | ||
702 | |||
703 | write_pda(pcurrent, next_p); | ||
704 | |||
696 | return prev_p; | 705 | return prev_p; |
697 | } | 706 | } |
698 | 707 | ||