author		Zachary Amsden <zach@vmware.com>	2007-02-13 07:26:21 -0500
committer	Andi Kleen <andi@basil.nowhere.org>	2007-02-13 07:26:21 -0500
commit		9226d125d94c7e4964dd41cc5e9ca2ff84091d01
tree		935d6e80ff843e1d7b54e0fd9386ef2e0d31aa3d
parent		c119ecce894120790903ef535dac3e105f3d6cde
[PATCH] i386: paravirt CPU hypercall batching mode
The VMI ROM has a mode where hypercalls can be queued and batched.  This
turns out to be a significant win during context switch, but must be done
at a specific point before side effects to CPU state are visible to
subsequent instructions.  This is similar to the MMU batching hooks already
provided.  The same hooks could be used by the Xen backend to implement a
context switch multicall.

To explain a bit more about lazy modes in the paravirt patches, basically,
the idea is that only one of lazy CPU or MMU mode can be active at any given
time.  Lazy MMU mode is similar to this lazy CPU mode, and allows for
batching of multiple PTE updates (say, inside a remap loop), but to avoid
keeping some kind of state machine about when to flush cpu or mmu updates,
we just allow one or the other to be active.  Although there is no real
reason a more comprehensive scheme could not be implemented, there is also
no demonstrated need for this extra complexity.

Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@suse.de>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
 arch/i386/kernel/paravirt.c   |  1 +
 arch/i386/kernel/process.c    | 25 +++++++++++++++++--------
 include/asm-generic/pgtable.h | 13 +++++++++++++
 include/asm-i386/paravirt.h   | 15 +++++++++++++++
 kernel/sched.c                |  7 +++++++
 5 files changed, 53 insertions(+), 8 deletions(-)
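
Before the per-file diffs, the shape of the change: context_switch() opens a
lazy CPU window and __switch_to() closes it, so any CPU-state hypercall issued
in between can be queued by the backend and flushed as one batch.  A simplified
call sequence (an illustrative sketch, not code from this patch):

	context_switch()                        /* kernel/sched.c */
	    arch_enter_lazy_cpu_mode();         /* backend may start queueing */
	    switch_mm(oldmm, mm, next);         /* page table reload can queue */
	    switch_to(prev, next, prev);
	        __switch_to()                   /* arch/i386/kernel/process.c */
	            load_TLS(next, cpu);        /* GDT updates can queue */
	            ...
	            arch_leave_lazy_cpu_mode(); /* single flush to the hypervisor */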
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index 7329ec9fcc99..4dfdac4550dd 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -545,6 +545,7 @@ struct paravirt_ops paravirt_ops = {
 	.apic_write_atomic = native_apic_write_atomic,
 	.apic_read = native_apic_read,
 #endif
+	.set_lazy_mode = (void *)native_nop,
 
 	.flush_tlb_user = native_flush_tlb,
 	.flush_tlb_kernel = native_flush_tlb_global,
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 23ae198dbbc3..cfae587bf7d2 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -670,14 +670,6 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
 	load_TLS(next, cpu);
 
 	/*
-	 * Restore %gs if needed (which is common)
-	 */
-	if (prev->gs | next->gs)
-		loadsegment(gs, next->gs);
-
-	write_pda(pcurrent, next_p);
-
-	/*
 	 * Now maybe handle debug registers and/or IO bitmaps
 	 */
 	if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)
@@ -686,6 +678,15 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
 
 	disable_tsc(prev_p, next_p);
 
+	/*
+	 * Leave lazy mode, flushing any hypercalls made here.
+	 * This must be done before restoring TLS segments so
+	 * the GDT and LDT are properly updated, and must be
+	 * done before math_state_restore, so the TS bit is up
+	 * to date.
+	 */
+	arch_leave_lazy_cpu_mode();
+
 	/* If the task has used fpu the last 5 timeslices, just do a full
 	 * restore of the math state immediately to avoid the trap; the
 	 * chances of needing FPU soon are obviously high now
@@ -693,6 +694,14 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
 	if (next_p->fpu_counter > 5)
 		math_state_restore();
 
+	/*
+	 * Restore %gs if needed (which is common)
+	 */
+	if (prev->gs | next->gs)
+		loadsegment(gs, next->gs);
+
+	write_pda(pcurrent, next_p);
+
 	return prev_p;
 }
 
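
Note that the process.c hunks do more than insert the flush: the %gs restore
and write_pda() move from before the debug-register/IO-bitmap handling to
after arch_leave_lazy_cpu_mode().  That follows from the new comment: under
lazy mode the load_TLS() GDT updates may still be sitting in the queue, so the
segment load has to wait until the flush has made those descriptors visible.
The resulting order, condensed from the hunks above (no new code):

	arch_leave_lazy_cpu_mode();	/* queued load_TLS() writes reach the GDT */
	if (next_p->fpu_counter > 5)
		math_state_restore();	/* TS bit is up to date after the flush */
	if (prev->gs | next->gs)
		loadsegment(gs, next->gs); /* %gs loads the updated descriptor */
	write_pda(pcurrent, next_p);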
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 9d774d07d95b..00c23433b39f 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -183,6 +183,19 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
 #endif
 
 /*
+ * A facility to provide batching of the reload of page tables with the
+ * actual context switch code for paravirtualized guests.  By convention,
+ * only one of the lazy modes (CPU, MMU) should be active at any given
+ * time, entry should never be nested, and entry and exits should always
+ * be paired.  This is for sanity of maintaining and reasoning about the
+ * kernel code.
+ */
+#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE
+#define arch_enter_lazy_cpu_mode()	do {} while (0)
+#define arch_leave_lazy_cpu_mode()	do {} while (0)
+#endif
+
+/*
  * When walking page tables, get the address of the next boundary,
  * or the end address of the range if that comes earlier.  Although no
  * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
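
The commit message's remap-loop example for the analogous lazy MMU mode would
look roughly like the following.  This is a hedged sketch, not code from this
patch; addr, end, pte, pfn, mm, and prot are assumed to be set up by a
surrounding (hypothetical) remap function:

	arch_enter_lazy_mmu_mode();
	for (; addr < end; pte++, pfn++, addr += PAGE_SIZE)
		set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
	arch_leave_lazy_mmu_mode();	/* queued PTE updates flush as one batch */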
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h
index 53da276a2ec2..38e5164bd0e7 100644
--- a/include/asm-i386/paravirt.h
+++ b/include/asm-i386/paravirt.h
@@ -146,6 +146,8 @@ struct paravirt_ops
 	void (fastcall *pmd_clear)(pmd_t *pmdp);
 #endif
 
+	void (fastcall *set_lazy_mode)(int mode);
+
 	/* These two are jmp to, not actually called. */
 	void (fastcall *irq_enable_sysexit)(void);
 	void (fastcall *iret)(void);
@@ -386,6 +388,19 @@ static inline void pmd_clear(pmd_t *pmdp)
 }
 #endif
 
+/* Lazy mode for batching updates / context switch */
+#define PARAVIRT_LAZY_NONE 0
+#define PARAVIRT_LAZY_MMU  1
+#define PARAVIRT_LAZY_CPU  2
+
+#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE
+#define arch_enter_lazy_cpu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_CPU)
+#define arch_leave_lazy_cpu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_NONE)
+
+#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+#define arch_enter_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_MMU)
+#define arch_leave_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_NONE)
+
 /* These all sit in the .parainstructions section to tell us what to patch. */
 struct paravirt_patch {
 	u8 *instr;		/* original instructions */
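
What a backend plugs into set_lazy_mode is up to the implementation; the VMI
side is not part of this patch.  A minimal sketch of the queue-and-flush idea,
where vmi_queue_start() and vmi_queue_flush() are invented stand-ins for
whatever the ROM interface actually provides:

	static void fastcall example_set_lazy_mode(int mode)
	{
		switch (mode) {
		case PARAVIRT_LAZY_CPU:
		case PARAVIRT_LAZY_MMU:
			/* defer: queue hypercalls instead of trapping per call */
			vmi_queue_start(mode);
			break;
		case PARAVIRT_LAZY_NONE:
			/* end of the batch: issue everything in one trap */
			vmi_queue_flush();
			break;
		}
	}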
diff --git a/kernel/sched.c b/kernel/sched.c
index 08f86178aa34..0dc757246d89 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1853,6 +1853,13 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	struct mm_struct *mm = next->mm;
 	struct mm_struct *oldmm = prev->active_mm;
 
+	/*
+	 * For paravirt, this is coupled with an exit in switch_to to
+	 * combine the page table reload and the switch backend into
+	 * one hypercall.
+	 */
+	arch_enter_lazy_cpu_mode();
+
 	if (!mm) {
 		next->active_mm = oldmm;
 		atomic_inc(&oldmm->mm_count);
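
On native hardware the new hook costs next to nothing; tracing the expansion
from the diffs above, the two configurations resolve as follows:

	/* CONFIG_PARAVIRT=y on bare metal:
	 *   arch_enter_lazy_cpu_mode()
	 *     -> paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_CPU)
	 *     -> native_nop()               (default set in paravirt.c above)
	 *
	 * CONFIG_PARAVIRT=n:
	 *   arch_enter_lazy_cpu_mode()
	 *     -> do {} while (0)            (asm-generic/pgtable.h fallback)
	 */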