aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/xen/enlighten.c
diff options
context:
space:
mode:
authorJeremy Fitzhardinge <jeremy@xensource.com>2007-10-16 14:51:30 -0400
committerJeremy Fitzhardinge <jeremy@goop.org>2007-10-16 14:51:30 -0400
commit9f79991d4186089e228274196413572cc000143b (patch)
treecd50a308dca1d650ab77c7019ff3bdcadc7ab0ea /arch/x86/xen/enlighten.c
parent91e0c5f3dad47838cb2ecc1865ce789a0b7182b1 (diff)
xen: deal with stale cr3 values when unpinning pagetables
When a pagetable is no longer in use, it must be unpinned so that its pages can be freed. However, this is only possible if there are no stray uses of the pagetable. The code currently deals with all the usual cases, but there's a rare case where a vcpu is changing cr3, but is doing so lazily, and the change hasn't actually happened by the time the pagetable is unpinned, even though it appears to have been completed. This change adds a second per-cpu cr3 variable - xen_current_cr3 - which tracks the actual state of the vcpu cr3. It is only updated once the actual hypercall to set cr3 has been completed. Other processors wishing to unpin a pagetable can check other vcpus' xen_current_cr3 values to see if any cross-cpu IPIs are needed to clean things up. [ Stable folks: 2.6.23 bugfix ] Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com> Cc: Stable Kernel <stable@kernel.org>
Diffstat (limited to 'arch/x86/xen/enlighten.c')
-rw-r--r--arch/x86/xen/enlighten.c55
1 files changed, 38 insertions, 17 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 75fd36971d0b..e334bf7cb327 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -53,7 +53,23 @@ EXPORT_SYMBOL_GPL(hypercall_page);
53 53
54DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); 54DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
55DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); 55DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
56DEFINE_PER_CPU(unsigned long, xen_cr3); 56
57/*
58 * Note about cr3 (pagetable base) values:
59 *
60 * xen_cr3 contains the current logical cr3 value; it contains the
61 * last set cr3. This may not be the current effective cr3, because
62 * its update may be being lazily deferred. However, a vcpu looking
63 * at its own cr3 can use this value knowing that everything will
64 * be self-consistent.
65 *
66 * xen_current_cr3 contains the actual vcpu cr3; it is set once the
67 * hypercall to set the vcpu cr3 is complete (so it may be a little
68 * out of date, but it will never be set early). If one vcpu is
69 * looking at another vcpu's cr3 value, it should use this variable.
70 */
71DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */
72DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
57 73
58struct start_info *xen_start_info; 74struct start_info *xen_start_info;
59EXPORT_SYMBOL_GPL(xen_start_info); 75EXPORT_SYMBOL_GPL(xen_start_info);
@@ -610,32 +626,36 @@ static unsigned long xen_read_cr3(void)
610 return x86_read_percpu(xen_cr3); 626 return x86_read_percpu(xen_cr3);
611} 627}
612 628
629static void set_current_cr3(void *v)
630{
631 x86_write_percpu(xen_current_cr3, (unsigned long)v);
632}
633
613static void xen_write_cr3(unsigned long cr3) 634static void xen_write_cr3(unsigned long cr3)
614{ 635{
636 struct mmuext_op *op;
637 struct multicall_space mcs;
638 unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
639
615 BUG_ON(preemptible()); 640 BUG_ON(preemptible());
616 641
617 if (cr3 == x86_read_percpu(xen_cr3)) { 642 mcs = xen_mc_entry(sizeof(*op)); /* disables interrupts */
618 /* just a simple tlb flush */
619 xen_flush_tlb();
620 return;
621 }
622 643
644 /* Update while interrupts are disabled, so it's atomic with
645 respect to ipis */
623 x86_write_percpu(xen_cr3, cr3); 646 x86_write_percpu(xen_cr3, cr3);
624 647
648 op = mcs.args;
649 op->cmd = MMUEXT_NEW_BASEPTR;
650 op->arg1.mfn = mfn;
625 651
626 { 652 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
627 struct mmuext_op *op;
628 struct multicall_space mcs = xen_mc_entry(sizeof(*op));
629 unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
630
631 op = mcs.args;
632 op->cmd = MMUEXT_NEW_BASEPTR;
633 op->arg1.mfn = mfn;
634 653
635 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 654 /* Update xen_current_cr3 once the batch has actually
655 been submitted. */
656 xen_mc_callback(set_current_cr3, (void *)cr3);
636 657
637 xen_mc_issue(PARAVIRT_LAZY_CPU); 658 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
638 }
639} 659}
640 660
641/* Early in boot, while setting up the initial pagetable, assume 661/* Early in boot, while setting up the initial pagetable, assume
@@ -1120,6 +1140,7 @@ asmlinkage void __init xen_start_kernel(void)
1120 /* keep using Xen gdt for now; no urgent need to change it */ 1140 /* keep using Xen gdt for now; no urgent need to change it */
1121 1141
1122 x86_write_percpu(xen_cr3, __pa(pgd)); 1142 x86_write_percpu(xen_cr3, __pa(pgd));
1143 x86_write_percpu(xen_current_cr3, __pa(pgd));
1123 1144
1124#ifdef CONFIG_SMP 1145#ifdef CONFIG_SMP
1125 /* Don't do the full vcpu_info placement stuff until we have a 1146 /* Don't do the full vcpu_info placement stuff until we have a