aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jeremy Fitzhardinge <jeremy@xensource.com> 2007-10-16 14:51:30 -0400
committer: Jeremy Fitzhardinge <jeremy@goop.org> 2007-10-16 14:51:30 -0400
commit: 9f79991d4186089e228274196413572cc000143b (patch)
tree: cd50a308dca1d650ab77c7019ff3bdcadc7ab0ea
parent: 91e0c5f3dad47838cb2ecc1865ce789a0b7182b1 (diff)
xen: deal with stale cr3 values when unpinning pagetables
When a pagetable is no longer in use, it must be unpinned so that its pages can be freed. However, this is only possible if there are no stray uses of the pagetable. The code currently deals with all the usual cases, but there's a rare case where a vcpu is changing cr3, but is doing so lazily, and the change hasn't actually happened by the time the pagetable is unpinned, even though it appears to have been completed.

This change adds a second per-cpu cr3 variable - xen_current_cr3 - which tracks the actual state of the vcpu cr3. It is only updated once the actual hypercall to set cr3 has been completed. Other processors wishing to unpin a pagetable can check other vcpus' xen_current_cr3 values to see if any cross-cpu IPIs are needed to clean things up.

[ Stable folks: 2.6.23 bugfix ]

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Stable Kernel <stable@kernel.org>
-rw-r--r-- arch/x86/xen/enlighten.c 55
-rw-r--r-- arch/x86/xen/mmu.c 29
-rw-r--r-- arch/x86/xen/xen-ops.h 1
3 files changed, 65 insertions, 20 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 75fd36971d0b..e334bf7cb327 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -53,7 +53,23 @@ EXPORT_SYMBOL_GPL(hypercall_page);
53 53
54DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); 54DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
55DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); 55DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
56DEFINE_PER_CPU(unsigned long, xen_cr3); 56
57/*
58 * Note about cr3 (pagetable base) values:
59 *
60 * xen_cr3 contains the current logical cr3 value; it contains the
61 * last set cr3. This may not be the current effective cr3, because
62 * its update may be being lazily deferred. However, a vcpu looking
63 * at its own cr3 can use this value knowing that everything will
64 * be self-consistent.
65 *
66 * xen_current_cr3 contains the actual vcpu cr3; it is set once the
67 * hypercall to set the vcpu cr3 is complete (so it may be a little
68 * out of date, but it will never be set early). If one vcpu is
69 * looking at another vcpu's cr3 value, it should use this variable.
70 */
71DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */
72DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
57 73
58struct start_info *xen_start_info; 74struct start_info *xen_start_info;
59EXPORT_SYMBOL_GPL(xen_start_info); 75EXPORT_SYMBOL_GPL(xen_start_info);
@@ -610,32 +626,36 @@ static unsigned long xen_read_cr3(void)
610 return x86_read_percpu(xen_cr3); 626 return x86_read_percpu(xen_cr3);
611} 627}
612 628
629static void set_current_cr3(void *v)
630{
631 x86_write_percpu(xen_current_cr3, (unsigned long)v);
632}
633
613static void xen_write_cr3(unsigned long cr3) 634static void xen_write_cr3(unsigned long cr3)
614{ 635{
636 struct mmuext_op *op;
637 struct multicall_space mcs;
638 unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
639
615 BUG_ON(preemptible()); 640 BUG_ON(preemptible());
616 641
617 if (cr3 == x86_read_percpu(xen_cr3)) { 642 mcs = xen_mc_entry(sizeof(*op)); /* disables interrupts */
618 /* just a simple tlb flush */
619 xen_flush_tlb();
620 return;
621 }
622 643
644 /* Update while interrupts are disabled, so it's atomic with
645 respect to IPIs */
623 x86_write_percpu(xen_cr3, cr3); 646 x86_write_percpu(xen_cr3, cr3);
624 647
648 op = mcs.args;
649 op->cmd = MMUEXT_NEW_BASEPTR;
650 op->arg1.mfn = mfn;
625 651
626 { 652 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
627 struct mmuext_op *op;
628 struct multicall_space mcs = xen_mc_entry(sizeof(*op));
629 unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
630
631 op = mcs.args;
632 op->cmd = MMUEXT_NEW_BASEPTR;
633 op->arg1.mfn = mfn;
634 653
635 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 654 /* Update xen_current_cr3 once the batch has actually
655 been submitted. */
656 xen_mc_callback(set_current_cr3, (void *)cr3);
636 657
637 xen_mc_issue(PARAVIRT_LAZY_CPU); 658 xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
638 }
639} 659}
640 660
641/* Early in boot, while setting up the initial pagetable, assume 661/* Early in boot, while setting up the initial pagetable, assume
@@ -1120,6 +1140,7 @@ asmlinkage void __init xen_start_kernel(void)
1120 /* keep using Xen gdt for now; no urgent need to change it */ 1140 /* keep using Xen gdt for now; no urgent need to change it */
1121 1141
1122 x86_write_percpu(xen_cr3, __pa(pgd)); 1142 x86_write_percpu(xen_cr3, __pa(pgd));
1143 x86_write_percpu(xen_current_cr3, __pa(pgd));
1123 1144
1124#ifdef CONFIG_SMP 1145#ifdef CONFIG_SMP
1125 /* Don't do the full vcpu_info placement stuff until we have a 1146 /* Don't do the full vcpu_info placement stuff until we have a
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index c4a391f88980..72f08ab43a4d 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -514,20 +514,43 @@ static void drop_other_mm_ref(void *info)
514 514
515 if (__get_cpu_var(cpu_tlbstate).active_mm == mm) 515 if (__get_cpu_var(cpu_tlbstate).active_mm == mm)
516 leave_mm(smp_processor_id()); 516 leave_mm(smp_processor_id());
517
518 /* If this cpu still has a stale cr3 reference, then make sure
519 it has been flushed. */
520 if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) {
521 load_cr3(swapper_pg_dir);
522 arch_flush_lazy_cpu_mode();
523 }
517} 524}
518 525
519static void drop_mm_ref(struct mm_struct *mm) 526static void drop_mm_ref(struct mm_struct *mm)
520{ 527{
528 cpumask_t mask;
529 unsigned cpu;
530
521 if (current->active_mm == mm) { 531 if (current->active_mm == mm) {
522 if (current->mm == mm) 532 if (current->mm == mm)
523 load_cr3(swapper_pg_dir); 533 load_cr3(swapper_pg_dir);
524 else 534 else
525 leave_mm(smp_processor_id()); 535 leave_mm(smp_processor_id());
536 arch_flush_lazy_cpu_mode();
537 }
538
539 /* Get the "official" set of cpus referring to our pagetable. */
540 mask = mm->cpu_vm_mask;
541
542 /* It's possible that a vcpu may have a stale reference to our
543 cr3, because it's in lazy mode, and it hasn't flushed
544 its set of pending hypercalls yet. In this case, we can
545 look at its actual current cr3 value, and force it to flush
546 if needed. */
547 for_each_online_cpu(cpu) {
548 if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
549 cpu_set(cpu, mask);
526 } 550 }
527 551
528 if (!cpus_empty(mm->cpu_vm_mask)) 552 if (!cpus_empty(mask))
529 xen_smp_call_function_mask(mm->cpu_vm_mask, drop_other_mm_ref, 553 xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
530 mm, 1);
531} 554}
532#else 555#else
533static void drop_mm_ref(struct mm_struct *mm) 556static void drop_mm_ref(struct mm_struct *mm)
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 3847eed0bb09..b02a909bfd4c 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -11,6 +11,7 @@ void xen_copy_trap_info(struct trap_info *traps);
11 11
12DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); 12DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
13DECLARE_PER_CPU(unsigned long, xen_cr3); 13DECLARE_PER_CPU(unsigned long, xen_cr3);
14DECLARE_PER_CPU(unsigned long, xen_current_cr3);
14 15
15extern struct start_info *xen_start_info; 16extern struct start_info *xen_start_info;
16extern struct shared_info *HYPERVISOR_shared_info; 17extern struct shared_info *HYPERVISOR_shared_info;