diff options
-rw-r--r-- | arch/x86/xen/enlighten.c | 55 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 29 | ||||
-rw-r--r-- | arch/x86/xen/xen-ops.h | 1 |
3 files changed, 65 insertions, 20 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 75fd36971d0b..e334bf7cb327 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -53,7 +53,23 @@ EXPORT_SYMBOL_GPL(hypercall_page); | |||
53 | 53 | ||
54 | DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); | 54 | DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); |
55 | DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); | 55 | DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); |
56 | DEFINE_PER_CPU(unsigned long, xen_cr3); | 56 | |
57 | /* | ||
58 | * Note about cr3 (pagetable base) values: | ||
59 | * | ||
60 | * xen_cr3 contains the current logical cr3 value; it contains the | ||
61 | * last set cr3. This may not be the current effective cr3, because | ||
62 | * its update may be being lazily deferred. However, a vcpu looking | ||
63 | * at its own cr3 can use this value knowing that everything will | ||
64 | * be self-consistent. | ||
65 | * | ||
66 | * xen_current_cr3 contains the actual vcpu cr3; it is set once the | ||
67 | * hypercall to set the vcpu cr3 is complete (so it may be a little | ||
68 | * out of date, but it will never be set early). If one vcpu is | ||
69 | * looking at another vcpu's cr3 value, it should use this variable. | ||
70 | */ | ||
71 | DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */ | ||
72 | DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ | ||
57 | 73 | ||
58 | struct start_info *xen_start_info; | 74 | struct start_info *xen_start_info; |
59 | EXPORT_SYMBOL_GPL(xen_start_info); | 75 | EXPORT_SYMBOL_GPL(xen_start_info); |
@@ -610,32 +626,36 @@ static unsigned long xen_read_cr3(void) | |||
610 | return x86_read_percpu(xen_cr3); | 626 | return x86_read_percpu(xen_cr3); |
611 | } | 627 | } |
612 | 628 | ||
629 | static void set_current_cr3(void *v) | ||
630 | { | ||
631 | x86_write_percpu(xen_current_cr3, (unsigned long)v); | ||
632 | } | ||
633 | |||
613 | static void xen_write_cr3(unsigned long cr3) | 634 | static void xen_write_cr3(unsigned long cr3) |
614 | { | 635 | { |
636 | struct mmuext_op *op; | ||
637 | struct multicall_space mcs; | ||
638 | unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3)); | ||
639 | |||
615 | BUG_ON(preemptible()); | 640 | BUG_ON(preemptible()); |
616 | 641 | ||
617 | if (cr3 == x86_read_percpu(xen_cr3)) { | 642 | mcs = xen_mc_entry(sizeof(*op)); /* disables interrupts */ |
618 | /* just a simple tlb flush */ | ||
619 | xen_flush_tlb(); | ||
620 | return; | ||
621 | } | ||
622 | 643 | ||
644 | /* Update while interrupts are disabled, so it's atomic with | ||
645 | respect to ipis */ | ||
623 | x86_write_percpu(xen_cr3, cr3); | 646 | x86_write_percpu(xen_cr3, cr3); |
624 | 647 | ||
648 | op = mcs.args; | ||
649 | op->cmd = MMUEXT_NEW_BASEPTR; | ||
650 | op->arg1.mfn = mfn; | ||
625 | 651 | ||
626 | { | 652 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); |
627 | struct mmuext_op *op; | ||
628 | struct multicall_space mcs = xen_mc_entry(sizeof(*op)); | ||
629 | unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3)); | ||
630 | |||
631 | op = mcs.args; | ||
632 | op->cmd = MMUEXT_NEW_BASEPTR; | ||
633 | op->arg1.mfn = mfn; | ||
634 | 653 | ||
635 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); | 654 | /* Update xen_current_cr3 once the batch has actually |
655 | been submitted. */ | ||
656 | xen_mc_callback(set_current_cr3, (void *)cr3); | ||
636 | 657 | ||
637 | xen_mc_issue(PARAVIRT_LAZY_CPU); | 658 | xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ |
638 | } | ||
639 | } | 659 | } |
640 | 660 | ||
641 | /* Early in boot, while setting up the initial pagetable, assume | 661 | /* Early in boot, while setting up the initial pagetable, assume |
@@ -1120,6 +1140,7 @@ asmlinkage void __init xen_start_kernel(void) | |||
1120 | /* keep using Xen gdt for now; no urgent need to change it */ | 1140 | /* keep using Xen gdt for now; no urgent need to change it */ |
1121 | 1141 | ||
1122 | x86_write_percpu(xen_cr3, __pa(pgd)); | 1142 | x86_write_percpu(xen_cr3, __pa(pgd)); |
1143 | x86_write_percpu(xen_current_cr3, __pa(pgd)); | ||
1123 | 1144 | ||
1124 | #ifdef CONFIG_SMP | 1145 | #ifdef CONFIG_SMP |
1125 | /* Don't do the full vcpu_info placement stuff until we have a | 1146 | /* Don't do the full vcpu_info placement stuff until we have a |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index c4a391f88980..72f08ab43a4d 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -514,20 +514,43 @@ static void drop_other_mm_ref(void *info) | |||
514 | 514 | ||
515 | if (__get_cpu_var(cpu_tlbstate).active_mm == mm) | 515 | if (__get_cpu_var(cpu_tlbstate).active_mm == mm) |
516 | leave_mm(smp_processor_id()); | 516 | leave_mm(smp_processor_id()); |
517 | |||
518 | /* If this cpu still has a stale cr3 reference, then make sure | ||
519 | it has been flushed. */ | ||
520 | if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) { | ||
521 | load_cr3(swapper_pg_dir); | ||
522 | arch_flush_lazy_cpu_mode(); | ||
523 | } | ||
517 | } | 524 | } |
518 | 525 | ||
519 | static void drop_mm_ref(struct mm_struct *mm) | 526 | static void drop_mm_ref(struct mm_struct *mm) |
520 | { | 527 | { |
528 | cpumask_t mask; | ||
529 | unsigned cpu; | ||
530 | |||
521 | if (current->active_mm == mm) { | 531 | if (current->active_mm == mm) { |
522 | if (current->mm == mm) | 532 | if (current->mm == mm) |
523 | load_cr3(swapper_pg_dir); | 533 | load_cr3(swapper_pg_dir); |
524 | else | 534 | else |
525 | leave_mm(smp_processor_id()); | 535 | leave_mm(smp_processor_id()); |
536 | arch_flush_lazy_cpu_mode(); | ||
537 | } | ||
538 | |||
539 | /* Get the "official" set of cpus referring to our pagetable. */ | ||
540 | mask = mm->cpu_vm_mask; | ||
541 | |||
542 | /* It's possible that a vcpu may have a stale reference to our | ||
543 | cr3, because it's in lazy mode, and it hasn't flushed | ||
544 | its set of pending hypercalls yet. In this case, we can | ||
545 | look at its actual current cr3 value, and force it to flush | ||
546 | if needed. */ | ||
547 | for_each_online_cpu(cpu) { | ||
548 | if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) | ||
549 | cpu_set(cpu, mask); | ||
526 | } | 550 | } |
527 | 551 | ||
528 | if (!cpus_empty(mm->cpu_vm_mask)) | 552 | if (!cpus_empty(mask)) |
529 | xen_smp_call_function_mask(mm->cpu_vm_mask, drop_other_mm_ref, | 553 | xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); |
530 | mm, 1); | ||
531 | } | 554 | } |
532 | #else | 555 | #else |
533 | static void drop_mm_ref(struct mm_struct *mm) | 556 | static void drop_mm_ref(struct mm_struct *mm) |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 3847eed0bb09..b02a909bfd4c 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -11,6 +11,7 @@ void xen_copy_trap_info(struct trap_info *traps); | |||
11 | 11 | ||
12 | DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); | 12 | DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); |
13 | DECLARE_PER_CPU(unsigned long, xen_cr3); | 13 | DECLARE_PER_CPU(unsigned long, xen_cr3); |
14 | DECLARE_PER_CPU(unsigned long, xen_current_cr3); | ||
14 | 15 | ||
15 | extern struct start_info *xen_start_info; | 16 | extern struct start_info *xen_start_info; |
16 | extern struct shared_info *HYPERVISOR_shared_info; | 17 | extern struct shared_info *HYPERVISOR_shared_info; |