diff options
author | Jeremy Fitzhardinge <jeremy@xensource.com> | 2007-07-17 21:37:06 -0400 |
---|---|---|
committer | Jeremy Fitzhardinge <jeremy@goop.org> | 2007-07-18 11:47:44 -0400 |
commit | f87e4cac4f4e940b328d3deb5b53e642e3881f43 (patch) | |
tree | 7409f86561e5f97459378abd2ae21e9a5c82bfea /arch/i386/xen/mmu.c | |
parent | ab55028886dd1dd54585f22bf19a00eb23869340 (diff) |
xen: SMP guest support
This is a fairly straightforward Xen implementation of smp_ops.
Xen has its own IPI mechanisms, and has no dependency on any
APIC-based IPI. The smp_ops hooks and the flush_tlb_others pv_op
allow a Xen guest to avoid all APIC code in arch/i386 (the only apic
operation is a single apic_read for the apic version number).
One subtle point which needs to be addressed is unpinning pagetables
when another cpu may have a lazy tlb reference to the pagetable. Xen
will not allow an in-use pagetable to be unpinned, so we must find any
other cpus with a reference to the pagetable and get them to shoot
down their references.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Andi Kleen <ak@suse.de>
Diffstat (limited to 'arch/i386/xen/mmu.c')
-rw-r--r-- | arch/i386/xen/mmu.c | 69 |
1 file changed, 52 insertions, 17 deletions
diff --git a/arch/i386/xen/mmu.c b/arch/i386/xen/mmu.c index 53501ce2d15c..bc49ef846203 100644 --- a/arch/i386/xen/mmu.c +++ b/arch/i386/xen/mmu.c | |||
@@ -391,8 +391,12 @@ void xen_pgd_pin(pgd_t *pgd) | |||
391 | 391 | ||
392 | xen_mc_batch(); | 392 | xen_mc_batch(); |
393 | 393 | ||
394 | if (pgd_walk(pgd, pin_page, TASK_SIZE)) | 394 | if (pgd_walk(pgd, pin_page, TASK_SIZE)) { |
395 | /* re-enable interrupts for kmap_flush_unused */ | ||
396 | xen_mc_issue(0); | ||
395 | kmap_flush_unused(); | 397 | kmap_flush_unused(); |
398 | xen_mc_batch(); | ||
399 | } | ||
396 | 400 | ||
397 | mcs = __xen_mc_entry(sizeof(*op)); | 401 | mcs = __xen_mc_entry(sizeof(*op)); |
398 | op = mcs.args; | 402 | op = mcs.args; |
@@ -474,27 +478,58 @@ void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) | |||
474 | spin_unlock(&mm->page_table_lock); | 478 | spin_unlock(&mm->page_table_lock); |
475 | } | 479 | } |
476 | 480 | ||
477 | void xen_exit_mmap(struct mm_struct *mm) | ||
478 | { | ||
479 | struct task_struct *tsk = current; | ||
480 | |||
481 | task_lock(tsk); | ||
482 | 481 | ||
483 | /* | 482 | #ifdef CONFIG_SMP |
484 | * We aggressively remove defunct pgd from cr3. We execute unmap_vmas() | 483 | /* Another cpu may still have their %cr3 pointing at the pagetable, so |
485 | * *much* faster this way, as no tlb flushes means bigger wrpt batches. | 484 | we need to repoint it somewhere else before we can unpin it. */ |
486 | */ | 485 | static void drop_other_mm_ref(void *info) |
487 | if (tsk->active_mm == mm) { | 486 | { |
488 | tsk->active_mm = &init_mm; | 487 | struct mm_struct *mm = info; |
489 | atomic_inc(&init_mm.mm_count); | ||
490 | 488 | ||
491 | switch_mm(mm, &init_mm, tsk); | 489 | if (__get_cpu_var(cpu_tlbstate).active_mm == mm) |
490 | leave_mm(smp_processor_id()); | ||
491 | } | ||
492 | 492 | ||
493 | atomic_dec(&mm->mm_count); | 493 | static void drop_mm_ref(struct mm_struct *mm) |
494 | BUG_ON(atomic_read(&mm->mm_count) == 0); | 494 | { |
495 | if (current->active_mm == mm) { | ||
496 | if (current->mm == mm) | ||
497 | load_cr3(swapper_pg_dir); | ||
498 | else | ||
499 | leave_mm(smp_processor_id()); | ||
495 | } | 500 | } |
496 | 501 | ||
497 | task_unlock(tsk); | 502 | if (!cpus_empty(mm->cpu_vm_mask)) |
503 | xen_smp_call_function_mask(mm->cpu_vm_mask, drop_other_mm_ref, | ||
504 | mm, 1); | ||
505 | } | ||
506 | #else | ||
507 | static void drop_mm_ref(struct mm_struct *mm) | ||
508 | { | ||
509 | if (current->active_mm == mm) | ||
510 | load_cr3(swapper_pg_dir); | ||
511 | } | ||
512 | #endif | ||
513 | |||
514 | /* | ||
515 | * While a process runs, Xen pins its pagetables, which means that the | ||
516 | * hypervisor forces it to be read-only, and it controls all updates | ||
517 | * to it. This means that all pagetable updates have to go via the | ||
518 | * hypervisor, which is moderately expensive. | ||
519 | * | ||
520 | * Since we're pulling the pagetable down, we switch to use init_mm, | ||
521 | * unpin old process pagetable and mark it all read-write, which | ||
522 | * allows further operations on it to be simple memory accesses. | ||
523 | * | ||
524 | * The only subtle point is that another CPU may be still using the | ||
525 | * pagetable because of lazy tlb flushing. This means we need to | ||
526 | * switch all CPUs off this pagetable before we can unpin it. | ||
527 | */ | ||
528 | void xen_exit_mmap(struct mm_struct *mm) | ||
529 | { | ||
530 | get_cpu(); /* make sure we don't move around */ | ||
531 | drop_mm_ref(mm); | ||
532 | put_cpu(); | ||
498 | 533 | ||
499 | xen_pgd_unpin(mm->pgd); | 534 | xen_pgd_unpin(mm->pgd); |
500 | } | 535 | } |