author      Jeremy Fitzhardinge <jeremy@xensource.com>   2007-07-17 21:37:05 -0400
committer   Jeremy Fitzhardinge <jeremy@goop.org>        2007-07-18 11:47:43 -0400
commit      f4f97b3ea90130520afb478cbc2918be2b6587b8 (patch)
tree        1aeebe3230b4a7eef0630eec148927c1adf340a5 /arch/i386/xen/enlighten.c
parent      c85b04c3749507546f6d5868976e4793e35c2ec0 (diff)
xen: Complete pagetable pinning
Xen requires all active pagetables to be marked read-only. When the
base of the pagetable is loaded into %cr3, the hypervisor validates
the entire pagetable and only allows the load to proceed if it all
checks out.
This is pretty slow, so to mitigate this cost Xen has a notion of
pinned pagetables. Pinned pagetables are pagetables which are
considered to be active even if no processor's cr3 is pointing to
them. This means that they must remain read-only and all updates are validated
by the hypervisor. This makes context switches much cheaper, because
the hypervisor doesn't need to revalidate the pagetable each time.
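For reference, the pin itself is a single mmuext hypercall naming the machine
frame of the pagetable's top level; the sketch below simply mirrors the
open-coded pin added in xen_pagetable_setup_done() further down in this patch
(PAE case shown, and the helper name is hypothetical, not part of the patch):

    /* Hypothetical helper mirroring the pin hypercall used later in this
       patch: ask Xen to validate and pin the pagetable rooted at 'pgd'. */
    static void pin_pagetable_sketch(pgd_t *pgd)
    {
            struct mmuext_op op;

            op.cmd = MMUEXT_PIN_L3_TABLE;                  /* top level is an L3 under PAE */
            op.arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd))); /* machine frame of the pgd page */

            if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
                    BUG();                                 /* hypervisor rejected the pagetable */
    }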
This also adds a new paravirt hook which is called during setup once
the zones and memory allocator have been initialized. When the
init_mm pagetable is first built, the struct page array does not yet
exist, and so there's nowhere to put the init_mm pagetable's PG_pinned
flags. Once the zones are initialized and the struct page array
exists, we can set the PG_pinned flags for those pages.
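The hook's implementation lives outside this file and is not part of this
diff; purely as a hypothetical sketch of what it has to do once mem_map
exists -- walk init_mm's already-pinned pagetable and set PG_pinned on each
pagetable page -- it might look roughly like this (the name and the exact
walk are assumptions, not the patch's code):

    /* Hypothetical sketch only -- not the code added by this patch.  Runs
       from the post_allocator_init hook, after the struct page array exists. */
    static void mark_init_mm_pinned_sketch(void)
    {
            pgd_t *pgd = init_mm.pgd;
            int i, j;

            SetPagePinned(virt_to_page(pgd));               /* top-level page */

            for (i = 0; i < PTRS_PER_PGD; i++) {
                    pud_t *pud = pud_offset(&pgd[i], 0);    /* pud level is folded on i386 */
                    pmd_t *pmd;

                    if (!pud_present(*pud))
                            continue;

                    pmd = pmd_offset(pud, 0);
                    if (PTRS_PER_PMD > 1)                   /* PAE: the pmd has its own page */
                            SetPagePinned(virt_to_page(pmd));

                    for (j = 0; j < PTRS_PER_PMD; j++)
                            if (pmd_present(pmd[j]) && !(pmd_val(pmd[j]) & _PAGE_PSE))
                                    SetPagePinned(pmd_page(pmd[j]));    /* pte page */
            }
    }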
This patch also adds Xen support for pte pages allocated out of
highmem (highpte) by implementing xen_kmap_atomic_pte.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Cc: Zach Amsden <zach@vmware.com>
Diffstat (limited to 'arch/i386/xen/enlighten.c')
-rw-r--r--   arch/i386/xen/enlighten.c | 87
1 file changed, 69 insertions(+), 18 deletions(-)
diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c
index 25eb3592f11d..86e68e680116 100644
--- a/arch/i386/xen/enlighten.c
+++ b/arch/i386/xen/enlighten.c
@@ -21,6 +21,9 @@
 #include <linux/sched.h>
 #include <linux/bootmem.h>
 #include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/page-flags.h>
+#include <linux/highmem.h>
 
 #include <xen/interface/xen.h>
 #include <xen/interface/physdev.h>
@@ -500,32 +503,59 @@ static void xen_write_cr3(unsigned long cr3)
         }
 }
 
-static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
+/* Early in boot, while setting up the initial pagetable, assume
+   everything is pinned. */
+static void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn)
 {
-        /* XXX pfn isn't necessarily a lowmem page */
+        BUG_ON(mem_map);        /* should only be used early */
         make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
 }
 
-static void xen_alloc_pd(u32 pfn)
+/* This needs to make sure the new pte page is pinned iff its being
+   attached to a pinned pagetable. */
+static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
 {
-        make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
-}
+        struct page *page = pfn_to_page(pfn);
 
-static void xen_release_pd(u32 pfn)
-{
-        make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
+        if (PagePinned(virt_to_page(mm->pgd))) {
+                SetPagePinned(page);
+
+                if (!PageHighMem(page))
+                        make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
+                else
+                        /* make sure there are no stray mappings of
+                           this page */
+                        kmap_flush_unused();
+        }
 }
 
+/* This should never happen until we're OK to use struct page */
 static void xen_release_pt(u32 pfn)
 {
-        make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
+        struct page *page = pfn_to_page(pfn);
+
+        if (PagePinned(page)) {
+                if (!PageHighMem(page))
+                        make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
+        }
 }
 
-static void xen_alloc_pd_clone(u32 pfn, u32 clonepfn,
-                               u32 start, u32 count)
+#ifdef CONFIG_HIGHPTE
+static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
 {
-        xen_alloc_pd(pfn);
+        pgprot_t prot = PAGE_KERNEL;
+
+        if (PagePinned(page))
+                prot = PAGE_KERNEL_RO;
+
+        if (0 && PageHighMem(page))
+                printk("mapping highpte %lx type %d prot %s\n",
+                       page_to_pfn(page), type,
+                       (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ");
+
+        return kmap_atomic_prot(page, type, prot);
 }
+#endif
 
 static __init void xen_pagetable_setup_start(pgd_t *base)
 {
@@ -553,7 +583,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
                         memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
                                PAGE_SIZE);
 
-                        xen_alloc_pd(PFN_DOWN(__pa(pmd)));
+                        make_lowmem_page_readonly(pmd);
 
                         set_pgd(&base[i], __pgd(1 + __pa(pmd)));
                 } else
@@ -574,6 +604,10 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
 
 static __init void xen_pagetable_setup_done(pgd_t *base)
 {
+        /* This will work as long as patching hasn't happened yet
+           (which it hasn't) */
+        paravirt_ops.alloc_pt = xen_alloc_pt;
+
         if (!xen_feature(XENFEAT_auto_translated_physmap)) {
                 /*
                  * Create a mapping for the shared info page.
@@ -591,7 +625,19 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
                 HYPERVISOR_shared_info =
                         (struct shared_info *)__va(xen_start_info->shared_info);
 
-                xen_pgd_pin(base);
+                /* Actually pin the pagetable down, but we can't set PG_pinned
+                   yet because the page structures don't exist yet. */
+                {
+                        struct mmuext_op op;
+#ifdef CONFIG_X86_PAE
+                        op.cmd = MMUEXT_PIN_L3_TABLE;
+#else
+                        op.cmd = MMUEXT_PIN_L3_TABLE;
+#endif
+                        op.arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(base)));
+                        if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
+                                BUG();
+                }
 
                 xen_vcpu_setup(smp_processor_id());
         }
@@ -608,6 +654,7 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
         .memory_setup = xen_memory_setup,
         .arch_setup = xen_arch_setup,
         .init_IRQ = xen_init_IRQ,
+        .post_allocator_init = xen_mark_init_mm_pinned,
 
         .time_init = xen_time_init,
         .set_wallclock = xen_set_wallclock,
@@ -688,11 +735,15 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
         .pagetable_setup_start = xen_pagetable_setup_start,
         .pagetable_setup_done = xen_pagetable_setup_done,
 
-        .alloc_pt = xen_alloc_pt,
-        .alloc_pd = xen_alloc_pd,
-        .alloc_pd_clone = xen_alloc_pd_clone,
-        .release_pd = xen_release_pd,
+        .alloc_pt = xen_alloc_pt_init,
         .release_pt = xen_release_pt,
+        .alloc_pd = paravirt_nop,
+        .alloc_pd_clone = paravirt_nop,
+        .release_pd = paravirt_nop,
+
+#ifdef CONFIG_HIGHPTE
+        .kmap_atomic_pte = xen_kmap_atomic_pte,
+#endif
 
         .set_pte = xen_set_pte,
         .set_pte_at = xen_set_pte_at,