diff options
author | Jeremy Fitzhardinge <jeremy@xensource.com> | 2007-10-16 14:51:30 -0400 |
---|---|---|
committer | Jeremy Fitzhardinge <jeremy@goop.org> | 2007-10-16 14:51:30 -0400 |
commit | 74260714c56de4f967fcb2f17a8656bc574b75be (patch) | |
tree | f02bcd991285a20a543fae69f916577c8447b8f4 /arch/x86/xen/enlighten.c | |
parent | 9f79991d4186089e228274196413572cc000143b (diff) |
xen: lock pte pages while pinning/unpinning
When a pagetable is created, it is made globally visible in the rmap
prio tree before it is pinned via arch_dup_mmap(), and remains in the
rmap tree while it is unpinned with arch_exit_mmap().
This means that other CPUs may race with the pinning/unpinning
process, and see a pte in the window between when it gets marked RO
and when it is actually pinned, causing any pte updates to fail with
write-protect faults.
As a result, all pte pages must be properly locked, and only unlocked
once the pinning/unpinning process has finished.
In order to avoid taking spinlocks for the whole pagetable - which may
overflow the PREEMPT_BITS portion of the preempt counter - this patch
locks and pins each pte page individually, and then finally pins the
whole pagetable.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andi Kleen <ak@suse.de>
Cc: Keir Fraser <keir@xensource.com>
Cc: Jan Beulich <jbeulich@novell.com>
Diffstat (limited to 'arch/x86/xen/enlighten.c')
-rw-r--r-- | arch/x86/xen/enlighten.c | 30 |
1 files changed, 21 insertions, 9 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index e334bf7cb327..4186cb6a7f5a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -666,6 +666,15 @@ static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn) | |||
666 | make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); | 666 | make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); |
667 | } | 667 | } |
668 | 668 | ||
669 | static void pin_pagetable_pfn(unsigned level, unsigned long pfn) | ||
670 | { | ||
671 | struct mmuext_op op; | ||
672 | op.cmd = level; | ||
673 | op.arg1.mfn = pfn_to_mfn(pfn); | ||
674 | if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) | ||
675 | BUG(); | ||
676 | } | ||
677 | |||
669 | /* This needs to make sure the new pte page is pinned iff its being | 678 | /* This needs to make sure the new pte page is pinned iff its being |
670 | attached to a pinned pagetable. */ | 679 | attached to a pinned pagetable. */ |
671 | static void xen_alloc_pt(struct mm_struct *mm, u32 pfn) | 680 | static void xen_alloc_pt(struct mm_struct *mm, u32 pfn) |
@@ -675,9 +684,10 @@ static void xen_alloc_pt(struct mm_struct *mm, u32 pfn) | |||
675 | if (PagePinned(virt_to_page(mm->pgd))) { | 684 | if (PagePinned(virt_to_page(mm->pgd))) { |
676 | SetPagePinned(page); | 685 | SetPagePinned(page); |
677 | 686 | ||
678 | if (!PageHighMem(page)) | 687 | if (!PageHighMem(page)) { |
679 | make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); | 688 | make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); |
680 | else | 689 | pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); |
690 | } else | ||
681 | /* make sure there are no stray mappings of | 691 | /* make sure there are no stray mappings of |
682 | this page */ | 692 | this page */ |
683 | kmap_flush_unused(); | 693 | kmap_flush_unused(); |
@@ -690,8 +700,10 @@ static void xen_release_pt(u32 pfn) | |||
690 | struct page *page = pfn_to_page(pfn); | 700 | struct page *page = pfn_to_page(pfn); |
691 | 701 | ||
692 | if (PagePinned(page)) { | 702 | if (PagePinned(page)) { |
693 | if (!PageHighMem(page)) | 703 | if (!PageHighMem(page)) { |
704 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); | ||
694 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); | 705 | make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); |
706 | } | ||
695 | } | 707 | } |
696 | } | 708 | } |
697 | 709 | ||
@@ -806,15 +818,15 @@ static __init void xen_pagetable_setup_done(pgd_t *base) | |||
806 | /* Actually pin the pagetable down, but we can't set PG_pinned | 818 | /* Actually pin the pagetable down, but we can't set PG_pinned |
807 | yet because the page structures don't exist yet. */ | 819 | yet because the page structures don't exist yet. */ |
808 | { | 820 | { |
809 | struct mmuext_op op; | 821 | unsigned level; |
822 | |||
810 | #ifdef CONFIG_X86_PAE | 823 | #ifdef CONFIG_X86_PAE |
811 | op.cmd = MMUEXT_PIN_L3_TABLE; | 824 | level = MMUEXT_PIN_L3_TABLE; |
812 | #else | 825 | #else |
813 | op.cmd = MMUEXT_PIN_L3_TABLE; | 826 | level = MMUEXT_PIN_L2_TABLE; |
814 | #endif | 827 | #endif |
815 | op.arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(base))); | 828 | |
816 | if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) | 829 | pin_pagetable_pfn(level, PFN_DOWN(__pa(base))); |
817 | BUG(); | ||
818 | } | 830 | } |
819 | } | 831 | } |
820 | 832 | ||