aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- arch/x86/xen/mmu.c 41
1 files changed, 35 insertions, 6 deletions
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index aa37469da696..d3752b6ce6e6 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -590,8 +590,6 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
590 pmdidx_limit = 0; 590 pmdidx_limit = 0;
591#endif 591#endif
592 592
593 flush |= (*func)(virt_to_page(pgd), PT_PGD);
594
595 for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) { 593 for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
596 pud_t *pud; 594 pud_t *pud;
597 595
@@ -637,7 +635,11 @@ static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
637 } 635 }
638 } 636 }
639 } 637 }
638
640out: 639out:
640 /* Do the top level last, so that the callbacks can use it as
641 a cue to do final things like tlb flushes. */
642 flush |= (*func)(virt_to_page(pgd), PT_PGD);
641 643
642 return flush; 644 return flush;
643} 645}
@@ -691,6 +693,26 @@ static int pin_page(struct page *page, enum pt_level level)
691 693
692 flush = 0; 694 flush = 0;
693 695
696 /*
697 * We need to hold the pagetable lock between the time
698 * we make the pagetable RO and when we actually pin
699 * it. If we don't, then other users may come in and
700 * attempt to update the pagetable by writing it,
701 * which will fail because the memory is RO but not
702 * pinned, so Xen won't do the trap'n'emulate.
703 *
704 * If we're using split pte locks, we can't hold the
705 * entire pagetable's worth of locks during the
706 * traverse, because we may wrap the preempt count (8
707 * bits). The solution is to mark RO and pin each PTE
708 * page while holding the lock. This means the number
709 * of locks we end up holding is never more than a
710 * batch size (~32 entries, at present).
711 *
712 * If we're not using split pte locks, we needn't pin
713 * the PTE pages independently, because we're
714 * protected by the overall pagetable lock.
715 */
694 ptl = NULL; 716 ptl = NULL;
695 if (level == PT_PTE) 717 if (level == PT_PTE)
696 ptl = lock_pte(page); 718 ptl = lock_pte(page);
@@ -699,10 +721,9 @@ static int pin_page(struct page *page, enum pt_level level)
699 pfn_pte(pfn, PAGE_KERNEL_RO), 721 pfn_pte(pfn, PAGE_KERNEL_RO),
700 level == PT_PGD ? UVMF_TLB_FLUSH : 0); 722 level == PT_PGD ? UVMF_TLB_FLUSH : 0);
701 723
702 if (level == PT_PTE) 724 if (ptl) {
703 xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn); 725 xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);
704 726
705 if (ptl) {
706 /* Queue a deferred unlock for when this batch 727 /* Queue a deferred unlock for when this batch
707 is completed. */ 728 is completed. */
708 xen_mc_callback(do_unlock, ptl); 729 xen_mc_callback(do_unlock, ptl);
@@ -796,10 +817,18 @@ static int unpin_page(struct page *page, enum pt_level level)
796 spinlock_t *ptl = NULL; 817 spinlock_t *ptl = NULL;
797 struct multicall_space mcs; 818 struct multicall_space mcs;
798 819
820 /*
821 * Do the converse to pin_page. If we're using split
822 * pte locks, we must be holding the lock while
823 * the pte page is unpinned but still RO to prevent
824 * concurrent updates from seeing it in this
825 * partially-pinned state.
826 */
799 if (level == PT_PTE) { 827 if (level == PT_PTE) {
800 ptl = lock_pte(page); 828 ptl = lock_pte(page);
801 829
802 xen_do_pin(MMUEXT_UNPIN_TABLE, pfn); 830 if (ptl)
831 xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
803 } 832 }
804 833
805 mcs = __xen_mc_entry(0); 834 mcs = __xen_mc_entry(0);
@@ -837,7 +866,7 @@ static void xen_pgd_unpin(pgd_t *pgd)
837 866
838#ifdef CONFIG_X86_PAE 867#ifdef CONFIG_X86_PAE
839 /* Need to make sure unshared kernel PMD is unpinned */ 868 /* Need to make sure unshared kernel PMD is unpinned */
840 pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); 869 unpin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
841#endif 870#endif
842 871
843 pgd_walk(pgd, unpin_page, USER_LIMIT); 872 pgd_walk(pgd, unpin_page, USER_LIMIT);