path: root/arch/x86/xen/mmu.c
author    Jeremy Fitzhardinge <jeremy@goop.org>    2008-10-08 16:01:39 -0400
committer Ingo Molnar <mingo@elte.hu>              2008-10-09 08:25:19 -0400
commit    eefb47f6a1e855653d275cb90592a3587ea93a09 (patch)
tree      f6b5b611a8900d975874ab0ac9e7f5df935ef862 /arch/x86/xen/mmu.c
parent    d19c8e516e0a17e049bcfbe96f86e040254ddf14 (diff)
xen: use spin_lock_nest_lock when pinning a pagetable
When pinning/unpinning a pagetable with split pte locks, we can end up holding multiple pte locks at once (we need to hold the locks while there's a pending batched hypercall affecting the pte page). Because all the pte locks are in the same lock class, lockdep thinks that we're potentially taking a lock recursively.

This warning is spurious because we always take the pte locks while holding mm->page_table_lock. lockdep now has spin_lock_nest_lock to express this kind of dominant lock use, so use it here so that lockdep knows what's going on.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
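As background for the change below, here is a minimal, self-contained sketch of the lockdep pattern the commit relies on. It is not taken from the patch (the pte_page and fake_mm types and the pin_pte_pages() helper are invented for illustration), but spin_lock_nest_lock() is the real annotation used in xen_pte_lock(): several locks in the same lockdep class may be held at once, provided lockdep is told they all nest under a single dominant lock.

	#include <linux/spinlock.h>

	/* Illustrative stand-ins only; the locks are assumed initialized. */
	struct pte_page {
		spinlock_t ptl;			/* all instances share one lockdep class */
	};

	struct fake_mm {
		spinlock_t page_table_lock;	/* the dominant lock */
	};

	static void pin_pte_pages(struct fake_mm *mm, struct pte_page *pages, int n)
	{
		int i;

		/* The per-page locks are only ever taken under page_table_lock. */
		spin_lock(&mm->page_table_lock);

		for (i = 0; i < n; i++)
			/*
			 * A plain spin_lock() here would make lockdep warn about
			 * recursive locking, since every ptl is in the same class.
			 * spin_lock_nest_lock() records that the nesting is
			 * serialized by page_table_lock, silencing the spurious
			 * warning.
			 */
			spin_lock_nest_lock(&pages[i].ptl, &mm->page_table_lock);

		/* ... a batched hypercall would be issued here ... */

		while (--i >= 0)
			spin_unlock(&pages[i].ptl);

		spin_unlock(&mm->page_table_lock);
	}

The patch applies this pattern to the split pte locks, with mm->page_table_lock as the dominant lock, which is why the walk and pin/unpin callbacks now need the mm passed down to them.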
Diffstat (limited to 'arch/x86/xen/mmu.c')
-rw-r--r--  arch/x86/xen/mmu.c | 74
1 file changed, 48 insertions(+), 26 deletions(-)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 64e58681767e..ae173f6edd8b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -651,9 +651,12 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
  * For 64-bit, we must skip the Xen hole in the middle of the address
  * space, just after the big x86-64 virtual hole.
  */
-static int xen_pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
+static int xen_pgd_walk(struct mm_struct *mm,
+			int (*func)(struct mm_struct *mm, struct page *,
+				    enum pt_level),
 			unsigned long limit)
 {
+	pgd_t *pgd = mm->pgd;
 	int flush = 0;
 	unsigned hole_low, hole_high;
 	unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
@@ -698,7 +701,7 @@ static int xen_pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 		pud = pud_offset(&pgd[pgdidx], 0);
 
 		if (PTRS_PER_PUD > 1) /* not folded */
-			flush |= (*func)(virt_to_page(pud), PT_PUD);
+			flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
 
 		for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
 			pmd_t *pmd;
@@ -713,7 +716,7 @@ static int xen_pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 			pmd = pmd_offset(&pud[pudidx], 0);
 
 			if (PTRS_PER_PMD > 1) /* not folded */
-				flush |= (*func)(virt_to_page(pmd), PT_PMD);
+				flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
 
 			for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
 				struct page *pte;
@@ -727,7 +730,7 @@ static int xen_pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 					continue;
 
 				pte = pmd_page(pmd[pmdidx]);
-				flush |= (*func)(pte, PT_PTE);
+				flush |= (*func)(mm, pte, PT_PTE);
 			}
 		}
 	}
@@ -735,20 +738,20 @@ static int xen_pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 out:
 	/* Do the top level last, so that the callbacks can use it as
 	   a cue to do final things like tlb flushes. */
-	flush |= (*func)(virt_to_page(pgd), PT_PGD);
+	flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
 
 	return flush;
 }
 
 /* If we're using split pte locks, then take the page's lock and
    return a pointer to it.  Otherwise return NULL. */
-static spinlock_t *xen_pte_lock(struct page *page)
+static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
 {
 	spinlock_t *ptl = NULL;
 
 #if USE_SPLIT_PTLOCKS
 	ptl = __pte_lockptr(page);
-	spin_lock(ptl);
+	spin_lock_nest_lock(ptl, &mm->page_table_lock);
 #endif
 
 	return ptl;
@@ -772,7 +775,8 @@ static void xen_do_pin(unsigned level, unsigned long pfn)
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 }
 
-static int xen_pin_page(struct page *page, enum pt_level level)
+static int xen_pin_page(struct mm_struct *mm, struct page *page,
+			enum pt_level level)
 {
 	unsigned pgfl = TestSetPagePinned(page);
 	int flush;
@@ -813,7 +817,7 @@ static int xen_pin_page(struct page *page, enum pt_level level)
 		 */
 		ptl = NULL;
 		if (level == PT_PTE)
-			ptl = xen_pte_lock(page);
+			ptl = xen_pte_lock(page, mm);
 
 		MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
 					pfn_pte(pfn, PAGE_KERNEL_RO),
@@ -834,11 +838,11 @@ static int xen_pin_page(struct page *page, enum pt_level level)
 /* This is called just after a mm has been created, but it has not
    been used yet.  We need to make sure that its pagetable is all
    read-only, and can be pinned. */
-void xen_pgd_pin(pgd_t *pgd)
+static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
 {
 	xen_mc_batch();
 
-	if (xen_pgd_walk(pgd, xen_pin_page, USER_LIMIT)) {
+	if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) {
 		/* re-enable interrupts for kmap_flush_unused */
 		xen_mc_issue(0);
 		kmap_flush_unused();
@@ -852,25 +856,35 @@ void xen_pgd_pin(pgd_t *pgd)
 		xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
 
 		if (user_pgd) {
-			xen_pin_page(virt_to_page(user_pgd), PT_PGD);
+			xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
 			xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd)));
 		}
 	}
 #else /* CONFIG_X86_32 */
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is pinnable */
-	xen_pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+	xen_pin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])),
+		     PT_PMD);
 #endif
 	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
 #endif /* CONFIG_X86_64 */
 	xen_mc_issue(0);
 }
 
+static void xen_pgd_pin(struct mm_struct *mm)
+{
+	__xen_pgd_pin(mm, mm->pgd);
+}
+
 /*
  * On save, we need to pin all pagetables to make sure they get their
  * mfns turned into pfns.  Search the list for any unpinned pgds and pin
  * them (unpinned pgds are not currently in use, probably because the
  * process is under construction or destruction).
+ *
+ * Expected to be called in stop_machine() ("equivalent to taking
+ * every spinlock in the system"), so the locking doesn't really
+ * matter all that much.
  */
 void xen_mm_pin_all(void)
 {
@@ -881,7 +895,7 @@ void xen_mm_pin_all(void)
 
 	list_for_each_entry(page, &pgd_list, lru) {
 		if (!PagePinned(page)) {
-			xen_pgd_pin((pgd_t *)page_address(page));
+			__xen_pgd_pin(&init_mm, (pgd_t *)page_address(page));
 			SetPageSavePinned(page);
 		}
 	}
@@ -894,7 +908,8 @@ void xen_mm_pin_all(void)
  * that's before we have page structures to store the bits.  So do all
  * the book-keeping now.
  */
-static __init int xen_mark_pinned(struct page *page, enum pt_level level)
+static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page,
+				  enum pt_level level)
 {
 	SetPagePinned(page);
 	return 0;
@@ -902,10 +917,11 @@ static __init int xen_mark_pinned(struct page *page, enum pt_level level)
 
 void __init xen_mark_init_mm_pinned(void)
 {
-	xen_pgd_walk(init_mm.pgd, xen_mark_pinned, FIXADDR_TOP);
+	xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
 }
 
-static int xen_unpin_page(struct page *page, enum pt_level level)
+static int xen_unpin_page(struct mm_struct *mm, struct page *page,
+			  enum pt_level level)
 {
 	unsigned pgfl = TestClearPagePinned(page);
 
@@ -923,7 +939,7 @@ static int xen_unpin_page(struct page *page, enum pt_level level)
 		 * partially-pinned state.
 		 */
 		if (level == PT_PTE) {
-			ptl = xen_pte_lock(page);
+			ptl = xen_pte_lock(page, mm);
 
 			if (ptl)
 				xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
@@ -945,7 +961,7 @@ static int xen_unpin_page(struct page *page, enum pt_level level)
 }
 
 /* Release a pagetables pages back as normal RW */
-static void xen_pgd_unpin(pgd_t *pgd)
+static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
 {
 	xen_mc_batch();
 
@@ -957,21 +973,27 @@ static void xen_pgd_unpin(pgd_t *pgd)
 
 		if (user_pgd) {
 			xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd)));
-			xen_unpin_page(virt_to_page(user_pgd), PT_PGD);
+			xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
 		}
 	}
 #endif
 
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is unpinned */
-	xen_unpin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+	xen_unpin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])),
+		       PT_PMD);
 #endif
 
-	xen_pgd_walk(pgd, xen_unpin_page, USER_LIMIT);
+	xen_pgd_walk(mm, xen_unpin_page, USER_LIMIT);
 
 	xen_mc_issue(0);
 }
 
+static void xen_pgd_unpin(struct mm_struct *mm)
+{
+	__xen_pgd_unpin(mm, mm->pgd);
+}
+
 /*
  * On resume, undo any pinning done at save, so that the rest of the
  * kernel doesn't see any unexpected pinned pagetables.
@@ -986,7 +1008,7 @@ void xen_mm_unpin_all(void)
 	list_for_each_entry(page, &pgd_list, lru) {
 		if (PageSavePinned(page)) {
 			BUG_ON(!PagePinned(page));
-			xen_pgd_unpin((pgd_t *)page_address(page));
+			__xen_pgd_unpin(&init_mm, (pgd_t *)page_address(page));
 			ClearPageSavePinned(page);
 		}
 	}
@@ -997,14 +1019,14 @@ void xen_mm_unpin_all(void)
 void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
 {
 	spin_lock(&next->page_table_lock);
-	xen_pgd_pin(next->pgd);
+	xen_pgd_pin(next);
 	spin_unlock(&next->page_table_lock);
 }
 
 void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
 	spin_lock(&mm->page_table_lock);
-	xen_pgd_pin(mm->pgd);
+	xen_pgd_pin(mm);
 	spin_unlock(&mm->page_table_lock);
 }
 
@@ -1095,7 +1117,7 @@ void xen_exit_mmap(struct mm_struct *mm)
 
 	/* pgd may not be pinned in the error exit path of execve */
 	if (xen_page_pinned(mm->pgd))
-		xen_pgd_unpin(mm->pgd);
+		xen_pgd_unpin(mm);
 
 	spin_unlock(&mm->page_table_lock);
 }