| field | value | date |
|---|---|---|
| author | Jeremy Fitzhardinge <jeremy@goop.org> | 2008-10-08 16:01:39 -0400 |
| committer | Ingo Molnar <mingo@elte.hu> | 2008-10-09 08:25:19 -0400 |
| commit | eefb47f6a1e855653d275cb90592a3587ea93a09 | |
| tree | f6b5b611a8900d975874ab0ac9e7f5df935ef862 | |
| parent | d19c8e516e0a17e049bcfbe96f86e040254ddf14 | |
xen: use spin_lock_nest_lock when pinning a pagetable
When pinning/unpinning a pagetable with split pte locks, we can end up
holding multiple pte locks at once (we need to hold the locks while
there's a pending batched hypercall affecting the pte page). Because
all the pte locks are in the same lock class, lockdep thinks that
we're potentially taking a lock recursively.
This warning is spurious because we always take the pte locks while
holding mm->page_table_lock. lockdep now has spin_lock_nest_lock to
express this kind of dominant lock use, so use it here so that lockdep
knows what's going on.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
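For readers unfamiliar with the annotation: `spin_lock_nest_lock(lock, nest_lock)` takes `lock` as usual, but tells lockdep that `nest_lock` is already held, so holding several locks of the same class at once is serialized by that outer lock rather than being a recursion bug. Below is a minimal sketch of the pattern; the struct and names (`struct table`, `big_lock`, `row_lock`, `update_all_rows`) are hypothetical, invented for illustration — only `spin_lock_nest_lock()` itself is the real kernel API used by this patch.

```c
#include <linux/spinlock.h>

#define NROWS 8

/* Hypothetical example; only the locking pattern mirrors the patch. */
struct table {
	spinlock_t big_lock;		/* dominates every row_lock holder */
	spinlock_t row_lock[NROWS];	/* all in one lockdep class */
};

static void table_init(struct table *t)
{
	int i;

	spin_lock_init(&t->big_lock);
	for (i = 0; i < NROWS; i++)
		spin_lock_init(&t->row_lock[i]);
}

static void update_all_rows(struct table *t)
{
	int i;

	spin_lock(&t->big_lock);
	for (i = 0; i < NROWS; i++)
		/*
		 * A plain spin_lock() here would trip lockdep's
		 * recursive-locking check, because every row_lock
		 * shares a class.  The nest_lock form records that
		 * big_lock serializes everyone who takes multiple
		 * row locks, so the nesting is known to be safe.
		 */
		spin_lock_nest_lock(&t->row_lock[i], &t->big_lock);

	/* ... update all rows while every lock is held ... */

	while (i--)
		spin_unlock(&t->row_lock[i]);
	spin_unlock(&t->big_lock);
}
```

In this patch the dominant lock is mm->page_table_lock, which callers such as xen_activate_mm() and xen_dup_mmap() already hold across the whole pin/unpin walk.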
```
 -rw-r--r--  arch/x86/xen/mmu.c | 74
 -rw-r--r--  arch/x86/xen/mmu.h |  3
 2 files changed, 48 insertions(+), 29 deletions(-)
```
```diff
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 64e58681767e..ae173f6edd8b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -651,9 +651,12 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
  * For 64-bit, we must skip the Xen hole in the middle of the address
  * space, just after the big x86-64 virtual hole.
  */
-static int xen_pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
+static int xen_pgd_walk(struct mm_struct *mm,
+			int (*func)(struct mm_struct *mm, struct page *,
+				    enum pt_level),
 			unsigned long limit)
 {
+	pgd_t *pgd = mm->pgd;
 	int flush = 0;
 	unsigned hole_low, hole_high;
 	unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
@@ -698,7 +701,7 @@ static int xen_pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 		pud = pud_offset(&pgd[pgdidx], 0);
 
 		if (PTRS_PER_PUD > 1) /* not folded */
-			flush |= (*func)(virt_to_page(pud), PT_PUD);
+			flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
 
 		for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
 			pmd_t *pmd;
@@ -713,7 +716,7 @@ static int xen_pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 			pmd = pmd_offset(&pud[pudidx], 0);
 
 			if (PTRS_PER_PMD > 1) /* not folded */
-				flush |= (*func)(virt_to_page(pmd), PT_PMD);
+				flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
 
 			for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
 				struct page *pte;
@@ -727,7 +730,7 @@ static int xen_pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 					continue;
 
 				pte = pmd_page(pmd[pmdidx]);
-				flush |= (*func)(pte, PT_PTE);
+				flush |= (*func)(mm, pte, PT_PTE);
 			}
 		}
 	}
@@ -735,20 +738,20 @@ static int xen_pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
 out:
 	/* Do the top level last, so that the callbacks can use it as
 	   a cue to do final things like tlb flushes. */
-	flush |= (*func)(virt_to_page(pgd), PT_PGD);
+	flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
 
 	return flush;
 }
 
 /* If we're using split pte locks, then take the page's lock and
    return a pointer to it.  Otherwise return NULL. */
-static spinlock_t *xen_pte_lock(struct page *page)
+static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
 {
 	spinlock_t *ptl = NULL;
 
 #if USE_SPLIT_PTLOCKS
 	ptl = __pte_lockptr(page);
-	spin_lock(ptl);
+	spin_lock_nest_lock(ptl, &mm->page_table_lock);
 #endif
 
 	return ptl;
@@ -772,7 +775,8 @@ static void xen_do_pin(unsigned level, unsigned long pfn)
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 }
 
-static int xen_pin_page(struct page *page, enum pt_level level)
+static int xen_pin_page(struct mm_struct *mm, struct page *page,
+			enum pt_level level)
 {
 	unsigned pgfl = TestSetPagePinned(page);
 	int flush;
@@ -813,7 +817,7 @@ static int xen_pin_page(struct page *page, enum pt_level level)
 		 */
 		ptl = NULL;
 		if (level == PT_PTE)
-			ptl = xen_pte_lock(page);
+			ptl = xen_pte_lock(page, mm);
 
 		MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
 					pfn_pte(pfn, PAGE_KERNEL_RO),
@@ -834,11 +838,11 @@ static int xen_pin_page(struct page *page, enum pt_level level)
 /* This is called just after a mm has been created, but it has not
    been used yet.  We need to make sure that its pagetable is all
    read-only, and can be pinned. */
-void xen_pgd_pin(pgd_t *pgd)
+static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
 {
 	xen_mc_batch();
 
-	if (xen_pgd_walk(pgd, xen_pin_page, USER_LIMIT)) {
+	if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) {
 		/* re-enable interrupts for kmap_flush_unused */
 		xen_mc_issue(0);
 		kmap_flush_unused();
@@ -852,25 +856,35 @@ void xen_pgd_pin(pgd_t *pgd)
 	xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
 
 	if (user_pgd) {
-		xen_pin_page(virt_to_page(user_pgd), PT_PGD);
+		xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
 		xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd)));
 	}
 }
 #else /* CONFIG_X86_32 */
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is pinnable */
-	xen_pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+	xen_pin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])),
+		     PT_PMD);
 #endif
 	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
 #endif /* CONFIG_X86_64 */
 	xen_mc_issue(0);
 }
 
+static void xen_pgd_pin(struct mm_struct *mm)
+{
+	__xen_pgd_pin(mm, mm->pgd);
+}
+
 /*
  * On save, we need to pin all pagetables to make sure they get their
  * mfns turned into pfns.  Search the list for any unpinned pgds and pin
  * them (unpinned pgds are not currently in use, probably because the
  * process is under construction or destruction).
+ *
+ * Expected to be called in stop_machine() ("equivalent to taking
+ * every spinlock in the system"), so the locking doesn't really
+ * matter all that much.
  */
 void xen_mm_pin_all(void)
 {
@@ -881,7 +895,7 @@ void xen_mm_pin_all(void)
 
 	list_for_each_entry(page, &pgd_list, lru) {
 		if (!PagePinned(page)) {
-			xen_pgd_pin((pgd_t *)page_address(page));
+			__xen_pgd_pin(&init_mm, (pgd_t *)page_address(page));
 			SetPageSavePinned(page);
 		}
 	}
@@ -894,7 +908,8 @@ void xen_mm_pin_all(void)
  * that's before we have page structures to store the bits.  So do all
  * the book-keeping now.
  */
-static __init int xen_mark_pinned(struct page *page, enum pt_level level)
+static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page,
+				  enum pt_level level)
 {
 	SetPagePinned(page);
 	return 0;
@@ -902,10 +917,11 @@ static __init int xen_mark_pinned(struct page *page, enum pt_level level)
 
 void __init xen_mark_init_mm_pinned(void)
 {
-	xen_pgd_walk(init_mm.pgd, xen_mark_pinned, FIXADDR_TOP);
+	xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
 }
 
-static int xen_unpin_page(struct page *page, enum pt_level level)
+static int xen_unpin_page(struct mm_struct *mm, struct page *page,
+			  enum pt_level level)
 {
 	unsigned pgfl = TestClearPagePinned(page);
 
@@ -923,7 +939,7 @@ static int xen_unpin_page(struct page *page, enum pt_level level)
 		 * partially-pinned state.
 		 */
 		if (level == PT_PTE) {
-			ptl = xen_pte_lock(page);
+			ptl = xen_pte_lock(page, mm);
 
 			if (ptl)
 				xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
@@ -945,7 +961,7 @@ static int xen_unpin_page(struct page *page, enum pt_level level)
 }
 
 /* Release a pagetables pages back as normal RW */
-static void xen_pgd_unpin(pgd_t *pgd)
+static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
 {
 	xen_mc_batch();
 
@@ -957,21 +973,27 @@ static void xen_pgd_unpin(pgd_t *pgd)
 
 	if (user_pgd) {
 		xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd)));
-		xen_unpin_page(virt_to_page(user_pgd), PT_PGD);
+		xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
 	}
 }
 #endif
 
 #ifdef CONFIG_X86_PAE
 	/* Need to make sure unshared kernel PMD is unpinned */
-	xen_unpin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+	xen_unpin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])),
+		       PT_PMD);
 #endif
 
-	xen_pgd_walk(pgd, xen_unpin_page, USER_LIMIT);
+	xen_pgd_walk(mm, xen_unpin_page, USER_LIMIT);
 
 	xen_mc_issue(0);
 }
 
+static void xen_pgd_unpin(struct mm_struct *mm)
+{
+	__xen_pgd_unpin(mm, mm->pgd);
+}
+
 /*
  * On resume, undo any pinning done at save, so that the rest of the
  * kernel doesn't see any unexpected pinned pagetables.
@@ -986,7 +1008,7 @@ void xen_mm_unpin_all(void)
 	list_for_each_entry(page, &pgd_list, lru) {
 		if (PageSavePinned(page)) {
 			BUG_ON(!PagePinned(page));
-			xen_pgd_unpin((pgd_t *)page_address(page));
+			__xen_pgd_unpin(&init_mm, (pgd_t *)page_address(page));
 			ClearPageSavePinned(page);
 		}
 	}
@@ -997,14 +1019,14 @@ void xen_mm_unpin_all(void)
 void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
 {
 	spin_lock(&next->page_table_lock);
-	xen_pgd_pin(next->pgd);
+	xen_pgd_pin(next);
 	spin_unlock(&next->page_table_lock);
 }
 
 void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
 	spin_lock(&mm->page_table_lock);
-	xen_pgd_pin(mm->pgd);
+	xen_pgd_pin(mm);
 	spin_unlock(&mm->page_table_lock);
 }
 
@@ -1095,7 +1117,7 @@ void xen_exit_mmap(struct mm_struct *mm)
 
 	/* pgd may not be pinned in the error exit path of execve */
 	if (xen_page_pinned(mm->pgd))
-		xen_pgd_unpin(mm->pgd);
+		xen_pgd_unpin(mm);
 
 	spin_unlock(&mm->page_table_lock);
 }
```
```diff
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 0f59bd03f9e3..98d71659da5a 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -18,9 +18,6 @@ void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next);
 void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
 void xen_exit_mmap(struct mm_struct *mm);
 
-void xen_pgd_pin(pgd_t *pgd);
-//void xen_pgd_unpin(pgd_t *pgd);
-
 pteval_t xen_pte_val(pte_t);
 pmdval_t xen_pmd_val(pmd_t);
 pgdval_t xen_pgd_val(pgd_t);
```
