author		Kirill A. Shutemov <kirill.shutemov@linux.intel.com>	2013-11-14 17:31:04 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-11-14 19:32:15 -0500
commit		c4088ebdca64c9a2e34a38177d2249805ede1f4b
tree		9e761c697ad6fab039adb5280bf26fb41f8e17aa /mm/huge_memory.c
parent		cb900f41215447433cbc456d1c4294e858a84d7c
mm: convert the rest to new page table lock api
Only trivial cases left. Let's convert them altogether.
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--	mm/huge_memory.c	108
1 file changed, 60 insertions(+), 48 deletions(-)
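For context, the conversion pattern repeated in every hunk below looks roughly like the sketch that follows. It is illustrative only and not part of the patch: example_touch_pmd() is a hypothetical helper, while pmd_lock(), pmd_lockptr() and spin_lock_nested() are the interfaces the patch switches to. pmd_lock() takes and returns the spinlock that covers the given pmd (which may be a split per-page-table lock rather than mm->page_table_lock); pmd_lockptr() returns the same lock without taking it.

#include <linux/mm.h>

/* Hypothetical helper illustrating the conversion pattern; not in the patch. */
static void example_touch_pmd(struct mm_struct *mm, pmd_t *pmd)
{
	spinlock_t *ptl;

	/*
	 * Old style, removed throughout this patch:
	 *	spin_lock(&mm->page_table_lock);
	 *	... inspect or update *pmd ...
	 *	spin_unlock(&mm->page_table_lock);
	 */

	ptl = pmd_lock(mm, pmd);	/* take the lock covering this pmd */
	if (!pmd_none(*pmd)) {
		/* *pmd is stable while ptl is held */
	}
	spin_unlock(ptl);

	/* When only the lock pointer is needed, e.g. for nested locking: */
	ptl = pmd_lockptr(mm, pmd);
	spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
	spin_unlock(ptl);
}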
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c2082ab4fc93..bccd5a628ea6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -710,6 +710,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 					struct page *page)
 {
 	pgtable_t pgtable;
+	spinlock_t *ptl;
 
 	VM_BUG_ON(!PageCompound(page));
 	pgtable = pte_alloc_one(mm, haddr);
@@ -724,9 +725,9 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 	 */
 	__SetPageUptodate(page);
 
-	spin_lock(&mm->page_table_lock);
+	ptl = pmd_lock(mm, pmd);
 	if (unlikely(!pmd_none(*pmd))) {
-		spin_unlock(&mm->page_table_lock);
+		spin_unlock(ptl);
 		mem_cgroup_uncharge_page(page);
 		put_page(page);
 		pte_free(mm, pgtable);
@@ -739,7 +740,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 		set_pmd_at(mm, haddr, pmd, entry);
 		add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
 		atomic_long_inc(&mm->nr_ptes);
-		spin_unlock(&mm->page_table_lock);
+		spin_unlock(ptl);
 	}
 
 	return 0;
@@ -759,6 +760,7 @@ static inline struct page *alloc_hugepage_vma(int defrag,
 			       HPAGE_PMD_ORDER, vma, haddr, nd);
 }
 
+/* Caller must hold page table lock. */
 static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
 		struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
 		struct page *zero_page)
@@ -790,6 +792,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return VM_FAULT_OOM;
 	if (!(flags & FAULT_FLAG_WRITE) &&
 			transparent_hugepage_use_zero_page()) {
+		spinlock_t *ptl;
 		pgtable_t pgtable;
 		struct page *zero_page;
 		bool set;
@@ -802,10 +805,10 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			count_vm_event(THP_FAULT_FALLBACK);
 			return VM_FAULT_FALLBACK;
 		}
-		spin_lock(&mm->page_table_lock);
+		ptl = pmd_lock(mm, pmd);
 		set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
 				zero_page);
-		spin_unlock(&mm->page_table_lock);
+		spin_unlock(ptl);
 		if (!set) {
 			pte_free(mm, pgtable);
 			put_huge_zero_page();
@@ -838,6 +841,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		  pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
 		  struct vm_area_struct *vma)
 {
+	spinlock_t *dst_ptl, *src_ptl;
 	struct page *src_page;
 	pmd_t pmd;
 	pgtable_t pgtable;
@@ -848,8 +852,9 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (unlikely(!pgtable))
 		goto out;
 
-	spin_lock(&dst_mm->page_table_lock);
-	spin_lock_nested(&src_mm->page_table_lock, SINGLE_DEPTH_NESTING);
+	dst_ptl = pmd_lock(dst_mm, dst_pmd);
+	src_ptl = pmd_lockptr(src_mm, src_pmd);
+	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 
 	ret = -EAGAIN;
 	pmd = *src_pmd;
@@ -858,7 +863,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		goto out_unlock;
 	}
 	/*
-	 * mm->page_table_lock is enough to be sure that huge zero pmd is not
+	 * When page table lock is held, the huge zero pmd should not be
 	 * under splitting since we don't split the page itself, only pmd to
 	 * a page table.
 	 */
@@ -879,8 +884,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	}
 	if (unlikely(pmd_trans_splitting(pmd))) {
 		/* split huge page running from under us */
-		spin_unlock(&src_mm->page_table_lock);
-		spin_unlock(&dst_mm->page_table_lock);
+		spin_unlock(src_ptl);
+		spin_unlock(dst_ptl);
 		pte_free(dst_mm, pgtable);
 
 		wait_split_huge_page(vma->anon_vma, src_pmd); /* src_vma */
@@ -900,8 +905,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 
 	ret = 0;
 out_unlock:
-	spin_unlock(&src_mm->page_table_lock);
-	spin_unlock(&dst_mm->page_table_lock);
+	spin_unlock(src_ptl);
+	spin_unlock(dst_ptl);
 out:
 	return ret;
 }
@@ -912,10 +917,11 @@ void huge_pmd_set_accessed(struct mm_struct *mm,
 			   pmd_t *pmd, pmd_t orig_pmd,
 			   int dirty)
 {
+	spinlock_t *ptl;
 	pmd_t entry;
 	unsigned long haddr;
 
-	spin_lock(&mm->page_table_lock);
+	ptl = pmd_lock(mm, pmd);
 	if (unlikely(!pmd_same(*pmd, orig_pmd)))
 		goto unlock;
 
@@ -925,13 +931,14 @@ void huge_pmd_set_accessed(struct mm_struct *mm,
 	update_mmu_cache_pmd(vma, address, pmd);
 
 unlock:
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 }
 
 static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
 		struct vm_area_struct *vma, unsigned long address,
 		pmd_t *pmd, pmd_t orig_pmd, unsigned long haddr)
 {
+	spinlock_t *ptl;
 	pgtable_t pgtable;
 	pmd_t _pmd;
 	struct page *page;
@@ -958,7 +965,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
 	mmun_end = haddr + HPAGE_PMD_SIZE;
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 
-	spin_lock(&mm->page_table_lock);
+	ptl = pmd_lock(mm, pmd);
 	if (unlikely(!pmd_same(*pmd, orig_pmd)))
 		goto out_free_page;
 
@@ -985,7 +992,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
 	}
 	smp_wmb(); /* make pte visible before pmd */
 	pmd_populate(mm, pmd, pgtable);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	put_huge_zero_page();
 	inc_mm_counter(mm, MM_ANONPAGES);
 
@@ -995,7 +1002,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
 out:
 	return ret;
 out_free_page:
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	mem_cgroup_uncharge_page(page);
 	put_page(page);
@@ -1009,6 +1016,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 					struct page *page,
 					unsigned long haddr)
 {
+	spinlock_t *ptl;
 	pgtable_t pgtable;
 	pmd_t _pmd;
 	int ret = 0, i;
@@ -1055,7 +1063,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 	mmun_end = haddr + HPAGE_PMD_SIZE;
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 
-	spin_lock(&mm->page_table_lock);
+	ptl = pmd_lock(mm, pmd);
 	if (unlikely(!pmd_same(*pmd, orig_pmd)))
 		goto out_free_pages;
 	VM_BUG_ON(!PageHead(page));
@@ -1081,7 +1089,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 	smp_wmb(); /* make pte visible before pmd */
 	pmd_populate(mm, pmd, pgtable);
 	page_remove_rmap(page);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
@@ -1092,7 +1100,7 @@ out:
 	return ret;
 
 out_free_pages:
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	mem_cgroup_uncharge_start();
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
@@ -1107,17 +1115,19 @@ out_free_pages:
 int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pmd_t *pmd, pmd_t orig_pmd)
 {
+	spinlock_t *ptl;
 	int ret = 0;
 	struct page *page = NULL, *new_page;
 	unsigned long haddr;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;	/* For mmu_notifiers */
 
+	ptl = pmd_lockptr(mm, pmd);
 	VM_BUG_ON(!vma->anon_vma);
 	haddr = address & HPAGE_PMD_MASK;
 	if (is_huge_zero_pmd(orig_pmd))
 		goto alloc;
-	spin_lock(&mm->page_table_lock);
+	spin_lock(ptl);
 	if (unlikely(!pmd_same(*pmd, orig_pmd)))
 		goto out_unlock;
 
@@ -1133,7 +1143,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto out_unlock;
 	}
 	get_page(page);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 alloc:
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow())
@@ -1180,11 +1190,11 @@ alloc:
 	mmun_end = haddr + HPAGE_PMD_SIZE;
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 
-	spin_lock(&mm->page_table_lock);
+	spin_lock(ptl);
 	if (page)
 		put_page(page);
 	if (unlikely(!pmd_same(*pmd, orig_pmd))) {
-		spin_unlock(&mm->page_table_lock);
+		spin_unlock(ptl);
 		mem_cgroup_uncharge_page(new_page);
 		put_page(new_page);
 		goto out_mn;
@@ -1206,13 +1216,13 @@ alloc:
 		}
 		ret |= VM_FAULT_WRITE;
 	}
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 out_mn:
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 out:
 	return ret;
 out_unlock:
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	return ret;
 }
 
@@ -1224,7 +1234,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 	struct mm_struct *mm = vma->vm_mm;
 	struct page *page = NULL;
 
-	assert_spin_locked(&mm->page_table_lock);
+	assert_spin_locked(pmd_lockptr(mm, pmd));
 
 	if (flags & FOLL_WRITE && !pmd_write(*pmd))
 		goto out;
@@ -1271,6 +1281,7 @@ out:
 int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
+	spinlock_t *ptl;
 	struct anon_vma *anon_vma = NULL;
 	struct page *page;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
@@ -1280,7 +1291,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	bool migrated = false;
 	int flags = 0;
 
-	spin_lock(&mm->page_table_lock);
+	ptl = pmd_lock(mm, pmdp);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
 		goto out_unlock;
 
@@ -1318,7 +1329,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * relock and check_same as the page may no longer be mapped.
 		 * As the fault is being retried, do not account for it.
 		 */
-		spin_unlock(&mm->page_table_lock);
+		spin_unlock(ptl);
 		wait_on_page_locked(page);
 		page_nid = -1;
 		goto out;
@@ -1326,13 +1337,13 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	/* Page is misplaced, serialise migrations and parallel THP splits */
 	get_page(page);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	if (!page_locked)
 		lock_page(page);
 	anon_vma = page_lock_anon_vma_read(page);
 
 	/* Confirm the PMD did not change while page_table_lock was released */
-	spin_lock(&mm->page_table_lock);
+	spin_lock(ptl);
 	if (unlikely(!pmd_same(pmd, *pmdp))) {
 		unlock_page(page);
 		put_page(page);
@@ -1344,7 +1355,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * Migrate the THP to the requested node, returns with page unlocked
 	 * and pmd_numa cleared.
 	 */
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
 				pmdp, pmd, addr, page, target_nid);
 	if (migrated) {
@@ -1361,7 +1372,7 @@ clear_pmdnuma:
 	update_mmu_cache_pmd(vma, addr, pmdp);
 	unlock_page(page);
 out_unlock:
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 
 out:
 	if (anon_vma)
@@ -2371,7 +2382,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	pte_t *pte;
 	pgtable_t pgtable;
 	struct page *new_page;
-	spinlock_t *ptl;
+	spinlock_t *pmd_ptl, *pte_ptl;
 	int isolated;
 	unsigned long hstart, hend;
 	unsigned long mmun_start;	/* For mmu_notifiers */
@@ -2414,12 +2425,12 @@ static void collapse_huge_page(struct mm_struct *mm,
 	anon_vma_lock_write(vma->anon_vma);
 
 	pte = pte_offset_map(pmd, address);
-	ptl = pte_lockptr(mm, pmd);
+	pte_ptl = pte_lockptr(mm, pmd);
 
 	mmun_start = address;
 	mmun_end = address + HPAGE_PMD_SIZE;
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
-	spin_lock(&mm->page_table_lock); /* probably unnecessary */
+	pmd_ptl = pmd_lock(mm, pmd); /* probably unnecessary */
 	/*
 	 * After this gup_fast can't run anymore. This also removes
 	 * any huge TLB entry from the CPU so we won't allow
@@ -2427,16 +2438,16 @@ static void collapse_huge_page(struct mm_struct *mm,
 	 * to avoid the risk of CPU bugs in that area.
 	 */
 	_pmd = pmdp_clear_flush(vma, address, pmd);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(pmd_ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
-	spin_lock(ptl);
+	spin_lock(pte_ptl);
 	isolated = __collapse_huge_page_isolate(vma, address, pte);
-	spin_unlock(ptl);
+	spin_unlock(pte_ptl);
 
 	if (unlikely(!isolated)) {
 		pte_unmap(pte);
-		spin_lock(&mm->page_table_lock);
+		spin_lock(pmd_ptl);
 		BUG_ON(!pmd_none(*pmd));
 		/*
 		 * We can only use set_pmd_at when establishing
@@ -2444,7 +2455,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 		 * points to regular pagetables. Use pmd_populate for that
 		 */
 		pmd_populate(mm, pmd, pmd_pgtable(_pmd));
-		spin_unlock(&mm->page_table_lock);
+		spin_unlock(pmd_ptl);
 		anon_vma_unlock_write(vma->anon_vma);
 		goto out;
 	}
@@ -2455,7 +2466,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	 */
 	anon_vma_unlock_write(vma->anon_vma);
 
-	__collapse_huge_page_copy(pte, new_page, vma, address, ptl);
+	__collapse_huge_page_copy(pte, new_page, vma, address, pte_ptl);
 	pte_unmap(pte);
 	__SetPageUptodate(new_page);
 	pgtable = pmd_pgtable(_pmd);
@@ -2470,13 +2481,13 @@ static void collapse_huge_page(struct mm_struct *mm,
 	 */
 	smp_wmb();
 
-	spin_lock(&mm->page_table_lock);
+	spin_lock(pmd_ptl);
 	BUG_ON(!pmd_none(*pmd));
 	page_add_new_anon_rmap(new_page, vma, address);
 	pgtable_trans_huge_deposit(mm, pmd, pgtable);
 	set_pmd_at(mm, address, pmd, _pmd);
 	update_mmu_cache_pmd(vma, address, pmd);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(pmd_ptl);
 
 	*hpage = NULL;
 
@@ -2805,6 +2816,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
 void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
 		pmd_t *pmd)
 {
+	spinlock_t *ptl;
 	struct page *page;
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long haddr = address & HPAGE_PMD_MASK;
@@ -2817,22 +2829,22 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
 	mmun_end = haddr + HPAGE_PMD_SIZE;
 again:
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
-	spin_lock(&mm->page_table_lock);
+	ptl = pmd_lock(mm, pmd);
 	if (unlikely(!pmd_trans_huge(*pmd))) {
-		spin_unlock(&mm->page_table_lock);
+		spin_unlock(ptl);
 		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 		return;
 	}
 	if (is_huge_zero_pmd(*pmd)) {
 		__split_huge_zero_page_pmd(vma, haddr, pmd);
-		spin_unlock(&mm->page_table_lock);
+		spin_unlock(ptl);
 		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 		return;
 	}
 	page = pmd_page(*pmd);
 	VM_BUG_ON(!page_count(page));
 	get_page(page);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
 	split_huge_page(page);