author		Kirill A. Shutemov <kirill.shutemov@linux.intel.com>	2013-11-14 17:31:04 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-11-14 19:32:15 -0500
commit		c4088ebdca64c9a2e34a38177d2249805ede1f4b (patch)
tree		9e761c697ad6fab039adb5280bf26fb41f8e17aa /mm/huge_memory.c
parent		cb900f41215447433cbc456d1c4294e858a84d7c (diff)
mm: convert the rest to new page table lock api
Only trivial cases left. Let's convert them altogether.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
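[Editorial note: every hunk below applies the same mechanical substitution: instead of taking mm->page_table_lock directly, the caller takes whatever spinlock pmd_lock() returns (or pmd_lockptr() looks up) for the pmd in question, and unlocks that. A rough before/after sketch of the pattern, distilled from the hunks below; touch_pmd_old/touch_pmd_new are made-up names, and the fragment assumes the usual kernel-internal mm headers rather than being standalone-compilable.]

	/* Illustrative sketch of the conversion; not part of the patch itself. */

	/* Before: every huge-pmd path serialized on the single per-mm lock. */
	static void touch_pmd_old(struct mm_struct *mm, pmd_t *pmd, pmd_t orig_pmd)
	{
		spin_lock(&mm->page_table_lock);
		if (likely(pmd_same(*pmd, orig_pmd))) {
			/* ... operate on *pmd under the lock ... */
		}
		spin_unlock(&mm->page_table_lock);
	}

	/*
	 * After: take whichever spinlock the new API maps this pmd to
	 * (the per-mm lock, or a split per-page-table lock when enabled).
	 */
	static void touch_pmd_new(struct mm_struct *mm, pmd_t *pmd, pmd_t orig_pmd)
	{
		spinlock_t *ptl;

		ptl = pmd_lock(mm, pmd);	/* lock and return the pmd's lock */
		if (likely(pmd_same(*pmd, orig_pmd))) {
			/* ... operate on *pmd under the lock ... */
		}
		spin_unlock(ptl);
	}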
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--	mm/huge_memory.c	108
1 file changed, 60 insertions(+), 48 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c2082ab4fc93..bccd5a628ea6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -710,6 +710,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 					struct page *page)
 {
 	pgtable_t pgtable;
+	spinlock_t *ptl;
 
 	VM_BUG_ON(!PageCompound(page));
 	pgtable = pte_alloc_one(mm, haddr);
@@ -724,9 +725,9 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 	 */
 	__SetPageUptodate(page);
 
-	spin_lock(&mm->page_table_lock);
+	ptl = pmd_lock(mm, pmd);
 	if (unlikely(!pmd_none(*pmd))) {
-		spin_unlock(&mm->page_table_lock);
+		spin_unlock(ptl);
 		mem_cgroup_uncharge_page(page);
 		put_page(page);
 		pte_free(mm, pgtable);
@@ -739,7 +740,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 		set_pmd_at(mm, haddr, pmd, entry);
 		add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
 		atomic_long_inc(&mm->nr_ptes);
-		spin_unlock(&mm->page_table_lock);
+		spin_unlock(ptl);
 	}
 
 	return 0;
@@ -759,6 +760,7 @@ static inline struct page *alloc_hugepage_vma(int defrag,
 			       HPAGE_PMD_ORDER, vma, haddr, nd);
 }
 
+/* Caller must hold page table lock. */
 static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
 		struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
 		struct page *zero_page)
@@ -790,6 +792,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return VM_FAULT_OOM;
 	if (!(flags & FAULT_FLAG_WRITE) &&
 			transparent_hugepage_use_zero_page()) {
+		spinlock_t *ptl;
 		pgtable_t pgtable;
 		struct page *zero_page;
 		bool set;
@@ -802,10 +805,10 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			count_vm_event(THP_FAULT_FALLBACK);
 			return VM_FAULT_FALLBACK;
 		}
-		spin_lock(&mm->page_table_lock);
+		ptl = pmd_lock(mm, pmd);
 		set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
 				zero_page);
-		spin_unlock(&mm->page_table_lock);
+		spin_unlock(ptl);
 		if (!set) {
 			pte_free(mm, pgtable);
 			put_huge_zero_page();
@@ -838,6 +841,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		  pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
 		  struct vm_area_struct *vma)
 {
+	spinlock_t *dst_ptl, *src_ptl;
 	struct page *src_page;
 	pmd_t pmd;
 	pgtable_t pgtable;
@@ -848,8 +852,9 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (unlikely(!pgtable))
 		goto out;
 
-	spin_lock(&dst_mm->page_table_lock);
-	spin_lock_nested(&src_mm->page_table_lock, SINGLE_DEPTH_NESTING);
+	dst_ptl = pmd_lock(dst_mm, dst_pmd);
+	src_ptl = pmd_lockptr(src_mm, src_pmd);
+	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 
 	ret = -EAGAIN;
 	pmd = *src_pmd;
@@ -858,7 +863,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		goto out_unlock;
 	}
 	/*
-	 * mm->page_table_lock is enough to be sure that huge zero pmd is not
+	 * When page table lock is held, the huge zero pmd should not be
 	 * under splitting since we don't split the page itself, only pmd to
 	 * a page table.
 	 */
@@ -879,8 +884,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	}
 	if (unlikely(pmd_trans_splitting(pmd))) {
 		/* split huge page running from under us */
-		spin_unlock(&src_mm->page_table_lock);
-		spin_unlock(&dst_mm->page_table_lock);
+		spin_unlock(src_ptl);
+		spin_unlock(dst_ptl);
 		pte_free(dst_mm, pgtable);
 
 		wait_split_huge_page(vma->anon_vma, src_pmd); /* src_vma */
@@ -900,8 +905,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 
 	ret = 0;
 out_unlock:
-	spin_unlock(&src_mm->page_table_lock);
-	spin_unlock(&dst_mm->page_table_lock);
+	spin_unlock(src_ptl);
+	spin_unlock(dst_ptl);
 out:
 	return ret;
 }
@@ -912,10 +917,11 @@ void huge_pmd_set_accessed(struct mm_struct *mm,
 			   pmd_t *pmd, pmd_t orig_pmd,
 			   int dirty)
 {
+	spinlock_t *ptl;
 	pmd_t entry;
 	unsigned long haddr;
 
-	spin_lock(&mm->page_table_lock);
+	ptl = pmd_lock(mm, pmd);
 	if (unlikely(!pmd_same(*pmd, orig_pmd)))
 		goto unlock;
 
@@ -925,13 +931,14 @@ void huge_pmd_set_accessed(struct mm_struct *mm,
 	update_mmu_cache_pmd(vma, address, pmd);
 
 unlock:
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 }
 
 static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
 		struct vm_area_struct *vma, unsigned long address,
 		pmd_t *pmd, pmd_t orig_pmd, unsigned long haddr)
 {
+	spinlock_t *ptl;
 	pgtable_t pgtable;
 	pmd_t _pmd;
 	struct page *page;
@@ -958,7 +965,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
 	mmun_end   = haddr + HPAGE_PMD_SIZE;
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 
-	spin_lock(&mm->page_table_lock);
+	ptl = pmd_lock(mm, pmd);
 	if (unlikely(!pmd_same(*pmd, orig_pmd)))
 		goto out_free_page;
 
@@ -985,7 +992,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
 	}
 	smp_wmb(); /* make pte visible before pmd */
 	pmd_populate(mm, pmd, pgtable);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	put_huge_zero_page();
 	inc_mm_counter(mm, MM_ANONPAGES);
 
@@ -995,7 +1002,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
 out:
 	return ret;
 out_free_page:
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	mem_cgroup_uncharge_page(page);
 	put_page(page);
@@ -1009,6 +1016,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 					struct page *page,
 					unsigned long haddr)
 {
+	spinlock_t *ptl;
 	pgtable_t pgtable;
 	pmd_t _pmd;
 	int ret = 0, i;
@@ -1055,7 +1063,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 	mmun_end   = haddr + HPAGE_PMD_SIZE;
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 
-	spin_lock(&mm->page_table_lock);
+	ptl = pmd_lock(mm, pmd);
 	if (unlikely(!pmd_same(*pmd, orig_pmd)))
 		goto out_free_pages;
 	VM_BUG_ON(!PageHead(page));
@@ -1081,7 +1089,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 	smp_wmb(); /* make pte visible before pmd */
 	pmd_populate(mm, pmd, pgtable);
 	page_remove_rmap(page);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
@@ -1092,7 +1100,7 @@ out:
 	return ret;
 
 out_free_pages:
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	mem_cgroup_uncharge_start();
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
@@ -1107,17 +1115,19 @@ out_free_pages:
 int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pmd_t *pmd, pmd_t orig_pmd)
 {
+	spinlock_t *ptl;
 	int ret = 0;
 	struct page *page = NULL, *new_page;
 	unsigned long haddr;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
 
+	ptl = pmd_lockptr(mm, pmd);
 	VM_BUG_ON(!vma->anon_vma);
 	haddr = address & HPAGE_PMD_MASK;
 	if (is_huge_zero_pmd(orig_pmd))
 		goto alloc;
-	spin_lock(&mm->page_table_lock);
+	spin_lock(ptl);
 	if (unlikely(!pmd_same(*pmd, orig_pmd)))
 		goto out_unlock;
 
@@ -1133,7 +1143,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto out_unlock;
 	}
 	get_page(page);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 alloc:
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow())
@@ -1180,11 +1190,11 @@ alloc:
 	mmun_end   = haddr + HPAGE_PMD_SIZE;
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 
-	spin_lock(&mm->page_table_lock);
+	spin_lock(ptl);
 	if (page)
 		put_page(page);
 	if (unlikely(!pmd_same(*pmd, orig_pmd))) {
-		spin_unlock(&mm->page_table_lock);
+		spin_unlock(ptl);
 		mem_cgroup_uncharge_page(new_page);
 		put_page(new_page);
 		goto out_mn;
@@ -1206,13 +1216,13 @@ alloc:
 		}
 		ret |= VM_FAULT_WRITE;
 	}
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 out_mn:
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 out:
 	return ret;
 out_unlock:
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	return ret;
 }
 
@@ -1224,7 +1234,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 	struct mm_struct *mm = vma->vm_mm;
 	struct page *page = NULL;
 
-	assert_spin_locked(&mm->page_table_lock);
+	assert_spin_locked(pmd_lockptr(mm, pmd));
 
 	if (flags & FOLL_WRITE && !pmd_write(*pmd))
 		goto out;
@@ -1271,6 +1281,7 @@ out:
 int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
+	spinlock_t *ptl;
 	struct anon_vma *anon_vma = NULL;
 	struct page *page;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
@@ -1280,7 +1291,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	bool migrated = false;
 	int flags = 0;
 
-	spin_lock(&mm->page_table_lock);
+	ptl = pmd_lock(mm, pmdp);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
 		goto out_unlock;
 
@@ -1318,7 +1329,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * relock and check_same as the page may no longer be mapped.
 		 * As the fault is being retried, do not account for it.
 		 */
-		spin_unlock(&mm->page_table_lock);
+		spin_unlock(ptl);
 		wait_on_page_locked(page);
 		page_nid = -1;
 		goto out;
@@ -1326,13 +1337,13 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	/* Page is misplaced, serialise migrations and parallel THP splits */
 	get_page(page);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	if (!page_locked)
 		lock_page(page);
 	anon_vma = page_lock_anon_vma_read(page);
 
 	/* Confirm the PMD did not change while page_table_lock was released */
-	spin_lock(&mm->page_table_lock);
+	spin_lock(ptl);
 	if (unlikely(!pmd_same(pmd, *pmdp))) {
 		unlock_page(page);
 		put_page(page);
@@ -1344,7 +1355,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * Migrate the THP to the requested node, returns with page unlocked
 	 * and pmd_numa cleared.
 	 */
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
 				pmdp, pmd, addr, page, target_nid);
 	if (migrated) {
@@ -1361,7 +1372,7 @@ clear_pmdnuma:
 	update_mmu_cache_pmd(vma, addr, pmdp);
 	unlock_page(page);
 out_unlock:
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 
 out:
 	if (anon_vma)
@@ -2371,7 +2382,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	pte_t *pte;
 	pgtable_t pgtable;
 	struct page *new_page;
-	spinlock_t *ptl;
+	spinlock_t *pmd_ptl, *pte_ptl;
 	int isolated;
 	unsigned long hstart, hend;
 	unsigned long mmun_start;	/* For mmu_notifiers */
@@ -2414,12 +2425,12 @@ static void collapse_huge_page(struct mm_struct *mm,
 	anon_vma_lock_write(vma->anon_vma);
 
 	pte = pte_offset_map(pmd, address);
-	ptl = pte_lockptr(mm, pmd);
+	pte_ptl = pte_lockptr(mm, pmd);
 
 	mmun_start = address;
 	mmun_end   = address + HPAGE_PMD_SIZE;
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
-	spin_lock(&mm->page_table_lock); /* probably unnecessary */
+	pmd_ptl = pmd_lock(mm, pmd); /* probably unnecessary */
 	/*
 	 * After this gup_fast can't run anymore. This also removes
 	 * any huge TLB entry from the CPU so we won't allow
@@ -2427,16 +2438,16 @@ static void collapse_huge_page(struct mm_struct *mm,
 	 * to avoid the risk of CPU bugs in that area.
 	 */
 	_pmd = pmdp_clear_flush(vma, address, pmd);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(pmd_ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
-	spin_lock(ptl);
+	spin_lock(pte_ptl);
 	isolated = __collapse_huge_page_isolate(vma, address, pte);
-	spin_unlock(ptl);
+	spin_unlock(pte_ptl);
 
 	if (unlikely(!isolated)) {
 		pte_unmap(pte);
-		spin_lock(&mm->page_table_lock);
+		spin_lock(pmd_ptl);
 		BUG_ON(!pmd_none(*pmd));
 		/*
 		 * We can only use set_pmd_at when establishing
@@ -2444,7 +2455,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 		 * points to regular pagetables. Use pmd_populate for that
 		 */
 		pmd_populate(mm, pmd, pmd_pgtable(_pmd));
-		spin_unlock(&mm->page_table_lock);
+		spin_unlock(pmd_ptl);
 		anon_vma_unlock_write(vma->anon_vma);
 		goto out;
 	}
@@ -2455,7 +2466,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	 */
 	anon_vma_unlock_write(vma->anon_vma);
 
-	__collapse_huge_page_copy(pte, new_page, vma, address, ptl);
+	__collapse_huge_page_copy(pte, new_page, vma, address, pte_ptl);
 	pte_unmap(pte);
 	__SetPageUptodate(new_page);
 	pgtable = pmd_pgtable(_pmd);
@@ -2470,13 +2481,13 @@ static void collapse_huge_page(struct mm_struct *mm,
 	 */
 	smp_wmb();
 
-	spin_lock(&mm->page_table_lock);
+	spin_lock(pmd_ptl);
 	BUG_ON(!pmd_none(*pmd));
 	page_add_new_anon_rmap(new_page, vma, address);
 	pgtable_trans_huge_deposit(mm, pmd, pgtable);
 	set_pmd_at(mm, address, pmd, _pmd);
 	update_mmu_cache_pmd(vma, address, pmd);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(pmd_ptl);
 
 	*hpage = NULL;
 
@@ -2805,6 +2816,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
 void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
 		pmd_t *pmd)
 {
+	spinlock_t *ptl;
 	struct page *page;
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long haddr = address & HPAGE_PMD_MASK;
@@ -2817,22 +2829,22 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
 	mmun_end   = haddr + HPAGE_PMD_SIZE;
again:
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
-	spin_lock(&mm->page_table_lock);
+	ptl = pmd_lock(mm, pmd);
 	if (unlikely(!pmd_trans_huge(*pmd))) {
-		spin_unlock(&mm->page_table_lock);
+		spin_unlock(ptl);
 		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 		return;
 	}
 	if (is_huge_zero_pmd(*pmd)) {
 		__split_huge_zero_page_pmd(vma, haddr, pmd);
-		spin_unlock(&mm->page_table_lock);
+		spin_unlock(ptl);
 		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 		return;
 	}
 	page = pmd_page(*pmd);
 	VM_BUG_ON(!page_count(page));
 	get_page(page);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
 	split_huge_page(page);