Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--	mm/huge_memory.c	60
1 file changed, 47 insertions(+), 13 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index bccd5a628ea6..95d1acb0f3d2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -882,6 +882,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		ret = 0;
 		goto out_unlock;
 	}
+
 	if (unlikely(pmd_trans_splitting(pmd))) {
 		/* split huge page running from under us */
 		spin_unlock(src_ptl);
@@ -1153,7 +1154,7 @@ alloc:
 		new_page = NULL;
 
 	if (unlikely(!new_page)) {
-		if (is_huge_zero_pmd(orig_pmd)) {
+		if (!page) {
 			ret = do_huge_pmd_wp_zero_page_fallback(mm, vma,
 					address, pmd, orig_pmd, haddr);
 		} else {
@@ -1180,7 +1181,7 @@ alloc:
 
 	count_vm_event(THP_FAULT_ALLOC);
 
-	if (is_huge_zero_pmd(orig_pmd))
+	if (!page)
 		clear_huge_page(new_page, haddr, HPAGE_PMD_NR);
 	else
 		copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
@@ -1206,7 +1207,7 @@ alloc:
 		page_add_new_anon_rmap(new_page, vma, haddr);
 		set_pmd_at(mm, haddr, pmd, entry);
 		update_mmu_cache_pmd(vma, address, pmd);
-		if (is_huge_zero_pmd(orig_pmd)) {
+		if (!page) {
 			add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
 			put_huge_zero_page();
 		} else {
@@ -1243,6 +1244,10 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 	if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
 		return ERR_PTR(-EFAULT);
 
+	/* Full NUMA hinting faults to serialise migration in fault paths */
+	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+		goto out;
+
 	page = pmd_page(*pmd);
 	VM_BUG_ON(!PageHead(page));
 	if (flags & FOLL_TOUCH) {
@@ -1295,6 +1300,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (unlikely(!pmd_same(pmd, *pmdp)))
 		goto out_unlock;
 
+	/*
+	 * If there are potential migrations, wait for completion and retry
+	 * without disrupting NUMA hinting information. Do not relock and
+	 * check_same as the page may no longer be mapped.
+	 */
+	if (unlikely(pmd_trans_migrating(*pmdp))) {
+		spin_unlock(ptl);
+		wait_migrate_huge_page(vma->anon_vma, pmdp);
+		goto out;
+	}
+
 	page = pmd_page(pmd);
 	BUG_ON(is_huge_zero_page(page));
 	page_nid = page_to_nid(page);
@@ -1323,23 +1339,22 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		/* If the page was locked, there are no parallel migrations */
 		if (page_locked)
 			goto clear_pmdnuma;
+	}
 
-		/*
-		 * Otherwise wait for potential migrations and retry. We do
-		 * relock and check_same as the page may no longer be mapped.
-		 * As the fault is being retried, do not account for it.
-		 */
+	/* Migration could have started since the pmd_trans_migrating check */
+	if (!page_locked) {
 		spin_unlock(ptl);
 		wait_on_page_locked(page);
 		page_nid = -1;
 		goto out;
 	}
 
-	/* Page is misplaced, serialise migrations and parallel THP splits */
+	/*
+	 * Page is misplaced. Page lock serialises migrations. Acquire anon_vma
+	 * to serialises splits
+	 */
 	get_page(page);
 	spin_unlock(ptl);
-	if (!page_locked)
-		lock_page(page);
 	anon_vma = page_lock_anon_vma_read(page);
 
 	/* Confirm the PMD did not change while page_table_lock was released */
@@ -1351,6 +1366,13 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto out_unlock;
 	}
 
+	/* Bail if we fail to protect against THP splits for any reason */
+	if (unlikely(!anon_vma)) {
+		put_page(page);
+		page_nid = -1;
+		goto clear_pmdnuma;
+	}
+
 	/*
 	 * Migrate the THP to the requested node, returns with page unlocked
 	 * and pmd_numa cleared.
@@ -1481,8 +1503,18 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 		pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
 		VM_BUG_ON(!pmd_none(*new_pmd));
 		set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
-		if (new_ptl != old_ptl)
+		if (new_ptl != old_ptl) {
+			pgtable_t pgtable;
+
+			/*
+			 * Move preallocated PTE page table if new_pmd is on
+			 * different PMD page table.
+			 */
+			pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);
+			pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
+
 			spin_unlock(new_ptl);
+		}
 		spin_unlock(old_ptl);
 	}
 out:
@@ -1507,6 +1539,8 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		ret = 1;
 		if (!prot_numa) {
 			entry = pmdp_get_and_clear(mm, addr, pmd);
+			if (pmd_numa(entry))
+				entry = pmd_mknonnuma(entry);
 			entry = pmd_modify(entry, newprot);
 			ret = HPAGE_PMD_NR;
 			BUG_ON(pmd_write(entry));
@@ -1521,7 +1555,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			 */
 			if (!is_huge_zero_page(page) &&
 			    !pmd_numa(*pmd)) {
-				entry = pmdp_get_and_clear(mm, addr, pmd);
+				entry = *pmd;
 				entry = pmd_mknuma(entry);
 				ret = HPAGE_PMD_NR;
 			}