Diffstat (limited to 'mm/huge_memory.c')
 -rw-r--r--  mm/huge_memory.c  57
 1 file changed, 47 insertions, 10 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index bccd5a628ea6..7de1bf85f683 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -882,6 +882,10 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                 ret = 0;
                 goto out_unlock;
         }
+
+        /* mmap_sem prevents this happening but warn if that changes */
+        WARN_ON(pmd_trans_migrating(pmd));
+
         if (unlikely(pmd_trans_splitting(pmd))) {
                 /* split huge page running from under us */
                 spin_unlock(src_ptl);
@@ -1243,6 +1247,10 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
         if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
                 return ERR_PTR(-EFAULT);
 
+        /* Full NUMA hinting faults to serialise migration in fault paths */
+        if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+                goto out;
+
         page = pmd_page(*pmd);
         VM_BUG_ON(!PageHead(page));
         if (flags & FOLL_TOUCH) {
@@ -1295,6 +1303,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         if (unlikely(!pmd_same(pmd, *pmdp)))
                 goto out_unlock;
 
+        /*
+         * If there are potential migrations, wait for completion and retry
+         * without disrupting NUMA hinting information. Do not relock and
+         * check_same as the page may no longer be mapped.
+         */
+        if (unlikely(pmd_trans_migrating(*pmdp))) {
+                spin_unlock(ptl);
+                wait_migrate_huge_page(vma->anon_vma, pmdp);
+                goto out;
+        }
+
         page = pmd_page(pmd);
         BUG_ON(is_huge_zero_page(page));
         page_nid = page_to_nid(page);
@@ -1323,23 +1342,22 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 /* If the page was locked, there are no parallel migrations */
                 if (page_locked)
                         goto clear_pmdnuma;
+        }
 
-                /*
-                 * Otherwise wait for potential migrations and retry. We do
-                 * relock and check_same as the page may no longer be mapped.
-                 * As the fault is being retried, do not account for it.
-                 */
+        /* Migration could have started since the pmd_trans_migrating check */
+        if (!page_locked) {
                 spin_unlock(ptl);
                 wait_on_page_locked(page);
                 page_nid = -1;
                 goto out;
         }
 
-        /* Page is misplaced, serialise migrations and parallel THP splits */
+        /*
+         * Page is misplaced. Page lock serialises migrations. Acquire anon_vma
+         * to serialises splits
+         */
         get_page(page);
         spin_unlock(ptl);
-        if (!page_locked)
-                lock_page(page);
         anon_vma = page_lock_anon_vma_read(page);
 
         /* Confirm the PMD did not change while page_table_lock was released */
@@ -1351,6 +1369,13 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 goto out_unlock;
         }
 
+        /* Bail if we fail to protect against THP splits for any reason */
+        if (unlikely(!anon_vma)) {
+                put_page(page);
+                page_nid = -1;
+                goto clear_pmdnuma;
+        }
+
         /*
          * Migrate the THP to the requested node, returns with page unlocked
          * and pmd_numa cleared.
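
The do_huge_pmd_numa_page() hunks above all apply the same discipline: if a parallel THP migration may be in flight (pmd_trans_migrating(), or a page lock held by the migration path), the fault handler drops the page table lock, waits, and retries rather than operating on a pmd that can change underneath it. Below is a minimal user-space sketch of that drop-lock/wait/retry pattern; the mutex, condition variable and migrating flag are hypothetical stand-ins for illustration only, not the kernel's primitives.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the page table lock and migration state. */
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  migration_done = PTHREAD_COND_INITIALIZER;
static bool entry_migrating;        /* plays the role of pmd_trans_migrating() */
static int  entry_value = 42;       /* plays the role of the pmd contents */

/* Fault-handler-like reader: never touches the entry while it "migrates". */
static int handle_fault(void)
{
        pthread_mutex_lock(&table_lock);
        /* Check for migration under the lock, as the hunks above do under ptl. */
        while (entry_migrating)
                /* The lock is dropped while waiting and re-taken before
                 * re-checking; this condenses the kernel's "goto out and
                 * retry the fault" step into one loop. */
                pthread_cond_wait(&migration_done, &table_lock);
        int v = entry_value;        /* safe: no migration in flight */
        pthread_mutex_unlock(&table_lock);
        return v;
}

/* Migration-like writer: marks the entry busy, moves it, wakes waiters. */
static void *migrate_entry(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&table_lock);
        entry_migrating = true;         /* migration has started */
        pthread_mutex_unlock(&table_lock);

        /* ... copy the data to its new home, outside the table lock ... */

        pthread_mutex_lock(&table_lock);
        entry_value += 1000;            /* install the migrated entry */
        entry_migrating = false;
        pthread_cond_broadcast(&migration_done);
        pthread_mutex_unlock(&table_lock);
        return NULL;
}

int main(void)
{
        pthread_t t;
        pthread_create(&t, NULL, migrate_entry, NULL);
        printf("fault saw %d\n", handle_fault());
        pthread_join(t, NULL);
        return 0;
}
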
@@ -1481,8 +1506,18 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
                 pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
                 VM_BUG_ON(!pmd_none(*new_pmd));
                 set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
-                if (new_ptl != old_ptl)
+                if (new_ptl != old_ptl) {
+                        pgtable_t pgtable;
+
+                        /*
+                         * Move preallocated PTE page table if new_pmd is on
+                         * different PMD page table.
+                         */
+                        pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);
+                        pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
+
                         spin_unlock(new_ptl);
+                }
                 spin_unlock(old_ptl);
         }
 out:
@@ -1507,6 +1542,8 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                 ret = 1;
                 if (!prot_numa) {
                         entry = pmdp_get_and_clear(mm, addr, pmd);
+                        if (pmd_numa(entry))
+                                entry = pmd_mknonnuma(entry);
                         entry = pmd_modify(entry, newprot);
                         ret = HPAGE_PMD_NR;
                         BUG_ON(pmd_write(entry));
@@ -1521,7 +1558,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                          */
                         if (!is_huge_zero_page(page) &&
                             !pmd_numa(*pmd)) {
-                                entry = pmdp_get_and_clear(mm, addr, pmd);
+                                entry = *pmd;
                                 entry = pmd_mknuma(entry);
                                 ret = HPAGE_PMD_NR;
                         }
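
The last hunk replaces pmdp_get_and_clear() with a plain read of *pmd when the entry is only being marked with pmd_mknuma(): the pmd is no longer cleared and re-established just to set the hinting bit, so there is no window in which the entry is transiently absent during the update. The sketch below, assuming C11 atomics and entirely hypothetical flag names, contrasts the two update styles on a shared word to illustrate that window.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define PRESENT_BIT 0x1u        /* hypothetical "mapping present" flag */
#define NUMA_BIT    0x2u        /* hypothetical "NUMA hinting" flag */

static _Atomic uint64_t entry = PRESENT_BIT;

/* Old style: fetch-and-clear, then write back -- a concurrent reader can
 * observe 0 (an apparently unmapped entry) between the two stores. */
static void mark_numa_clear_first(void)
{
        uint64_t e = atomic_exchange(&entry, 0);
        atomic_store(&entry, e | NUMA_BIT);
}

/* New style: read, modify, store -- the entry never appears unmapped,
 * which mirrors "entry = *pmd" followed by pmd_mknuma() in the hunk. */
static void mark_numa_in_place(void)
{
        uint64_t e = atomic_load(&entry);
        atomic_store(&entry, e | NUMA_BIT);
}

int main(void)
{
        mark_numa_in_place();
        printf("entry = %#llx\n", (unsigned long long)atomic_load(&entry));
        mark_numa_clear_first();        /* shown only for contrast */
        return 0;
}
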