Diffstat (limited to 'mm/huge_memory.c'):

 -rw-r--r--  mm/huge_memory.c | 57
 1 file changed, 47 insertions(+), 10 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index bccd5a628ea6..7de1bf85f683 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -882,6 +882,10 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		ret = 0;
 		goto out_unlock;
 	}
+
+	/* mmap_sem prevents this happening but warn if that changes */
+	WARN_ON(pmd_trans_migrating(pmd));
+
 	if (unlikely(pmd_trans_splitting(pmd))) {
 		/* split huge page running from under us */
 		spin_unlock(src_ptl);
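The WARN_ON() above relies on pmd_trans_migrating(), a helper introduced by the same series whose body is not part of this diff. A minimal sketch consistent with how it is used here, assuming THP migration holds the huge page's lock from start to finish (the _sketch suffix marks this as illustrative, not the upstream definition):

static bool pmd_trans_migrating_sketch(pmd_t pmd)
{
	/* assumption: a locked head page signals an in-flight migration */
	struct page *page = pmd_page(pmd);

	return PageLocked(page);
}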
@@ -1243,6 +1247,10 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 	if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
 		return ERR_PTR(-EFAULT);
 
+	/* Full NUMA hinting faults to serialise migration in fault paths */
+	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+		goto out;
+
 	page = pmd_page(*pmd);
 	VM_BUG_ON(!PageHead(page));
 	if (flags & FOLL_TOUCH) {
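Taking the function's NULL-return path via `goto out` makes get_user_pages() fall back to handle_mm_fault(), so an access through GUP goes through the same do_huge_pmd_numa_page() path, and the same migration serialisation, as a hardware fault. A sketch of the gate as a standalone predicate, assuming the FOLL_NUMA convention that GUP sets the flag for callers that did not request FOLL_FORCE:

static bool gup_must_fault_sketch(unsigned int flags, pmd_t pmd)
{
	/* NUMA hinting entries go through the fault path, never GUP */
	return (flags & FOLL_NUMA) && pmd_numa(pmd);
}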
@@ -1295,6 +1303,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (unlikely(!pmd_same(pmd, *pmdp)))
 		goto out_unlock;
 
+	/*
+	 * If there are potential migrations, wait for completion and retry
+	 * without disrupting NUMA hinting information. Do not relock and
+	 * check_same as the page may no longer be mapped.
+	 */
+	if (unlikely(pmd_trans_migrating(*pmdp))) {
+		spin_unlock(ptl);
+		wait_migrate_huge_page(vma->anon_vma, pmdp);
+		goto out;
+	}
+
 	page = pmd_page(pmd);
 	BUG_ON(is_huge_zero_page(page));
 	page_nid = page_to_nid(page);
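wait_migrate_huge_page() is likewise introduced outside this diff. Assuming, as in the pmd_trans_migrating() sketch above, that migration is bracketed by the page lock, its observable behaviour reduces to the following sketch (the upstream helper also takes the anon_vma, which this sketch omits; the body is an assumption, not verbatim upstream):

static void wait_migrate_huge_page_sketch(pmd_t *pmd)
{
	struct page *page = pmd_page(*pmd);

	/* sleeps until the migrating task unlocks the page on completion */
	wait_on_page_locked(page);
}

The caller then does `goto out` rather than retaking the PTL: the fault returns and is retried from scratch, and the retry sees the post-migration PMD.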
@@ -1323,23 +1342,22 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		/* If the page was locked, there are no parallel migrations */
 		if (page_locked)
 			goto clear_pmdnuma;
+	}
 
-	/*
-	 * Otherwise wait for potential migrations and retry. We do
-	 * relock and check_same as the page may no longer be mapped.
-	 * As the fault is being retried, do not account for it.
-	 */
+	/* Migration could have started since the pmd_trans_migrating check */
+	if (!page_locked) {
 		spin_unlock(ptl);
 		wait_on_page_locked(page);
 		page_nid = -1;
 		goto out;
 	}
 
-	/* Page is misplaced, serialise migrations and parallel THP splits */
+	/*
+	 * Page is misplaced. Page lock serialises migrations. Acquire anon_vma
+	 * to serialise splits
+	 */
 	get_page(page);
 	spin_unlock(ptl);
-	if (!page_locked)
-		lock_page(page);
 	anon_vma = page_lock_anon_vma_read(page);
 
 	/* Confirm the PMD did not change while page_table_lock was released */
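The restructuring above closes the `target_nid == -1` block with the new `}` and re-tests page_locked, so every path that failed the earlier trylock_page() now waits and retries instead of blocking in lock_page() against potentially stale state. The idiom, shown in isolation and annotated (same code as the hunk):

if (!page_locked) {
	spin_unlock(ptl);		/* never sleep while holding the PTL */
	wait_on_page_locked(page);	/* the lock holder is presumed to be a migration */
	page_nid = -1;			/* the aborted fault is not accounted */
	goto out;			/* the fault is simply retried */
}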
@@ -1351,6 +1369,13 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto out_unlock;
 	}
 
+	/* Bail if we fail to protect against THP splits for any reason */
+	if (unlikely(!anon_vma)) {
+		put_page(page);
+		page_nid = -1;
+		goto clear_pmdnuma;
+	}
+
 	/*
 	 * Migrate the THP to the requested node, returns with page unlocked
 	 * and pmd_numa cleared.
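page_lock_anon_vma_read() can return NULL, for example when the page was unmapped in parallel and its anon_vma is gone; without the anon_vma read lock nothing protects against a concurrent THP split, so the new check gives up on migration and falls back to a normal fault. A sketch of the calling convention, with a hypothetical wrapper name:

static struct anon_vma *lock_anon_vma_or_bail_sketch(struct page *page)
{
	struct anon_vma *anon_vma = page_lock_anon_vma_read(page);

	if (!anon_vma)
		put_page(page);	/* drop the reference taken before the PTL was released */
	return anon_vma;	/* NULL: caller clears pmd_numa and retries */
}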
@@ -1481,8 +1506,18 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 		pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
 		VM_BUG_ON(!pmd_none(*new_pmd));
 		set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
-		if (new_ptl != old_ptl)
+		if (new_ptl != old_ptl) {
+			pgtable_t pgtable;
+
+			/*
+			 * Move preallocated PTE page table if new_pmd is on
+			 * different PMD page table.
+			 */
+			pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);
+			pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
+
 			spin_unlock(new_ptl);
+		}
 		spin_unlock(old_ptl);
 	}
 out:
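THP keeps a preallocated PTE page table "deposited" against each huge PMD so that a later split cannot fail on allocation; with split page-table locks the deposit is tracked per PMD page table, so when mremap() moves the huge PMD onto a different PMD page the deposit has to move with it, which is what the new withdraw/deposit pair does. The invariant, expressed as a hypothetical assertion helper (not upstream code; the caller must hold the PMD lock):

static void assert_pte_table_deposited(struct mm_struct *mm, pmd_t *pmd)
{
	pgtable_t pgtable = pgtable_trans_huge_withdraw(mm, pmd);

	VM_BUG_ON(!pgtable);				/* deposit went missing */
	pgtable_trans_huge_deposit(mm, pmd, pgtable);	/* put it back */
}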
@@ -1507,6 +1542,8 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		ret = 1;
 		if (!prot_numa) {
 			entry = pmdp_get_and_clear(mm, addr, pmd);
+			if (pmd_numa(entry))
+				entry = pmd_mknonnuma(entry);
 			entry = pmd_modify(entry, newprot);
 			ret = HPAGE_PMD_NR;
 			BUG_ON(pmd_write(entry));
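Without the new check, the new protections would be applied on top of a live _PAGE_NUMA encoding and the hinting state would leak into the mprotect()ed mapping; stripping it first yields a normally accessible entry. The PTE-level paths of this era need the same treatment; a sketch of the analogue, assuming the matching pte_* helpers:

static pte_t mprotect_strip_numa_sketch(pte_t ptent, pgprot_t newprot)
{
	if (pte_numa(ptent))
		ptent = pte_mknonnuma(ptent);	/* drop hinting state first */
	return pte_modify(ptent, newprot);	/* then apply the new protection */
}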
@@ -1521,7 +1558,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		 */
 		if (!is_huge_zero_page(page) &&
 		    !pmd_numa(*pmd)) {
-			entry = pmdp_get_and_clear(mm, addr, pmd);
+			entry = *pmd;
 			entry = pmd_mknuma(entry);
 			ret = HPAGE_PMD_NR;
 		}
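The final hunk fixes a transient-clear race: pmdp_get_and_clear() leaves the PMD pmd_none() until the later set_pmd_at(), and a parallel fault observing the empty PMD can act on it. Reading the entry in place keeps the PMD valid throughout; a stale TLB entry at worst delays the hinting fault, which is tolerable because NUMA hinting is best-effort. Condensed sketch of the fixed update (illustrative; upstream performs the set_pmd_at() later in the same function):

static void set_pmd_numa_sketch(struct mm_struct *mm, unsigned long addr,
				pmd_t *pmd)
{
	pmd_t entry = pmd_mknuma(*pmd);	/* read in place: no pmd_none() window */

	set_pmd_at(mm, addr, pmd, entry);	/* caller holds the PMD lock */
}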