diff options
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- | mm/huge_memory.c | 60 |
1 files changed, 47 insertions, 13 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index bccd5a628ea6..95d1acb0f3d2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c | |||
@@ -882,6 +882,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
882 | ret = 0; | 882 | ret = 0; |
883 | goto out_unlock; | 883 | goto out_unlock; |
884 | } | 884 | } |
885 | |||
885 | if (unlikely(pmd_trans_splitting(pmd))) { | 886 | if (unlikely(pmd_trans_splitting(pmd))) { |
886 | /* split huge page running from under us */ | 887 | /* split huge page running from under us */ |
887 | spin_unlock(src_ptl); | 888 | spin_unlock(src_ptl); |
@@ -1153,7 +1154,7 @@ alloc: | |||
1153 | new_page = NULL; | 1154 | new_page = NULL; |
1154 | 1155 | ||
1155 | if (unlikely(!new_page)) { | 1156 | if (unlikely(!new_page)) { |
1156 | if (is_huge_zero_pmd(orig_pmd)) { | 1157 | if (!page) { |
1157 | ret = do_huge_pmd_wp_zero_page_fallback(mm, vma, | 1158 | ret = do_huge_pmd_wp_zero_page_fallback(mm, vma, |
1158 | address, pmd, orig_pmd, haddr); | 1159 | address, pmd, orig_pmd, haddr); |
1159 | } else { | 1160 | } else { |
@@ -1180,7 +1181,7 @@ alloc: | |||
1180 | 1181 | ||
1181 | count_vm_event(THP_FAULT_ALLOC); | 1182 | count_vm_event(THP_FAULT_ALLOC); |
1182 | 1183 | ||
1183 | if (is_huge_zero_pmd(orig_pmd)) | 1184 | if (!page) |
1184 | clear_huge_page(new_page, haddr, HPAGE_PMD_NR); | 1185 | clear_huge_page(new_page, haddr, HPAGE_PMD_NR); |
1185 | else | 1186 | else |
1186 | copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR); | 1187 | copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR); |
@@ -1206,7 +1207,7 @@ alloc: | |||
1206 | page_add_new_anon_rmap(new_page, vma, haddr); | 1207 | page_add_new_anon_rmap(new_page, vma, haddr); |
1207 | set_pmd_at(mm, haddr, pmd, entry); | 1208 | set_pmd_at(mm, haddr, pmd, entry); |
1208 | update_mmu_cache_pmd(vma, address, pmd); | 1209 | update_mmu_cache_pmd(vma, address, pmd); |
1209 | if (is_huge_zero_pmd(orig_pmd)) { | 1210 | if (!page) { |
1210 | add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); | 1211 | add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); |
1211 | put_huge_zero_page(); | 1212 | put_huge_zero_page(); |
1212 | } else { | 1213 | } else { |
@@ -1243,6 +1244,10 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, | |||
1243 | if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd)) | 1244 | if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd)) |
1244 | return ERR_PTR(-EFAULT); | 1245 | return ERR_PTR(-EFAULT); |
1245 | 1246 | ||
1247 | /* Full NUMA hinting faults to serialise migration in fault paths */ | ||
1248 | if ((flags & FOLL_NUMA) && pmd_numa(*pmd)) | ||
1249 | goto out; | ||
1250 | |||
1246 | page = pmd_page(*pmd); | 1251 | page = pmd_page(*pmd); |
1247 | VM_BUG_ON(!PageHead(page)); | 1252 | VM_BUG_ON(!PageHead(page)); |
1248 | if (flags & FOLL_TOUCH) { | 1253 | if (flags & FOLL_TOUCH) { |
@@ -1295,6 +1300,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1295 | if (unlikely(!pmd_same(pmd, *pmdp))) | 1300 | if (unlikely(!pmd_same(pmd, *pmdp))) |
1296 | goto out_unlock; | 1301 | goto out_unlock; |
1297 | 1302 | ||
1303 | /* | ||
1304 | * If there are potential migrations, wait for completion and retry | ||
1305 | * without disrupting NUMA hinting information. Do not relock and | ||
1306 | * check_same as the page may no longer be mapped. | ||
1307 | */ | ||
1308 | if (unlikely(pmd_trans_migrating(*pmdp))) { | ||
1309 | spin_unlock(ptl); | ||
1310 | wait_migrate_huge_page(vma->anon_vma, pmdp); | ||
1311 | goto out; | ||
1312 | } | ||
1313 | |||
1298 | page = pmd_page(pmd); | 1314 | page = pmd_page(pmd); |
1299 | BUG_ON(is_huge_zero_page(page)); | 1315 | BUG_ON(is_huge_zero_page(page)); |
1300 | page_nid = page_to_nid(page); | 1316 | page_nid = page_to_nid(page); |
@@ -1323,23 +1339,22 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1323 | /* If the page was locked, there are no parallel migrations */ | 1339 | /* If the page was locked, there are no parallel migrations */ |
1324 | if (page_locked) | 1340 | if (page_locked) |
1325 | goto clear_pmdnuma; | 1341 | goto clear_pmdnuma; |
1342 | } | ||
1326 | 1343 | ||
1327 | /* | 1344 | /* Migration could have started since the pmd_trans_migrating check */ |
1328 | * Otherwise wait for potential migrations and retry. We do | 1345 | if (!page_locked) { |
1329 | * relock and check_same as the page may no longer be mapped. | ||
1330 | * As the fault is being retried, do not account for it. | ||
1331 | */ | ||
1332 | spin_unlock(ptl); | 1346 | spin_unlock(ptl); |
1333 | wait_on_page_locked(page); | 1347 | wait_on_page_locked(page); |
1334 | page_nid = -1; | 1348 | page_nid = -1; |
1335 | goto out; | 1349 | goto out; |
1336 | } | 1350 | } |
1337 | 1351 | ||
1338 | /* Page is misplaced, serialise migrations and parallel THP splits */ | 1352 | /* |
1353 | * Page is misplaced. Page lock serialises migrations. Acquire anon_vma | ||
1354 | * to serialise splits | |
1355 | */ | ||
1339 | get_page(page); | 1356 | get_page(page); |
1340 | spin_unlock(ptl); | 1357 | spin_unlock(ptl); |
1341 | if (!page_locked) | ||
1342 | lock_page(page); | ||
1343 | anon_vma = page_lock_anon_vma_read(page); | 1358 | anon_vma = page_lock_anon_vma_read(page); |
1344 | 1359 | ||
1345 | /* Confirm the PMD did not change while page_table_lock was released */ | 1360 | /* Confirm the PMD did not change while page_table_lock was released */ |
@@ -1351,6 +1366,13 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1351 | goto out_unlock; | 1366 | goto out_unlock; |
1352 | } | 1367 | } |
1353 | 1368 | ||
1369 | /* Bail if we fail to protect against THP splits for any reason */ | ||
1370 | if (unlikely(!anon_vma)) { | ||
1371 | put_page(page); | ||
1372 | page_nid = -1; | ||
1373 | goto clear_pmdnuma; | ||
1374 | } | ||
1375 | |||
1354 | /* | 1376 | /* |
1355 | * Migrate the THP to the requested node, returns with page unlocked | 1377 | * Migrate the THP to the requested node, returns with page unlocked |
1356 | * and pmd_numa cleared. | 1378 | * and pmd_numa cleared. |
@@ -1481,8 +1503,18 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, | |||
1481 | pmd = pmdp_get_and_clear(mm, old_addr, old_pmd); | 1503 | pmd = pmdp_get_and_clear(mm, old_addr, old_pmd); |
1482 | VM_BUG_ON(!pmd_none(*new_pmd)); | 1504 | VM_BUG_ON(!pmd_none(*new_pmd)); |
1483 | set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); | 1505 | set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); |
1484 | if (new_ptl != old_ptl) | 1506 | if (new_ptl != old_ptl) { |
1507 | pgtable_t pgtable; | ||
1508 | |||
1509 | /* | ||
1510 | * Move preallocated PTE page table if new_pmd is on | ||
1511 | * different PMD page table. | ||
1512 | */ | ||
1513 | pgtable = pgtable_trans_huge_withdraw(mm, old_pmd); | ||
1514 | pgtable_trans_huge_deposit(mm, new_pmd, pgtable); | ||
1515 | |||
1485 | spin_unlock(new_ptl); | 1516 | spin_unlock(new_ptl); |
1517 | } | ||
1486 | spin_unlock(old_ptl); | 1518 | spin_unlock(old_ptl); |
1487 | } | 1519 | } |
1488 | out: | 1520 | out: |
@@ -1507,6 +1539,8 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, | |||
1507 | ret = 1; | 1539 | ret = 1; |
1508 | if (!prot_numa) { | 1540 | if (!prot_numa) { |
1509 | entry = pmdp_get_and_clear(mm, addr, pmd); | 1541 | entry = pmdp_get_and_clear(mm, addr, pmd); |
1542 | if (pmd_numa(entry)) | ||
1543 | entry = pmd_mknonnuma(entry); | ||
1510 | entry = pmd_modify(entry, newprot); | 1544 | entry = pmd_modify(entry, newprot); |
1511 | ret = HPAGE_PMD_NR; | 1545 | ret = HPAGE_PMD_NR; |
1512 | BUG_ON(pmd_write(entry)); | 1546 | BUG_ON(pmd_write(entry)); |
@@ -1521,7 +1555,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, | |||
1521 | */ | 1555 | */ |
1522 | if (!is_huge_zero_page(page) && | 1556 | if (!is_huge_zero_page(page) && |
1523 | !pmd_numa(*pmd)) { | 1557 | !pmd_numa(*pmd)) { |
1524 | entry = pmdp_get_and_clear(mm, addr, pmd); | 1558 | entry = *pmd; |
1525 | entry = pmd_mknuma(entry); | 1559 | entry = pmd_mknuma(entry); |
1526 | ret = HPAGE_PMD_NR; | 1560 | ret = HPAGE_PMD_NR; |
1527 | } | 1561 | } |