 mm/huge_memory.c | 70
 mm/memory.c      | 53
 mm/migrate.c     | 19
 mm/mprotect.c    |  2
 4 files changed, 81 insertions(+), 63 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 610e3df2768a..cca80d96e509 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1278,64 +1278,90 @@ out:
 int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
+	struct anon_vma *anon_vma = NULL;
 	struct page *page;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
+	int page_nid = -1, this_nid = numa_node_id();
 	int target_nid;
-	int current_nid = -1;
-	bool migrated;
+	bool page_locked;
+	bool migrated = false;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
 		goto out_unlock;
 
 	page = pmd_page(pmd);
-	get_page(page);
-	current_nid = page_to_nid(page);
+	page_nid = page_to_nid(page);
 	count_vm_numa_event(NUMA_HINT_FAULTS);
-	if (current_nid == numa_node_id())
+	if (page_nid == this_nid)
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
+	/*
+	 * Acquire the page lock to serialise THP migrations but avoid dropping
+	 * page_table_lock if at all possible
+	 */
+	page_locked = trylock_page(page);
 	target_nid = mpol_misplaced(page, vma, haddr);
 	if (target_nid == -1) {
-		put_page(page);
-		goto clear_pmdnuma;
+		/* If the page was locked, there are no parallel migrations */
+		if (page_locked)
+			goto clear_pmdnuma;
+
+		/*
+		 * Otherwise wait for potential migrations and retry. We do
+		 * relock and check_same as the page may no longer be mapped.
+		 * As the fault is being retried, do not account for it.
+		 */
+		spin_unlock(&mm->page_table_lock);
+		wait_on_page_locked(page);
+		page_nid = -1;
+		goto out;
 	}
 
-	/* Acquire the page lock to serialise THP migrations */
+	/* Page is misplaced, serialise migrations and parallel THP splits */
+	get_page(page);
 	spin_unlock(&mm->page_table_lock);
-	lock_page(page);
+	if (!page_locked)
+		lock_page(page);
+	anon_vma = page_lock_anon_vma_read(page);
 
 	/* Confirm the PTE did not change while locked */
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp))) {
 		unlock_page(page);
 		put_page(page);
+		page_nid = -1;
 		goto out_unlock;
 	}
-	spin_unlock(&mm->page_table_lock);
 
-	/* Migrate the THP to the requested node */
+	/*
+	 * Migrate the THP to the requested node, returns with page unlocked
+	 * and pmd_numa cleared.
+	 */
+	spin_unlock(&mm->page_table_lock);
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
 				pmdp, pmd, addr, page, target_nid);
-	if (!migrated)
-		goto check_same;
-
-	task_numa_fault(target_nid, HPAGE_PMD_NR, true);
-	return 0;
+	if (migrated)
+		page_nid = target_nid;
 
-check_same:
-	spin_lock(&mm->page_table_lock);
-	if (unlikely(!pmd_same(pmd, *pmdp)))
-		goto out_unlock;
+	goto out;
 clear_pmdnuma:
+	BUG_ON(!PageLocked(page));
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
 	VM_BUG_ON(pmd_numa(*pmdp));
 	update_mmu_cache_pmd(vma, addr, pmdp);
+	unlock_page(page);
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
-	if (current_nid != -1)
-		task_numa_fault(current_nid, HPAGE_PMD_NR, false);
+
+out:
+	if (anon_vma)
+		page_unlock_anon_vma_read(anon_vma);
+
+	if (page_nid != -1)
+		task_numa_fault(page_nid, HPAGE_PMD_NR, migrated);
+
 	return 0;
 }
 
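The core of the huge_memory.c change is a trylock-or-wait pattern: the fault handler tries to take the page lock without dropping page_table_lock; if the page turns out to be properly placed but is already locked, a migration may be in flight, so the handler backs off, waits for the lock holder, and lets the fault be retried rather than touching the pmd under a racing migration. Below is a minimal userspace sketch of that pattern; the pthread mutexes and the fault_path()/misplaced names are hypothetical stand-ins, not kernel API, and a lock/unlock pair loosely approximates wait_on_page_locked().

/*
 * Sketch only: page_lock stands in for the THP's page lock,
 * table_lock for mm->page_table_lock. Build with -pthread.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

static void fault_path(bool misplaced)
{
	pthread_mutex_lock(&table_lock);

	/* Try for the page lock without dropping table_lock. */
	bool page_locked = (pthread_mutex_trylock(&page_lock) == 0);

	if (!misplaced) {
		if (page_locked) {
			/* No parallel migration: safe to fix up in place. */
			printf("cleared hint in place\n");
			pthread_mutex_unlock(&page_lock);
			pthread_mutex_unlock(&table_lock);
			return;
		}
		/* Possible parallel migration: back off, let the fault retry. */
		pthread_mutex_unlock(&table_lock);
		pthread_mutex_lock(&page_lock);	/* ~ wait_on_page_locked() */
		pthread_mutex_unlock(&page_lock);
		printf("waited out a migration; fault will be retried\n");
		return;
	}

	/* Misplaced: migrating requires holding the page lock. */
	pthread_mutex_unlock(&table_lock);
	if (!page_locked)
		pthread_mutex_lock(&page_lock);
	printf("migrating with the page lock held\n");
	pthread_mutex_unlock(&page_lock);
}

int main(void)
{
	fault_path(false);	/* properly placed page */
	fault_path(true);	/* misplaced page */
	return 0;
}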
diff --git a/mm/memory.c b/mm/memory.c
index 1311f26497e6..d176154c243f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3521,12 +3521,12 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
-				unsigned long addr, int current_nid)
+				unsigned long addr, int page_nid)
 {
 	get_page(page);
 
 	count_vm_numa_event(NUMA_HINT_FAULTS);
-	if (current_nid == numa_node_id())
+	if (page_nid == numa_node_id())
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	return mpol_misplaced(page, vma, addr);
@@ -3537,7 +3537,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *page = NULL;
 	spinlock_t *ptl;
-	int current_nid = -1;
+	int page_nid = -1;
 	int target_nid;
 	bool migrated = false;
 
@@ -3567,15 +3567,10 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return 0;
 	}
 
-	current_nid = page_to_nid(page);
-	target_nid = numa_migrate_prep(page, vma, addr, current_nid);
+	page_nid = page_to_nid(page);
+	target_nid = numa_migrate_prep(page, vma, addr, page_nid);
 	pte_unmap_unlock(ptep, ptl);
 	if (target_nid == -1) {
-		/*
-		 * Account for the fault against the current node if it not
-		 * being replaced regardless of where the page is located.
-		 */
-		current_nid = numa_node_id();
 		put_page(page);
 		goto out;
 	}
@@ -3583,11 +3578,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	/* Migrate to the requested node */
 	migrated = migrate_misplaced_page(page, target_nid);
 	if (migrated)
-		current_nid = target_nid;
+		page_nid = target_nid;
 
 out:
-	if (current_nid != -1)
-		task_numa_fault(current_nid, 1, migrated);
+	if (page_nid != -1)
+		task_numa_fault(page_nid, 1, migrated);
 	return 0;
 }
 
@@ -3602,7 +3597,6 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long offset;
 	spinlock_t *ptl;
 	bool numa = false;
-	int local_nid = numa_node_id();
 
 	spin_lock(&mm->page_table_lock);
 	pmd = *pmdp;
@@ -3625,9 +3619,10 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
 		pte_t pteval = *pte;
 		struct page *page;
-		int curr_nid = local_nid;
+		int page_nid = -1;
 		int target_nid;
-		bool migrated;
+		bool migrated = false;
+
 		if (!pte_present(pteval))
 			continue;
 		if (!pte_numa(pteval))
@@ -3649,25 +3644,19 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (unlikely(page_mapcount(page) != 1))
 			continue;
 
-		/*
-		 * Note that the NUMA fault is later accounted to either
-		 * the node that is currently running or where the page is
-		 * migrated to.
-		 */
-		curr_nid = local_nid;
-		target_nid = numa_migrate_prep(page, vma, addr,
-					       page_to_nid(page));
-		if (target_nid == -1) {
+		page_nid = page_to_nid(page);
+		target_nid = numa_migrate_prep(page, vma, addr, page_nid);
+		pte_unmap_unlock(pte, ptl);
+		if (target_nid != -1) {
+			migrated = migrate_misplaced_page(page, target_nid);
+			if (migrated)
+				page_nid = target_nid;
+		} else {
 			put_page(page);
-			continue;
 		}
 
-		/* Migrate to the requested node */
-		pte_unmap_unlock(pte, ptl);
-		migrated = migrate_misplaced_page(page, target_nid);
-		if (migrated)
-			curr_nid = target_nid;
-		task_numa_fault(curr_nid, 1, migrated);
+		if (page_nid != -1)
+			task_numa_fault(page_nid, 1, migrated);
 
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}
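After this change every call site in memory.c applies the same accounting rule: charge the hinting fault to the node the page was found on (page_nid), switch the charge to target_nid only if the migration actually succeeded, and skip the accounting entirely (page_nid == -1) when the fault raced and will be retried. A compilable sketch of just that rule; task_numa_fault_stub() is a hypothetical stand-in for the kernel's task_numa_fault().

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the kernel's task_numa_fault(). */
static void task_numa_fault_stub(int nid, int pages, bool migrated)
{
	printf("account fault: nid=%d pages=%d migrated=%d\n",
	       nid, pages, migrated);
}

static void account_hinting_fault(int page_nid, int target_nid,
				  bool migration_succeeded)
{
	bool migrated = false;

	if (target_nid != -1 && migration_succeeded) {
		migrated = true;
		page_nid = target_nid;	/* page moved; charge its new home */
	}
	if (page_nid != -1)
		task_numa_fault_stub(page_nid, 1, migrated);
}

int main(void)
{
	account_hinting_fault(0, -1, false);	/* well placed: charge node 0 */
	account_hinting_fault(0, 1, true);	/* migrated: charge node 1 */
	account_hinting_fault(0, 1, false);	/* migration failed: still node 0 */
	return 0;
}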
diff --git a/mm/migrate.c b/mm/migrate.c
index 7a7325ee1d08..c04692774e88 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1715,12 +1715,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		unlock_page(new_page);
 		put_page(new_page);		/* Free it */
 
-		unlock_page(page);
+		/* Retake the callers reference and putback on LRU */
+		get_page(page);
 		putback_lru_page(page);
-
-		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
-		isolated = 0;
-		goto out;
+		mod_zone_page_state(page_zone(page),
+			 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
+		goto out_fail;
 	}
 
 	/*
@@ -1737,9 +1737,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 	entry = pmd_mkhuge(entry);
 
-	page_add_new_anon_rmap(new_page, vma, haddr);
-
+	pmdp_clear_flush(vma, haddr, pmd);
 	set_pmd_at(mm, haddr, pmd, entry);
+	page_add_new_anon_rmap(new_page, vma, haddr);
 	update_mmu_cache_pmd(vma, address, &entry);
 	page_remove_rmap(page);
 	/*
@@ -1758,7 +1758,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
 	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
 
-out:
 	mod_zone_page_state(page_zone(page),
 			NR_ISOLATED_ANON + page_lru,
 			-HPAGE_PMD_NR);
@@ -1767,6 +1766,10 @@ out:
 out_fail:
 	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
 out_dropref:
+	entry = pmd_mknonnuma(entry);
+	set_pmd_at(mm, haddr, pmd, entry);
+	update_mmu_cache_pmd(vma, address, &entry);
+
 	unlock_page(page);
 	put_page(page);
 	return 0;
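The migrate.c hunks make two fixes: the new huge pmd is only published after pmdp_clear_flush() has retracted the old entry, so no thread can observe the old and new mappings at once, and the failure paths now restore a non-NUMA pmd rather than leaving a stale hinting entry behind. The sketch below illustrates only the first idea, the retract-then-publish ordering, using a C11 _Atomic pointer as a loose, hypothetical stand-in for the pmd; a real kernel also flushes remote TLBs after the clear.

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-in for a pmd entry pointing at a page. */
static _Atomic(int *) pmd_entry;

static void retract_then_publish(int *new_page)
{
	/* pmdp_clear_flush(): take the old translation down first... */
	atomic_store(&pmd_entry, NULL);
	/* ...(a remote TLB flush would happen here)... */

	/* ...then set_pmd_at(): publish the new translation. */
	atomic_store(&pmd_entry, new_page);
}

int main(void)
{
	int old_page = 1, new_page = 2;

	atomic_store(&pmd_entry, &old_page);
	retract_then_publish(&new_page);
	printf("pmd now maps page %d\n", *atomic_load(&pmd_entry));
	return 0;
}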
diff --git a/mm/mprotect.c b/mm/mprotect.c
index a3af058f68e4..412ba2b7326a 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -148,7 +148,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 				split_huge_page_pmd(vma, addr, pmd);
 			else if (change_huge_pmd(vma, pmd, addr, newprot,
 						 prot_numa)) {
-				pages += HPAGE_PMD_NR;
+				pages++;
 				continue;
 			}
 			/* fall through */
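The mprotect.c one-liner is a statistics fix: a huge pmd changed by change_huge_pmd() is a single update, and crediting it as HPAGE_PMD_NR pages inflated the caller's count by the number of base pages per THP. A standalone illustration, assuming the common x86-64 values of 4K base pages and 2M huge pages:

#include <stdio.h>

/* Assumed x86-64 defaults; other architectures differ. */
#define PAGE_SHIFT	12			/* 4K base pages */
#define HPAGE_PMD_SHIFT	21			/* 2M huge pages */
#define HPAGE_PMD_NR	(1UL << (HPAGE_PMD_SHIFT - PAGE_SHIFT))

int main(void)
{
	unsigned long huge_pmds_changed = 8;

	printf("pages += HPAGE_PMD_NR accounting: %lu\n",
	       huge_pmds_changed * HPAGE_PMD_NR);	/* 4096 */
	printf("pages++ accounting:              %lu\n",
	       huge_pmds_changed);			/* 8 */
	return 0;
}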