 mm/huge_memory.c | 70
 mm/memory.c      | 53
 mm/migrate.c     | 19
 mm/mprotect.c    |  2
 4 files changed, 81 insertions(+), 63 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 610e3df2768a..cca80d96e509 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1278,64 +1278,90 @@ out:
 int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
+	struct anon_vma *anon_vma = NULL;
 	struct page *page;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
+	int page_nid = -1, this_nid = numa_node_id();
 	int target_nid;
-	int current_nid = -1;
-	bool migrated;
+	bool page_locked;
+	bool migrated = false;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
 		goto out_unlock;
 
 	page = pmd_page(pmd);
-	get_page(page);
-	current_nid = page_to_nid(page);
+	page_nid = page_to_nid(page);
 	count_vm_numa_event(NUMA_HINT_FAULTS);
-	if (current_nid == numa_node_id())
+	if (page_nid == this_nid)
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
+	/*
+	 * Acquire the page lock to serialise THP migrations but avoid dropping
+	 * page_table_lock if at all possible
+	 */
+	page_locked = trylock_page(page);
 	target_nid = mpol_misplaced(page, vma, haddr);
 	if (target_nid == -1) {
-		put_page(page);
-		goto clear_pmdnuma;
+		/* If the page was locked, there are no parallel migrations */
+		if (page_locked)
+			goto clear_pmdnuma;
+
+		/*
+		 * Otherwise wait for potential migrations and retry. We do
+		 * relock and check_same as the page may no longer be mapped.
+		 * As the fault is being retried, do not account for it.
+		 */
+		spin_unlock(&mm->page_table_lock);
+		wait_on_page_locked(page);
+		page_nid = -1;
+		goto out;
 	}
 
-	/* Acquire the page lock to serialise THP migrations */
+	/* Page is misplaced, serialise migrations and parallel THP splits */
+	get_page(page);
 	spin_unlock(&mm->page_table_lock);
-	lock_page(page);
+	if (!page_locked)
+		lock_page(page);
+	anon_vma = page_lock_anon_vma_read(page);
 
 	/* Confirm the PTE did not change while locked */
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp))) {
 		unlock_page(page);
 		put_page(page);
+		page_nid = -1;
 		goto out_unlock;
 	}
-	spin_unlock(&mm->page_table_lock);
 
-	/* Migrate the THP to the requested node */
+	/*
+	 * Migrate the THP to the requested node, returns with page unlocked
+	 * and pmd_numa cleared.
+	 */
+	spin_unlock(&mm->page_table_lock);
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
 				pmdp, pmd, addr, page, target_nid);
-	if (!migrated)
-		goto check_same;
-
-	task_numa_fault(target_nid, HPAGE_PMD_NR, true);
-	return 0;
+	if (migrated)
+		page_nid = target_nid;
 
-check_same:
-	spin_lock(&mm->page_table_lock);
-	if (unlikely(!pmd_same(pmd, *pmdp)))
-		goto out_unlock;
+	goto out;
 clear_pmdnuma:
+	BUG_ON(!PageLocked(page));
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
 	VM_BUG_ON(pmd_numa(*pmdp));
 	update_mmu_cache_pmd(vma, addr, pmdp);
+	unlock_page(page);
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
-	if (current_nid != -1)
-		task_numa_fault(current_nid, HPAGE_PMD_NR, false);
+
+out:
+	if (anon_vma)
+		page_unlock_anon_vma_read(anon_vma);
+
+	if (page_nid != -1)
+		task_numa_fault(page_nid, HPAGE_PMD_NR, migrated);
+
 	return 0;
 }
 
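The locking protocol above is the heart of the change: the fault handler opportunistically trylocks the page while still holding page_table_lock, so the common case never sleeps, and only a fault that races with a THP migration drops the lock and waits for the migrator. Below is a minimal userspace sketch of that pattern using pthreads; every name in it is a hypothetical stand-in, not kernel code.

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER; /* ~ page_table_lock */
static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;  /* ~ the page lock */
static bool pmd_numa_bit = true;	/* stands in for the pmd_numa() hint */

/* Stand-in for mpol_misplaced(): -1 means the page is already well placed. */
static int misplaced_target(void)
{
	return -1;
}

static void handle_hinting_fault(void)
{
	pthread_mutex_lock(&table_lock);

	/* Cheap attempt to serialise against migration without sleeping */
	bool page_locked = (pthread_mutex_trylock(&page_lock) == 0);

	if (misplaced_target() == -1) {
		if (page_locked) {
			/* No parallel migration: clear the hint in place */
			pmd_numa_bit = false;
			pthread_mutex_unlock(&page_lock);
			pthread_mutex_unlock(&table_lock);
			return;
		}
		/*
		 * A migration holds the page lock: drop our lock, wait
		 * for the migrator to finish, and let the fault retry.
		 */
		pthread_mutex_unlock(&table_lock);
		pthread_mutex_lock(&page_lock);	/* ~ wait_on_page_locked() */
		pthread_mutex_unlock(&page_lock);
		return;
	}

	/* Misplaced: the migration path would start here */
	if (page_locked)
		pthread_mutex_unlock(&page_lock);
	pthread_mutex_unlock(&table_lock);
}

int main(void)
{
	handle_hinting_fault();
	printf("pmd_numa_bit = %d\n", pmd_numa_bit);
	return 0;
}
```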
diff --git a/mm/memory.c b/mm/memory.c
index 1311f26497e6..d176154c243f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3521,12 +3521,12 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
-				unsigned long addr, int current_nid)
+				unsigned long addr, int page_nid)
 {
 	get_page(page);
 
 	count_vm_numa_event(NUMA_HINT_FAULTS);
-	if (current_nid == numa_node_id())
+	if (page_nid == numa_node_id())
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	return mpol_misplaced(page, vma, addr);
@@ -3537,7 +3537,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *page = NULL;
 	spinlock_t *ptl;
-	int current_nid = -1;
+	int page_nid = -1;
 	int target_nid;
 	bool migrated = false;
 
@@ -3567,15 +3567,10 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return 0;
 	}
 
-	current_nid = page_to_nid(page);
-	target_nid = numa_migrate_prep(page, vma, addr, current_nid);
+	page_nid = page_to_nid(page);
+	target_nid = numa_migrate_prep(page, vma, addr, page_nid);
 	pte_unmap_unlock(ptep, ptl);
 	if (target_nid == -1) {
-		/*
-		 * Account for the fault against the current node if it not
-		 * being replaced regardless of where the page is located.
-		 */
-		current_nid = numa_node_id();
 		put_page(page);
 		goto out;
 	}
@@ -3583,11 +3578,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	/* Migrate to the requested node */
 	migrated = migrate_misplaced_page(page, target_nid);
 	if (migrated)
-		current_nid = target_nid;
+		page_nid = target_nid;
 
 out:
-	if (current_nid != -1)
-		task_numa_fault(current_nid, 1, migrated);
+	if (page_nid != -1)
+		task_numa_fault(page_nid, 1, migrated);
 	return 0;
 }
 
@@ -3602,7 +3597,6 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long offset;
 	spinlock_t *ptl;
 	bool numa = false;
-	int local_nid = numa_node_id();
 
 	spin_lock(&mm->page_table_lock);
 	pmd = *pmdp;
@@ -3625,9 +3619,10 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
 		pte_t pteval = *pte;
 		struct page *page;
-		int curr_nid = local_nid;
+		int page_nid = -1;
 		int target_nid;
-		bool migrated;
+		bool migrated = false;
+
 		if (!pte_present(pteval))
 			continue;
 		if (!pte_numa(pteval))
@@ -3649,25 +3644,19 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (unlikely(page_mapcount(page) != 1))
 			continue;
 
-		/*
-		 * Note that the NUMA fault is later accounted to either
-		 * the node that is currently running or where the page is
-		 * migrated to.
-		 */
-		curr_nid = local_nid;
-		target_nid = numa_migrate_prep(page, vma, addr,
-					       page_to_nid(page));
-		if (target_nid == -1) {
+		page_nid = page_to_nid(page);
+		target_nid = numa_migrate_prep(page, vma, addr, page_nid);
+		pte_unmap_unlock(pte, ptl);
+		if (target_nid != -1) {
+			migrated = migrate_misplaced_page(page, target_nid);
+			if (migrated)
+				page_nid = target_nid;
+		} else {
 			put_page(page);
-			continue;
 		}
 
-		/* Migrate to the requested node */
-		pte_unmap_unlock(pte, ptl);
-		migrated = migrate_misplaced_page(page, target_nid);
-		if (migrated)
-			curr_nid = target_nid;
-		task_numa_fault(curr_nid, 1, migrated);
+		if (page_nid != -1)
+			task_numa_fault(page_nid, 1, migrated);
 
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}
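After these hunks, do_numa_page() and do_pmd_numa_page() converge on a single accounting rule: the hinting fault is charged to the node the page actually ends up on (page_nid, promoted to target_nid on successful migration), and not charged at all when the fault will be retried. A minimal sketch of that rule follows; the helper names are hypothetical stubs, not the kernel functions.

```c
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for task_numa_fault(): just report what would be accounted. */
static void task_numa_fault_stub(int nid, int pages, bool migrated)
{
	printf("account %d page(s) to node %d (migrated=%d)\n",
	       pages, nid, migrated);
}

/* The accounting rule both fault handlers now share. */
static void account_hinting_fault(int page_nid, int target_nid, bool migrated)
{
	if (migrated)
		page_nid = target_nid;	/* page now lives on target_nid */
	if (page_nid != -1)
		task_numa_fault_stub(page_nid, 1, migrated);
}

int main(void)
{
	account_hinting_fault(0, 1, true);	/* misplaced and migrated: node 1 */
	account_hinting_fault(0, -1, false);	/* correctly placed: node 0 */
	account_hinting_fault(-1, -1, false);	/* retried fault: not accounted */
	return 0;
}
```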
diff --git a/mm/migrate.c b/mm/migrate.c
index 7a7325ee1d08..c04692774e88 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1715,12 +1715,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		unlock_page(new_page);
 		put_page(new_page);		/* Free it */
 
-		unlock_page(page);
+		/* Retake the caller's reference and putback on LRU */
+		get_page(page);
 		putback_lru_page(page);
-
-		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
-		isolated = 0;
-		goto out;
+		mod_zone_page_state(page_zone(page),
+			 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
+		goto out_fail;
 	}
 
 	/*
@@ -1737,9 +1737,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 	entry = pmd_mkhuge(entry);
 
-	page_add_new_anon_rmap(new_page, vma, haddr);
-
+	pmdp_clear_flush(vma, haddr, pmd);
 	set_pmd_at(mm, haddr, pmd, entry);
+	page_add_new_anon_rmap(new_page, vma, haddr);
 	update_mmu_cache_pmd(vma, address, &entry);
 	page_remove_rmap(page);
 	/*
@@ -1758,7 +1758,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
 	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
 
-out:
 	mod_zone_page_state(page_zone(page),
 			NR_ISOLATED_ANON + page_lru,
 			-HPAGE_PMD_NR);
@@ -1767,6 +1766,10 @@ out:
 out_fail:
 	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
 out_dropref:
+	entry = pmd_mknonnuma(entry);
+	set_pmd_at(mm, haddr, pmd, entry);
+	update_mmu_cache_pmd(vma, address, &entry);
+
 	unlock_page(page);
 	put_page(page);
 	return 0;
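Two fixes are visible here. The failure path now retakes the caller's page reference and corrects the NR_ISOLATED accounting before putback, and out_dropref reinstates a usable non-NUMA PMD so a failed migration cannot leave the task re-faulting forever. On the success path, the old PMD is cleared and flushed before the new one is installed, closing the window in which another CPU could still use the stale translation. The sketch below models only that clear-then-publish ordering, with illustrative stand-ins rather than kernel primitives.

```c
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned long pmd_slot = 0x1111;	/* stands in for the PMD */

static void flush_tlb_stub(void)
{
	/* model: remote observers forget any cached copy of the old entry */
}

static void replace_entry(unsigned long new_entry)
{
	atomic_store(&pmd_slot, 0);	/* ~ pmdp_clear_flush(): clear... */
	flush_tlb_stub();		/* ...and flush, and only then... */
	atomic_store(&pmd_slot, new_entry);	/* ~ set_pmd_at(): publish */
}

int main(void)
{
	replace_entry(0x2222);
	printf("entry = %#lx\n", (unsigned long)atomic_load(&pmd_slot));
	return 0;
}
```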
diff --git a/mm/mprotect.c b/mm/mprotect.c
index a3af058f68e4..412ba2b7326a 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -148,7 +148,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 				split_huge_page_pmd(vma, addr, pmd);
 			else if (change_huge_pmd(vma, pmd, addr, newprot,
 						 prot_numa)) {
-				pages += HPAGE_PMD_NR;
+				pages++;
 				continue;
 			}
 			/* fall through */
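The one-line mprotect.c change alters what a successful change_huge_pmd() contributes to the returned pages count: one PMD-level update is now counted once rather than as HPAGE_PMD_NR base pages, which had inflated the NUMA PTE-update statistics by the THP factor. Illustrative arithmetic below; the value 512 assumes 2 MB huge pages over 4 KB base pages.

```c
#include <stdio.h>

#define HPAGE_PMD_NR 512	/* assumption: 2 MB THP / 4 KB base pages */

int main(void)
{
	unsigned long pages_old = 0, pages_new = 0;

	/* one huge PMD had its protection changed */
	pages_old += HPAGE_PMD_NR;	/* before the patch */
	pages_new++;			/* after the patch  */

	printf("counted before: %lu, after: %lu\n", pages_old, pages_new);
	return 0;
}
```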
