-rw-r--r--  mm/huge_memory.c  70
-rw-r--r--  mm/memory.c       53
-rw-r--r--  mm/migrate.c      19
-rw-r--r--  mm/mprotect.c      2
4 files changed, 81 insertions, 63 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 610e3df2768a..cca80d96e509 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1278,64 +1278,90 @@ out:
 int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
+	struct anon_vma *anon_vma = NULL;
 	struct page *page;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
+	int page_nid = -1, this_nid = numa_node_id();
 	int target_nid;
-	int current_nid = -1;
-	bool migrated;
+	bool page_locked;
+	bool migrated = false;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
 		goto out_unlock;
 
 	page = pmd_page(pmd);
-	get_page(page);
-	current_nid = page_to_nid(page);
+	page_nid = page_to_nid(page);
 	count_vm_numa_event(NUMA_HINT_FAULTS);
-	if (current_nid == numa_node_id())
+	if (page_nid == this_nid)
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
+	/*
+	 * Acquire the page lock to serialise THP migrations but avoid dropping
+	 * page_table_lock if at all possible
+	 */
+	page_locked = trylock_page(page);
 	target_nid = mpol_misplaced(page, vma, haddr);
 	if (target_nid == -1) {
-		put_page(page);
-		goto clear_pmdnuma;
+		/* If the page was locked, there are no parallel migrations */
+		if (page_locked)
+			goto clear_pmdnuma;
+
+		/*
+		 * Otherwise wait for potential migrations and retry. We do
+		 * relock and check_same as the page may no longer be mapped.
+		 * As the fault is being retried, do not account for it.
+		 */
+		spin_unlock(&mm->page_table_lock);
+		wait_on_page_locked(page);
+		page_nid = -1;
+		goto out;
 	}
 
-	/* Acquire the page lock to serialise THP migrations */
+	/* Page is misplaced, serialise migrations and parallel THP splits */
+	get_page(page);
 	spin_unlock(&mm->page_table_lock);
-	lock_page(page);
+	if (!page_locked)
+		lock_page(page);
+	anon_vma = page_lock_anon_vma_read(page);
 
 	/* Confirm the PTE did not while locked */
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp))) {
 		unlock_page(page);
 		put_page(page);
+		page_nid = -1;
 		goto out_unlock;
 	}
-	spin_unlock(&mm->page_table_lock);
 
-	/* Migrate the THP to the requested node */
+	/*
+	 * Migrate the THP to the requested node, returns with page unlocked
+	 * and pmd_numa cleared.
+	 */
+	spin_unlock(&mm->page_table_lock);
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
 				pmdp, pmd, addr, page, target_nid);
-	if (!migrated)
-		goto check_same;
-
-	task_numa_fault(target_nid, HPAGE_PMD_NR, true);
-	return 0;
+	if (migrated)
+		page_nid = target_nid;
 
-check_same:
-	spin_lock(&mm->page_table_lock);
-	if (unlikely(!pmd_same(pmd, *pmdp)))
-		goto out_unlock;
+	goto out;
 clear_pmdnuma:
+	BUG_ON(!PageLocked(page));
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
 	VM_BUG_ON(pmd_numa(*pmdp));
 	update_mmu_cache_pmd(vma, addr, pmdp);
+	unlock_page(page);
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
-	if (current_nid != -1)
-		task_numa_fault(current_nid, HPAGE_PMD_NR, false);
+
+out:
+	if (anon_vma)
+		page_unlock_anon_vma_read(anon_vma);
+
+	if (page_nid != -1)
+		task_numa_fault(page_nid, HPAGE_PMD_NR, migrated);
+
 	return 0;
 }
 
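The do_huge_pmd_numa_page() hunk above hinges on one locking pattern: try for the page lock while page_table_lock is still held, and only sleep for it after the spinlock has been dropped; a task that merely raced with an in-flight migration waits for the lock holder and lets the fault be retried, unaccounted. Below is a minimal userspace sketch of that pattern, not kernel code: the pthread mutexes stand in for mm->page_table_lock and the THP's page lock, and handle_fault() is a hypothetical distillation.

/*
 * Userspace sketch (illustrative only) of the trylock-then-wait pattern.
 * Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER; /* ~ mm->page_table_lock */
static pthread_mutex_t page_lock  = PTHREAD_MUTEX_INITIALIZER; /* ~ the THP's page lock */

/* Returns true if this caller handled the fault, false if it must retry. */
static bool handle_fault(bool misplaced)
{
	pthread_mutex_lock(&table_lock);

	/* trylock_page() equivalent: never sleep with the spinlock held. */
	bool page_locked = (pthread_mutex_trylock(&page_lock) == 0);

	if (!misplaced) {
		if (page_locked) {
			/* We took the lock ourselves: no parallel migration. */
			pthread_mutex_unlock(&page_lock);
			pthread_mutex_unlock(&table_lock);
			return true;
		}
		/* Racing with a migration: drop the spinlock and wait it out. */
		pthread_mutex_unlock(&table_lock);
		pthread_mutex_lock(&page_lock);	/* ~ wait_on_page_locked() */
		pthread_mutex_unlock(&page_lock);
		return false;			/* fault is retried, unaccounted */
	}

	/* Misplaced: the page lock must be held across the migration. */
	pthread_mutex_unlock(&table_lock);
	if (!page_locked)
		pthread_mutex_lock(&page_lock);
	/* ... migrate_misplaced_transhuge_page() would run here ... */
	pthread_mutex_unlock(&page_lock);
	return true;
}

int main(void)
{
	printf("handled: %d\n", handle_fault(false));
	printf("handled: %d\n", handle_fault(true));
	return 0;
}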
diff --git a/mm/memory.c b/mm/memory.c
index 1311f26497e6..d176154c243f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3521,12 +3521,12 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
-			unsigned long addr, int current_nid)
+			unsigned long addr, int page_nid)
 {
 	get_page(page);
 
 	count_vm_numa_event(NUMA_HINT_FAULTS);
-	if (current_nid == numa_node_id())
+	if (page_nid == numa_node_id())
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	return mpol_misplaced(page, vma, addr);
@@ -3537,7 +3537,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *page = NULL;
 	spinlock_t *ptl;
-	int current_nid = -1;
+	int page_nid = -1;
 	int target_nid;
 	bool migrated = false;
 
@@ -3567,15 +3567,10 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return 0;
 	}
 
-	current_nid = page_to_nid(page);
-	target_nid = numa_migrate_prep(page, vma, addr, current_nid);
+	page_nid = page_to_nid(page);
+	target_nid = numa_migrate_prep(page, vma, addr, page_nid);
 	pte_unmap_unlock(ptep, ptl);
 	if (target_nid == -1) {
-		/*
-		 * Account for the fault against the current node if it not
-		 * being replaced regardless of where the page is located.
-		 */
-		current_nid = numa_node_id();
 		put_page(page);
 		goto out;
 	}
@@ -3583,11 +3578,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	/* Migrate to the requested node */
 	migrated = migrate_misplaced_page(page, target_nid);
 	if (migrated)
-		current_nid = target_nid;
+		page_nid = target_nid;
 
 out:
-	if (current_nid != -1)
-		task_numa_fault(current_nid, 1, migrated);
+	if (page_nid != -1)
+		task_numa_fault(page_nid, 1, migrated);
 	return 0;
 }
 
@@ -3602,7 +3597,6 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long offset;
 	spinlock_t *ptl;
 	bool numa = false;
-	int local_nid = numa_node_id();
 
 	spin_lock(&mm->page_table_lock);
 	pmd = *pmdp;
@@ -3625,9 +3619,10 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
 		pte_t pteval = *pte;
 		struct page *page;
-		int curr_nid = local_nid;
+		int page_nid = -1;
 		int target_nid;
-		bool migrated;
+		bool migrated = false;
+
 		if (!pte_present(pteval))
 			continue;
 		if (!pte_numa(pteval))
@@ -3649,25 +3644,19 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (unlikely(page_mapcount(page) != 1))
 			continue;
 
-		/*
-		 * Note that the NUMA fault is later accounted to either
-		 * the node that is currently running or where the page is
-		 * migrated to.
-		 */
-		curr_nid = local_nid;
-		target_nid = numa_migrate_prep(page, vma, addr,
-					       page_to_nid(page));
-		if (target_nid == -1) {
+		page_nid = page_to_nid(page);
+		target_nid = numa_migrate_prep(page, vma, addr, page_nid);
+		pte_unmap_unlock(pte, ptl);
+		if (target_nid != -1) {
+			migrated = migrate_misplaced_page(page, target_nid);
+			if (migrated)
+				page_nid = target_nid;
+		} else {
 			put_page(page);
-			continue;
 		}
 
-		/* Migrate to the requested node */
-		pte_unmap_unlock(pte, ptl);
-		migrated = migrate_misplaced_page(page, target_nid);
-		if (migrated)
-			curr_nid = target_nid;
-		task_numa_fault(curr_nid, 1, migrated);
+		if (page_nid != -1)
+			task_numa_fault(page_nid, 1, migrated);
 
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}
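Both do_numa_page() and do_pmd_numa_page() now converge on the same accounting rule: the hinting fault is charged to the node the page actually sits on (page_nid), bumped to target_nid only when migration succeeded, and skipped entirely when the fault will be retried. A standalone sketch of that rule follows; it is plain C with a stubbed task_numa_fault() and a hypothetical account_hint_fault() helper, not kernel code.

/* Sketch (illustrative only) of the shared accounting rule. */
#include <stdbool.h>
#include <stdio.h>

/* Stub for the kernel's task_numa_fault(); just reports the charge. */
static void task_numa_fault(int nid, int pages, bool migrated)
{
	printf("charge node %d: %d page(s), migrated=%d\n",
	       nid, pages, migrated);
}

/* page_nid == -1 means "do not account" (e.g. the fault is retried). */
static void account_hint_fault(int page_nid, int target_nid, bool migrated)
{
	if (migrated)
		page_nid = target_nid;	/* page moved; charge its new home */
	if (page_nid != -1)
		task_numa_fault(page_nid, 1, migrated);
}

int main(void)
{
	account_hint_fault(0, 1, true);		/* misplaced, migration succeeded */
	account_hint_fault(0, -1, false);	/* already on the right node */
	account_hint_fault(-1, -1, false);	/* retried fault: no accounting */
	return 0;
}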
diff --git a/mm/migrate.c b/mm/migrate.c
index 7a7325ee1d08..c04692774e88 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1715,12 +1715,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		unlock_page(new_page);
 		put_page(new_page);		/* Free it */
 
-		unlock_page(page);
+		/* Retake the callers reference and putback on LRU */
+		get_page(page);
 		putback_lru_page(page);
-
-		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
-		isolated = 0;
-		goto out;
+		mod_zone_page_state(page_zone(page),
+			 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
+		goto out_fail;
 	}
 
 	/*
@@ -1737,9 +1737,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 	entry = pmd_mkhuge(entry);
 
-	page_add_new_anon_rmap(new_page, vma, haddr);
-
+	pmdp_clear_flush(vma, haddr, pmd);
 	set_pmd_at(mm, haddr, pmd, entry);
+	page_add_new_anon_rmap(new_page, vma, haddr);
 	update_mmu_cache_pmd(vma, address, &entry);
 	page_remove_rmap(page);
 	/*
@@ -1758,7 +1758,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
 	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
 
-out:
 	mod_zone_page_state(page_zone(page),
 			NR_ISOLATED_ANON + page_lru,
 			-HPAGE_PMD_NR);
@@ -1767,6 +1766,10 @@ out:
 out_fail:
 	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
 out_dropref:
+	entry = pmd_mknonnuma(entry);
+	set_pmd_at(mm, haddr, pmd, entry);
+	update_mmu_cache_pmd(vma, address, &entry);
+
 	unlock_page(page);
 	put_page(page);
 	return 0;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index a3af058f68e4..412ba2b7326a 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -148,7 +148,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 				split_huge_page_pmd(vma, addr, pmd);
 			else if (change_huge_pmd(vma, pmd, addr, newprot,
 						 prot_numa)) {
-				pages += HPAGE_PMD_NR;
+				pages++;
 				continue;
 			}
 			/* fall through */
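The mprotect.c hunk changes the unit change_pmd_range() reports for a huge pmd updated by change_huge_pmd(): one pmd now counts once rather than as HPAGE_PMD_NR base pages. A trivial sketch of the difference follows; the constant is an assumption (512 = 2MB THP over 4KB base pages on x86_64), used only for illustration and not taken from the patch.

/* Sketch (illustrative only) of the reported-page-count change. */
#include <stdio.h>

#define HPAGE_PMD_NR 512	/* assumed x86_64 value */

int main(void)
{
	unsigned long pages_old = 0, pages_new = 0;
	int huge_pmds_updated = 3;

	pages_old += (unsigned long)huge_pmds_updated * HPAGE_PMD_NR;	/* removed: pages += HPAGE_PMD_NR */
	pages_new += huge_pmds_updated;					/* new: pages++ per pmd */

	printf("reported as updated: old=%lu new=%lu\n", pages_old, pages_new);
	return 0;
}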