author     Mel Gorman <mgorman@suse.de>                        2013-10-07 06:28:45 -0400
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>     2013-11-12 22:05:34 -0500
commit     174dfa40d63b2250e299d5376937d200e4662b7c (patch)
tree       8e8eb3fdaa8eb818be95938f3591fedfae6b6cf6
parent     299723f2297726ca4c0a9d8ef3548ece5466431d (diff)
mm: numa: Sanitize task_numa_fault() callsites
commit c61109e34f60f6e85bb43c5a1cd51c0e3db40847 upstream.

There are three callers of task_numa_fault():

 - do_huge_pmd_numa_page():
     Accounts against the current node, not the node where the page
     resides, unless we migrated, in which case it accounts against
     the node we migrated to.

 - do_numa_page():
     Accounts against the current node, not the node where the page
     resides, unless we migrated, in which case it accounts against
     the node we migrated to.

 - do_pmd_numa_page():
     Accounts not at all when the page isn't migrated, otherwise
     accounts against the node we migrated towards.

This seems wrong to me; all three sites should have the same
semantics. Furthermore, we should account against where the page
really is; we already know where the task is.

So modify all three sites to always account; we did after all receive
the fault. And always account to where the page is after migration,
regardless of success.

They all still differ on when they clear the PTE/PMD; ideally that
would get sorted too.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-8-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--   mm/huge_memory.c   25
-rw-r--r--   mm/memory.c        53
2 files changed, 34 insertions(+), 44 deletions(-)
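Distilled, the pattern all three callsites converge on looks roughly like
the sketch below. This is a condensed illustration, not literal kernel
code: the identifiers (page_nid, target_nid, migrated) mirror the diff
that follows, and nr_pages stands in for 1 in the base-page paths and
HPAGE_PMD_NR in the huge-page path.

    /*
     * Condensed sketch of the unified accounting pattern; not literal
     * kernel code, identifiers mirror the diff below.
     */
    int page_nid = page_to_nid(page);    /* node the page resides on now */
    int target_nid = numa_migrate_prep(page, vma, addr, page_nid);
    bool migrated = false;

    if (target_nid != -1) {
            /* Try to move the page; on success, charge its new home node. */
            migrated = migrate_misplaced_page(page, target_nid);
            if (migrated)
                    page_nid = target_nid;
    }

    /* We took the fault, so always account it against the page's node. */
    if (page_nid != -1)
            task_numa_fault(page_nid, nr_pages, migrated);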
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 64f8aadb0595..f276efe68c82 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1291,18 +1291,19 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct anon_vma *anon_vma = NULL;
 	struct page *page;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
+	int page_nid = -1, this_nid = numa_node_id();
 	int target_nid;
-	int current_nid = -1;
-	bool migrated, page_locked;
+	bool page_locked;
+	bool migrated = false;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
 		goto out_unlock;
 
 	page = pmd_page(pmd);
-	current_nid = page_to_nid(page);
+	page_nid = page_to_nid(page);
 	count_vm_numa_event(NUMA_HINT_FAULTS);
-	if (current_nid == numa_node_id())
+	if (page_nid == this_nid)
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	/*
@@ -1345,19 +1346,18 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spin_unlock(&mm->page_table_lock);
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
 				pmdp, pmd, addr, page, target_nid);
-	if (!migrated)
+	if (migrated)
+		page_nid = target_nid;
+	else
 		goto check_same;
 
-	task_numa_fault(target_nid, HPAGE_PMD_NR, true);
-	if (anon_vma)
-		page_unlock_anon_vma_read(anon_vma);
-	return 0;
+	goto out;
 
 check_same:
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp))) {
 		/* Someone else took our fault */
-		current_nid = -1;
+		page_nid = -1;
 		goto out_unlock;
 	}
 clear_pmdnuma:
@@ -1372,8 +1372,9 @@ out:
 	if (anon_vma)
 		page_unlock_anon_vma_read(anon_vma);
 
-	if (current_nid != -1)
-		task_numa_fault(current_nid, HPAGE_PMD_NR, false);
+	if (page_nid != -1)
+		task_numa_fault(page_nid, HPAGE_PMD_NR, migrated);
+
 	return 0;
 }
 
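In the huge-page path, note how the patch funnels the success case through
the shared out: label instead of accounting inline. The tail of the
function now reads roughly as follows (a sketch, with locking and the
check_same/clear_pmdnuma recovery elided):

    migrated = migrate_misplaced_transhuge_page(mm, vma,
                            pmdp, pmd, addr, page, target_nid);
    if (migrated)
            page_nid = target_nid;  /* charge the node we migrated to */
    else
            goto check_same;        /* migration failed: recheck the PMD */
    goto out;
    /* ... check_same / clear_pmdnuma recovery elided ... */
    out:
            if (page_nid != -1)
                    task_numa_fault(page_nid, HPAGE_PMD_NR, migrated);
            return 0;

The only path that skips accounting is the pmd_same() race in check_same,
where another thread already handled the fault and page_nid is reset to -1.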
diff --git a/mm/memory.c b/mm/memory.c
index 0a6f3d16747d..4b60011907d7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3525,12 +3525,12 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
-				unsigned long addr, int current_nid)
+				unsigned long addr, int page_nid)
 {
 	get_page(page);
 
 	count_vm_numa_event(NUMA_HINT_FAULTS);
-	if (current_nid == numa_node_id())
+	if (page_nid == numa_node_id())
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	return mpol_misplaced(page, vma, addr);
@@ -3541,7 +3541,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *page = NULL;
 	spinlock_t *ptl;
-	int current_nid = -1;
+	int page_nid = -1;
 	int target_nid;
 	bool migrated = false;
 
@@ -3571,15 +3571,10 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return 0;
 	}
 
-	current_nid = page_to_nid(page);
-	target_nid = numa_migrate_prep(page, vma, addr, current_nid);
+	page_nid = page_to_nid(page);
+	target_nid = numa_migrate_prep(page, vma, addr, page_nid);
 	pte_unmap_unlock(ptep, ptl);
 	if (target_nid == -1) {
-		/*
-		 * Account for the fault against the current node if it not
-		 * being replaced regardless of where the page is located.
-		 */
-		current_nid = numa_node_id();
 		put_page(page);
 		goto out;
 	}
@@ -3587,11 +3582,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	/* Migrate to the requested node */
 	migrated = migrate_misplaced_page(page, target_nid);
 	if (migrated)
-		current_nid = target_nid;
+		page_nid = target_nid;
 
 out:
-	if (current_nid != -1)
-		task_numa_fault(current_nid, 1, migrated);
+	if (page_nid != -1)
+		task_numa_fault(page_nid, 1, migrated);
 	return 0;
 }
 
@@ -3606,7 +3601,6 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long offset;
 	spinlock_t *ptl;
 	bool numa = false;
-	int local_nid = numa_node_id();
 
 	spin_lock(&mm->page_table_lock);
 	pmd = *pmdp;
@@ -3629,9 +3623,10 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
 		pte_t pteval = *pte;
 		struct page *page;
-		int curr_nid = local_nid;
+		int page_nid = -1;
 		int target_nid;
-		bool migrated;
+		bool migrated = false;
+
 		if (!pte_present(pteval))
 			continue;
 		if (!pte_numa(pteval))
@@ -3653,25 +3648,19 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (unlikely(page_mapcount(page) != 1))
 			continue;
 
-		/*
-		 * Note that the NUMA fault is later accounted to either
-		 * the node that is currently running or where the page is
-		 * migrated to.
-		 */
-		curr_nid = local_nid;
-		target_nid = numa_migrate_prep(page, vma, addr,
-					       page_to_nid(page));
-		if (target_nid == -1) {
+		page_nid = page_to_nid(page);
+		target_nid = numa_migrate_prep(page, vma, addr, page_nid);
+		pte_unmap_unlock(pte, ptl);
+		if (target_nid != -1) {
+			migrated = migrate_misplaced_page(page, target_nid);
+			if (migrated)
+				page_nid = target_nid;
+		} else {
 			put_page(page);
-			continue;
 		}
 
-		/* Migrate to the requested node */
-		pte_unmap_unlock(pte, ptl);
-		migrated = migrate_misplaced_page(page, target_nid);
-		if (migrated)
-			curr_nid = target_nid;
-		task_numa_fault(curr_nid, 1, migrated);
+		if (page_nid != -1)
+			task_numa_fault(page_nid, 1, migrated);
 
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}
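For the pmd-range handler the same pattern runs once per PTE inside the
scan loop; roughly (a sketch, with the pte_present/pte_numa/mapcount
filtering elided):

    page_nid = page_to_nid(page);
    target_nid = numa_migrate_prep(page, vma, addr, page_nid);
    pte_unmap_unlock(pte, ptl);
    if (target_nid != -1) {
            migrated = migrate_misplaced_page(page, target_nid);
            if (migrated)
                    page_nid = target_nid;
    } else {
            put_page(page); /* drop the reference numa_migrate_prep() took */
    }

    if (page_nid != -1)
            task_numa_fault(page_nid, 1, migrated);

Before this patch, a page not selected for migration (target_nid == -1)
fell out of the loop via continue with no accounting at all; now every
scanned PTE that passes the filters is accounted, against whichever node
its page ended up on.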