author	Mel Gorman <mgorman@suse.de>	2013-10-07 06:28:45 -0400
committer	Ingo Molnar <mingo@kernel.org>	2013-10-29 06:37:52 -0400
commit	c61109e34f60f6e85bb43c5a1cd51c0e3db40847 (patch)
tree	5b03f4c2d1376166da7e4a507ddf7a4c6e16eee7 /mm
parent	587fe586f44a48f9691001ba6c45b86c8e4ba21f (diff)
mm: numa: Sanitize task_numa_fault() callsites
There are three callers of task_numa_fault():

 - do_huge_pmd_numa_page():
     Accounts against the current node, not the node where the page
     resides, unless we migrated, in which case it accounts against
     the node we migrated to.

 - do_numa_page():
     Accounts against the current node, not the node where the page
     resides, unless we migrated, in which case it accounts against
     the node we migrated to.

 - do_pmd_numa_page():
     Accounts not at all when the page isn't migrated; otherwise it
     accounts against the node we migrated towards.

This seems wrong to me; all three sites should have the same
semantics. Furthermore, we should account against where the page
really is; we already know where the task is.

So modify all three sites to always account; we did after all receive
the fault. And always account to where the page is after migration,
regardless of success.

They all still differ on when they clear the PTE/PMD; ideally that
would get sorted too.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: <stable@kernel.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-8-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
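[Editor's illustration, not part of the commit: a minimal user-space C model of the accounting rule the patch installs at all three callsites. handle_fault() and account_numa_fault() are hypothetical stand-ins for the fault paths and for the kernel's task_numa_fault(); only the accounting decision is modeled.]

/*
 * Sketch of the unified rule: always account the fault, and always
 * against the node the page is on after any migration attempt,
 * whether or not the migration succeeded.
 */
#include <stdio.h>
#include <stdbool.h>

/* Hypothetical stand-in for the kernel's task_numa_fault(). */
static void account_numa_fault(int node, int pages, bool migrated)
{
	printf("fault: %d page(s) accounted to node %d (migrated=%d)\n",
	       pages, node, migrated);
}

/* Hypothetical fault path: page_nid is where the page resides,
 * target_nid is where placement policy wants it (-1 = leave it). */
static void handle_fault(int page_nid, int target_nid, bool migration_ok)
{
	bool migrated = false;

	if (target_nid != -1) {
		/* Attempt migration; on success the page moves. */
		migrated = migration_ok;
		if (migrated)
			page_nid = target_nid;
	}

	/* New rule: account wherever the page ended up, unconditionally. */
	if (page_nid != -1)
		account_numa_fault(page_nid, 1, migrated);
}

int main(void)
{
	handle_fault(0, 1, true);   /* migrated: accounted to node 1 */
	handle_fault(0, 1, false);  /* failed migration: stays on node 0 */
	handle_fault(0, -1, false); /* no target: accounted to node 0 */
	return 0;
}

Under the old scheme the second and third cases would have been charged to the faulting CPU's node (or, in do_pmd_numa_page(), not accounted at all); with this patch they stay with the page.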
Diffstat (limited to 'mm')
-rw-r--r--	mm/huge_memory.c	25
-rw-r--r--	mm/memory.c	53
2 files changed, 34 insertions(+), 44 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d8534b3630e4..00ddfcdd810e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1281,18 +1281,19 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct anon_vma *anon_vma = NULL;
 	struct page *page;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
+	int page_nid = -1, this_nid = numa_node_id();
 	int target_nid;
-	int current_nid = -1;
-	bool migrated, page_locked;
+	bool page_locked;
+	bool migrated = false;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
 		goto out_unlock;
 
 	page = pmd_page(pmd);
-	current_nid = page_to_nid(page);
+	page_nid = page_to_nid(page);
 	count_vm_numa_event(NUMA_HINT_FAULTS);
-	if (current_nid == numa_node_id())
+	if (page_nid == this_nid)
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	/*
@@ -1335,19 +1336,18 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spin_unlock(&mm->page_table_lock);
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
 				pmdp, pmd, addr, page, target_nid);
-	if (!migrated)
+	if (migrated)
+		page_nid = target_nid;
+	else
 		goto check_same;
 
-	task_numa_fault(target_nid, HPAGE_PMD_NR, true);
-	if (anon_vma)
-		page_unlock_anon_vma_read(anon_vma);
-	return 0;
+	goto out;
 
 check_same:
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp))) {
 		/* Someone else took our fault */
-		current_nid = -1;
+		page_nid = -1;
 		goto out_unlock;
 	}
 clear_pmdnuma:
@@ -1362,8 +1362,9 @@ out:
 	if (anon_vma)
 		page_unlock_anon_vma_read(anon_vma);
 
-	if (current_nid != -1)
-		task_numa_fault(current_nid, HPAGE_PMD_NR, false);
+	if (page_nid != -1)
+		task_numa_fault(page_nid, HPAGE_PMD_NR, migrated);
+
 	return 0;
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index 1311f26497e6..d176154c243f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3521,12 +3521,12 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
-				unsigned long addr, int current_nid)
+				unsigned long addr, int page_nid)
 {
 	get_page(page);
 
 	count_vm_numa_event(NUMA_HINT_FAULTS);
-	if (current_nid == numa_node_id())
+	if (page_nid == numa_node_id())
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	return mpol_misplaced(page, vma, addr);
@@ -3537,7 +3537,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *page = NULL;
 	spinlock_t *ptl;
-	int current_nid = -1;
+	int page_nid = -1;
 	int target_nid;
 	bool migrated = false;
 
@@ -3567,15 +3567,10 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return 0;
 	}
 
-	current_nid = page_to_nid(page);
-	target_nid = numa_migrate_prep(page, vma, addr, current_nid);
+	page_nid = page_to_nid(page);
+	target_nid = numa_migrate_prep(page, vma, addr, page_nid);
 	pte_unmap_unlock(ptep, ptl);
 	if (target_nid == -1) {
-		/*
-		 * Account for the fault against the current node if it not
-		 * being replaced regardless of where the page is located.
-		 */
-		current_nid = numa_node_id();
 		put_page(page);
 		goto out;
 	}
@@ -3583,11 +3578,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	/* Migrate to the requested node */
 	migrated = migrate_misplaced_page(page, target_nid);
 	if (migrated)
-		current_nid = target_nid;
+		page_nid = target_nid;
 
 out:
-	if (current_nid != -1)
-		task_numa_fault(current_nid, 1, migrated);
+	if (page_nid != -1)
+		task_numa_fault(page_nid, 1, migrated);
 	return 0;
 }
 
@@ -3602,7 +3597,6 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long offset;
 	spinlock_t *ptl;
 	bool numa = false;
-	int local_nid = numa_node_id();
 
 	spin_lock(&mm->page_table_lock);
 	pmd = *pmdp;
@@ -3625,9 +3619,10 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
 		pte_t pteval = *pte;
 		struct page *page;
-		int curr_nid = local_nid;
+		int page_nid = -1;
 		int target_nid;
-		bool migrated;
+		bool migrated = false;
+
 		if (!pte_present(pteval))
 			continue;
 		if (!pte_numa(pteval))
@@ -3649,25 +3644,19 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (unlikely(page_mapcount(page) != 1))
 			continue;
 
-		/*
-		 * Note that the NUMA fault is later accounted to either
-		 * the node that is currently running or where the page is
-		 * migrated to.
-		 */
-		curr_nid = local_nid;
-		target_nid = numa_migrate_prep(page, vma, addr,
-					       page_to_nid(page));
-		if (target_nid == -1) {
+		page_nid = page_to_nid(page);
+		target_nid = numa_migrate_prep(page, vma, addr, page_nid);
+		pte_unmap_unlock(pte, ptl);
+		if (target_nid != -1) {
+			migrated = migrate_misplaced_page(page, target_nid);
+			if (migrated)
+				page_nid = target_nid;
+		} else {
 			put_page(page);
-			continue;
 		}
 
-		/* Migrate to the requested node */
-		pte_unmap_unlock(pte, ptl);
-		migrated = migrate_misplaced_page(page, target_nid);
-		if (migrated)
-			curr_nid = target_nid;
-		task_numa_fault(curr_nid, 1, migrated);
+		if (page_nid != -1)
+			task_numa_fault(page_nid, 1, migrated);
 
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}