author		Mel Gorman <mgorman@suse.de>	2013-10-07 06:28:45 -0400
committer	Ingo Molnar <mingo@kernel.org>	2013-10-09 06:39:44 -0400
commit		8191acbd30c73e45c24ad16c372e0b42cc7ac8f8 (patch)
tree		1c558203ad1f7e141ef3e587d5d3d846ac82e0ec /mm
parent		b8916634b77bffb233d8f2f45703c80343457cc1 (diff)
mm: numa: Sanitize task_numa_fault() callsites
There are three callers of task_numa_fault():

 - do_huge_pmd_numa_page():
     Accounts against the current node, not the node where the
     page resides, unless we migrated, in which case it accounts
     against the node we migrated to.

 - do_numa_page():
     Accounts against the current node, not the node where the
     page resides, unless we migrated, in which case it accounts
     against the node we migrated to.

 - do_pmd_numa_page():
     Accounts not at all when the page isn't migrated; otherwise
     accounts against the node we migrated towards.

This seems wrong; all three sites should have the same semantics, and
furthermore we should account against where the page really is, since
we already know where the task is.

So modify all three sites to always account; we did after all receive
the fault. And always account to where the page is after migration,
regardless of success.

They all still differ on when they clear the PTE/PMD; ideally that
would get sorted too.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-8-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
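[Editor's illustration] To make the unified semantics concrete, here is a
minimal, compilable C sketch of the accounting pattern all three callsites
converge on after this patch. This is not kernel code: the *_stub helpers
are hypothetical stand-ins for page_to_nid(), migrate_misplaced_page() and
task_numa_fault(), and node numbers are invented for the demo.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the real mm/ helpers. */
static int page_to_nid_stub(void) { return 0; }	/* node the page resides on */
static bool migrate_misplaced_page_stub(int nid) { return nid != 2; }	/* pretend node 2 fails */
static void task_numa_fault_stub(int nid, int pages, bool migrated)
{
	printf("accounted to node %d (pages=%d, migrated=%d)\n",
	       nid, pages, migrated);
}

/* The shared pattern: target_nid == -1 means the placement policy
 * declined to migrate; the fault is still accounted, against the
 * node where the page actually is. */
static void handle_numa_fault(int target_nid)
{
	int page_nid = page_to_nid_stub();	/* where the page really is */
	bool migrated = false;

	if (target_nid != -1) {
		migrated = migrate_misplaced_page_stub(target_nid);
		if (migrated)
			page_nid = target_nid;	/* page moved: account there */
	}

	if (page_nid != -1)
		task_numa_fault_stub(page_nid, 1, migrated);
}

int main(void)
{
	handle_numa_fault(-1);	/* no migration attempted: still accounted, to node 0 */
	handle_numa_fault(1);	/* migration succeeds: accounted to node 1 */
	handle_numa_fault(2);	/* migration fails: accounted to the page's node 0 */
	return 0;
}

The design point the sketch captures is that migration failure no longer
changes whether the fault is accounted, only which node receives it.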
Diffstat (limited to 'mm')
-rw-r--r--	mm/huge_memory.c	25
-rw-r--r--	mm/memory.c	53
2 files changed, 34 insertions(+), 44 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1d6334fc8b6d..c3bb65f284d5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1281,18 +1281,19 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct anon_vma *anon_vma = NULL;
 	struct page *page;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
+	int page_nid = -1, this_nid = numa_node_id();
 	int target_nid;
-	int current_nid = -1;
-	bool migrated, page_locked;
+	bool page_locked;
+	bool migrated = false;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
 		goto out_unlock;
 
 	page = pmd_page(pmd);
-	current_nid = page_to_nid(page);
+	page_nid = page_to_nid(page);
 	count_vm_numa_event(NUMA_HINT_FAULTS);
-	if (current_nid == numa_node_id())
+	if (page_nid == this_nid)
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	/*
@@ -1335,19 +1336,18 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spin_unlock(&mm->page_table_lock);
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
 				pmdp, pmd, addr, page, target_nid);
-	if (!migrated)
+	if (migrated)
+		page_nid = target_nid;
+	else
 		goto check_same;
 
-	task_numa_fault(target_nid, HPAGE_PMD_NR, true);
-	if (anon_vma)
-		page_unlock_anon_vma_read(anon_vma);
-	return 0;
+	goto out;
 
 check_same:
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp))) {
 		/* Someone else took our fault */
-		current_nid = -1;
+		page_nid = -1;
 		goto out_unlock;
 	}
 clear_pmdnuma:
@@ -1362,8 +1362,9 @@ out:
 	if (anon_vma)
 		page_unlock_anon_vma_read(anon_vma);
 
-	if (current_nid != -1)
-		task_numa_fault(current_nid, HPAGE_PMD_NR, false);
+	if (page_nid != -1)
+		task_numa_fault(page_nid, HPAGE_PMD_NR, migrated);
+
 	return 0;
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index ca0003947115..42ae82ee04c1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3519,12 +3519,12 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
-				unsigned long addr, int current_nid)
+				unsigned long addr, int page_nid)
 {
 	get_page(page);
 
 	count_vm_numa_event(NUMA_HINT_FAULTS);
-	if (current_nid == numa_node_id())
+	if (page_nid == numa_node_id())
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	return mpol_misplaced(page, vma, addr);
@@ -3535,7 +3535,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *page = NULL;
 	spinlock_t *ptl;
-	int current_nid = -1;
+	int page_nid = -1;
 	int target_nid;
 	bool migrated = false;
 
@@ -3565,15 +3565,10 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return 0;
 	}
 
-	current_nid = page_to_nid(page);
-	target_nid = numa_migrate_prep(page, vma, addr, current_nid);
+	page_nid = page_to_nid(page);
+	target_nid = numa_migrate_prep(page, vma, addr, page_nid);
 	pte_unmap_unlock(ptep, ptl);
 	if (target_nid == -1) {
-		/*
-		 * Account for the fault against the current node if it not
-		 * being replaced regardless of where the page is located.
-		 */
-		current_nid = numa_node_id();
 		put_page(page);
 		goto out;
 	}
@@ -3581,11 +3576,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	/* Migrate to the requested node */
 	migrated = migrate_misplaced_page(page, target_nid);
 	if (migrated)
-		current_nid = target_nid;
+		page_nid = target_nid;
 
 out:
-	if (current_nid != -1)
-		task_numa_fault(current_nid, 1, migrated);
+	if (page_nid != -1)
+		task_numa_fault(page_nid, 1, migrated);
 	return 0;
 }
 
@@ -3600,7 +3595,6 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long offset;
 	spinlock_t *ptl;
 	bool numa = false;
-	int local_nid = numa_node_id();
 
 	spin_lock(&mm->page_table_lock);
 	pmd = *pmdp;
@@ -3623,9 +3617,10 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
 		pte_t pteval = *pte;
 		struct page *page;
-		int curr_nid = local_nid;
+		int page_nid = -1;
 		int target_nid;
-		bool migrated;
+		bool migrated = false;
+
 		if (!pte_present(pteval))
 			continue;
 		if (!pte_numa(pteval))
@@ -3647,25 +3642,19 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (unlikely(page_mapcount(page) != 1))
 			continue;
 
-		/*
-		 * Note that the NUMA fault is later accounted to either
-		 * the node that is currently running or where the page is
-		 * migrated to.
-		 */
-		curr_nid = local_nid;
-		target_nid = numa_migrate_prep(page, vma, addr,
-					       page_to_nid(page));
-		if (target_nid == -1) {
+		page_nid = page_to_nid(page);
+		target_nid = numa_migrate_prep(page, vma, addr, page_nid);
+		pte_unmap_unlock(pte, ptl);
+		if (target_nid != -1) {
+			migrated = migrate_misplaced_page(page, target_nid);
+			if (migrated)
+				page_nid = target_nid;
+		} else {
 			put_page(page);
-			continue;
 		}
 
-		/* Migrate to the requested node */
-		pte_unmap_unlock(pte, ptl);
-		migrated = migrate_misplaced_page(page, target_nid);
-		if (migrated)
-			curr_nid = target_nid;
-		task_numa_fault(curr_nid, 1, migrated);
+		if (page_nid != -1)
+			task_numa_fault(page_nid, 1, migrated);
 
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}