author     Mel Gorman <mgorman@suse.de>        2013-10-07 06:28:45 -0400
committer  Ingo Molnar <mingo@kernel.org>      2013-10-29 06:37:52 -0400
commit     c61109e34f60f6e85bb43c5a1cd51c0e3db40847 (patch)
tree       5b03f4c2d1376166da7e4a507ddf7a4c6e16eee7 /mm
parent     587fe586f44a48f9691001ba6c45b86c8e4ba21f (diff)
mm: numa: Sanitize task_numa_fault() callsites
There are three callers of task_numa_fault():

 - do_huge_pmd_numa_page():
     Accounts against the current node, not the node where the
     page resides, unless we migrated, in which case it accounts
     against the node we migrated to.

 - do_numa_page():
     Accounts against the current node, not the node where the
     page resides, unless we migrated, in which case it accounts
     against the node we migrated to.

 - do_pmd_numa_page():
     Does not account at all when the page isn't migrated;
     otherwise it accounts against the node we migrated towards.
This seems wrong to me; all three sites should have the same
semantics. Furthermore, we should account against where the page
really is; we already know where the task is.
So modify all three sites to always account (we did, after all,
receive the fault), and to always account against where the page is
after migration, regardless of success.
They all still differ on when they clear the PTE/PMD; ideally that
would get sorted too.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: <stable@kernel.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-8-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
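
For illustration, the common pattern all three callsites converge on can be sketched as follows. This is a simplified sketch distilled from the diff below, not verbatim kernel code: nr_pages stands in for the 1 or HPAGE_PMD_NR each callsite actually passes, and the locking and PTE/PMD manipulation around it are omitted.

	/* Simplified sketch, not verbatim kernel code. */
	int page_nid = page_to_nid(page);	/* where the page really is */
	int target_nid = numa_migrate_prep(page, vma, addr, page_nid);
	bool migrated = false;

	if (target_nid != -1) {
		migrated = migrate_misplaced_page(page, target_nid);
		if (migrated)
			page_nid = target_nid;	/* the page moved; account there */
	}

	/* Always account the fault, against wherever the page now is. */
	if (page_nid != -1)
		task_numa_fault(page_nid, nr_pages, migrated);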
Diffstat (limited to 'mm')

 mm/huge_memory.c | 25
 mm/memory.c      | 53
 2 files changed, 34 insertions(+), 44 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d8534b3630e4..00ddfcdd810e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1281,18 +1281,19 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct anon_vma *anon_vma = NULL;
 	struct page *page;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
+	int page_nid = -1, this_nid = numa_node_id();
 	int target_nid;
-	int current_nid = -1;
-	bool migrated, page_locked;
+	bool page_locked;
+	bool migrated = false;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
 		goto out_unlock;
 
 	page = pmd_page(pmd);
-	current_nid = page_to_nid(page);
+	page_nid = page_to_nid(page);
 	count_vm_numa_event(NUMA_HINT_FAULTS);
-	if (current_nid == numa_node_id())
+	if (page_nid == this_nid)
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	/*
@@ -1335,19 +1336,18 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spin_unlock(&mm->page_table_lock);
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
 				pmdp, pmd, addr, page, target_nid);
-	if (!migrated)
+	if (migrated)
+		page_nid = target_nid;
+	else
 		goto check_same;
 
-	task_numa_fault(target_nid, HPAGE_PMD_NR, true);
-	if (anon_vma)
-		page_unlock_anon_vma_read(anon_vma);
-	return 0;
+	goto out;
 
 check_same:
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp))) {
 		/* Someone else took our fault */
-		current_nid = -1;
+		page_nid = -1;
 		goto out_unlock;
 	}
 clear_pmdnuma:
@@ -1362,8 +1362,9 @@ out:
 	if (anon_vma)
 		page_unlock_anon_vma_read(anon_vma);
 
-	if (current_nid != -1)
-		task_numa_fault(current_nid, HPAGE_PMD_NR, false);
+	if (page_nid != -1)
+		task_numa_fault(page_nid, HPAGE_PMD_NR, migrated);
+
 	return 0;
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index 1311f26497e6..d176154c243f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3521,12 +3521,12 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
-				unsigned long addr, int current_nid)
+				unsigned long addr, int page_nid)
 {
 	get_page(page);
 
 	count_vm_numa_event(NUMA_HINT_FAULTS);
-	if (current_nid == numa_node_id())
+	if (page_nid == numa_node_id())
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	return mpol_misplaced(page, vma, addr);
@@ -3537,7 +3537,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *page = NULL;
 	spinlock_t *ptl;
-	int current_nid = -1;
+	int page_nid = -1;
 	int target_nid;
 	bool migrated = false;
 
@@ -3567,15 +3567,10 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		return 0;
 	}
 
-	current_nid = page_to_nid(page);
-	target_nid = numa_migrate_prep(page, vma, addr, current_nid);
+	page_nid = page_to_nid(page);
+	target_nid = numa_migrate_prep(page, vma, addr, page_nid);
 	pte_unmap_unlock(ptep, ptl);
 	if (target_nid == -1) {
-		/*
-		 * Account for the fault against the current node if it not
-		 * being replaced regardless of where the page is located.
-		 */
-		current_nid = numa_node_id();
 		put_page(page);
 		goto out;
 	}
@@ -3583,11 +3578,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	/* Migrate to the requested node */
 	migrated = migrate_misplaced_page(page, target_nid);
 	if (migrated)
-		current_nid = target_nid;
+		page_nid = target_nid;
 
 out:
-	if (current_nid != -1)
-		task_numa_fault(current_nid, 1, migrated);
+	if (page_nid != -1)
+		task_numa_fault(page_nid, 1, migrated);
 	return 0;
 }
 
@@ -3602,7 +3597,6 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long offset;
 	spinlock_t *ptl;
 	bool numa = false;
-	int local_nid = numa_node_id();
 
 	spin_lock(&mm->page_table_lock);
 	pmd = *pmdp;
@@ -3625,9 +3619,10 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
 		pte_t pteval = *pte;
 		struct page *page;
-		int curr_nid = local_nid;
+		int page_nid = -1;
 		int target_nid;
-		bool migrated;
+		bool migrated = false;
+
 		if (!pte_present(pteval))
 			continue;
 		if (!pte_numa(pteval))
@@ -3649,25 +3644,19 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (unlikely(page_mapcount(page) != 1))
 			continue;
 
-		/*
-		 * Note that the NUMA fault is later accounted to either
-		 * the node that is currently running or where the page is
-		 * migrated to.
-		 */
-		curr_nid = local_nid;
-		target_nid = numa_migrate_prep(page, vma, addr,
-					       page_to_nid(page));
-		if (target_nid == -1) {
+		page_nid = page_to_nid(page);
+		target_nid = numa_migrate_prep(page, vma, addr, page_nid);
+		pte_unmap_unlock(pte, ptl);
+		if (target_nid != -1) {
+			migrated = migrate_misplaced_page(page, target_nid);
+			if (migrated)
+				page_nid = target_nid;
+		} else {
 			put_page(page);
-			continue;
 		}
 
-		/* Migrate to the requested node */
-		pte_unmap_unlock(pte, ptl);
-		migrated = migrate_misplaced_page(page, target_nid);
-		if (migrated)
-			curr_nid = target_nid;
-		task_numa_fault(curr_nid, 1, migrated);
+		if (page_nid != -1)
+			task_numa_fault(page_nid, 1, migrated);
 
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}