author     Mel Gorman <mgorman@suse.de>       2013-10-07 06:29:05 -0400
committer  Ingo Molnar <mingo@kernel.org>     2013-10-09 06:40:32 -0400
commit     1bc115d87dffd1c43bdc3c9c9d1e3a51c195d18e (patch)
tree       56a26b4f4fe089e3dd1df5a26877d1e4c0114d35 /mm
parent     9ff1d9ff3c2c8ab3feaeb2e8056a07ca293f7bde (diff)
mm: numa: Scan pages with elevated page_mapcount
Currently automatic NUMA balancing is unable to distinguish between falsely
shared and genuinely private pages except by ignoring pages with an elevated
page_mapcount entirely. This avoids shared pages bouncing between the nodes
whose tasks are using them, but it also throws away quite a lot of data.

This patch kicks away the training wheels, now that the preparation for
identifying shared/private pages is in place. The ordering is such that the
impact of the shared/private detection can be easily measured. Note that the
patch does not migrate shared, file-backed pages within VMAs marked VM_EXEC,
as these are generally shared library pages. Migrating such pages is not
beneficial: the expectation is that they are read-shared between caches, and
iTLB and iCache pressure is generally low.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-28-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
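The core of the change is visible in the mm/migrate.c hunk below: rather than
skipping every page with page_mapcount() > 1, migration is now skipped only
for multiply-mapped, file-backed, executable mappings. A minimal sketch of
that heuristic, using the real page_mapcount()/page_is_file_cache()/VM_EXEC
symbols of this kernel era (the wrapper function is illustrative, not part of
the patch):

	/* Illustrative helper, not in the patch: the condition under
	 * which migrate_misplaced_page() now refuses to migrate. */
	static bool skip_shared_exec_file_page(struct page *page,
					       struct vm_area_struct *vma)
	{
		/* Multiply mapped + file-backed + executable is almost
		 * certainly shared library text; it is read-shared
		 * between nodes anyway, so migrating it buys nothing. */
		return page_mapcount(page) != 1 &&
		       page_is_file_cache(page) &&
		       (vma->vm_flags & VM_EXEC);
	}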
Diffstat (limited to 'mm')
 -rw-r--r--  mm/huge_memory.c | 12
 -rw-r--r--  mm/memory.c      |  7
 -rw-r--r--  mm/migrate.c     | 17
 -rw-r--r--  mm/mprotect.c    |  4
 4 files changed, 14 insertions(+), 26 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 914216733e0a..2a28c2c6c165 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1484,14 +1484,12 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			struct page *page = pmd_page(*pmd);
 
 			/*
-			 * Only check non-shared pages. Do not trap faults
-			 * against the zero page. The read-only data is likely
-			 * to be read-cached on the local CPU cache and it is
-			 * less useful to know about local vs remote hits on
-			 * the zero page.
+			 * Do not trap faults against the zero page. The
+			 * read-only data is likely to be read-cached on the
+			 * local CPU cache and it is less useful to know about
+			 * local vs remote hits on the zero page.
 			 */
-			if (page_mapcount(page) == 1 &&
-			    !is_huge_zero_page(page) &&
+			if (!is_huge_zero_page(page) &&
 			    !pmd_numa(*pmd)) {
 				entry = pmdp_get_and_clear(mm, addr, pmd);
 				entry = pmd_mknuma(entry);
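The effect on the THP side: with the page_mapcount() == 1 test gone,
change_huge_pmd() now arms NUMA hinting faults on any huge page except the
huge zero page. A sketch of the resulting decision, using the real
pmd_page()/is_huge_zero_page()/pmd_numa() primitives (the standalone wrapper
is illustrative):

	/* Illustrative wrapper around the test the hunk above leaves. */
	static bool thp_arm_numa_fault(pmd_t *pmd)
	{
		struct page *page = pmd_page(*pmd);

		/* The zero page is read-shared by design, so local vs
		 * remote fault data on it carries no placement signal;
		 * everything else is now fair game, elevated mapcount
		 * or not. */
		return !is_huge_zero_page(page) && !pmd_numa(*pmd);
	}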
diff --git a/mm/memory.c b/mm/memory.c
index 24bc9b848af6..3e3b4b8b6c41 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3577,7 +3577,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	/* Migrate to the requested node */
-	migrated = migrate_misplaced_page(page, target_nid);
+	migrated = migrate_misplaced_page(page, vma, target_nid);
 	if (migrated)
 		page_nid = target_nid;
 
@@ -3642,16 +3642,13 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		page = vm_normal_page(vma, addr, pteval);
 		if (unlikely(!page))
 			continue;
-		/* only check non-shared pages */
-		if (unlikely(page_mapcount(page) != 1))
-			continue;
 
 		last_nid = page_nid_last(page);
 		page_nid = page_to_nid(page);
 		target_nid = numa_migrate_prep(page, vma, addr, page_nid);
 		pte_unmap_unlock(pte, ptl);
 		if (target_nid != -1) {
-			migrated = migrate_misplaced_page(page, target_nid);
+			migrated = migrate_misplaced_page(page, vma, target_nid);
 			if (migrated)
 				page_nid = target_nid;
 		} else {
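Both call sites above adapt to the new migrate_misplaced_page() signature,
which now carries the faulting VMA so the callee can inspect vm_flags before
deciding to migrate. For reference, the before/after prototypes as this patch
defines them:

	int migrate_misplaced_page(struct page *page, int node);	/* before */
	int migrate_misplaced_page(struct page *page,
				   struct vm_area_struct *vma,
				   int node);				/* after */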
diff --git a/mm/migrate.c b/mm/migrate.c
index 7bd90d3b16bb..fcba2f46bb80 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1599,7 +1599,8 @@ int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
  * node. Caller is expected to have an elevated reference count on
  * the page that will be dropped by this function before returning.
  */
-int migrate_misplaced_page(struct page *page, int node)
+int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
+			   int node)
 {
 	pg_data_t *pgdat = NODE_DATA(node);
 	int isolated;
@@ -1607,10 +1608,11 @@ int migrate_misplaced_page(struct page *page, int node)
 	LIST_HEAD(migratepages);
 
 	/*
-	 * Don't migrate pages that are mapped in multiple processes.
-	 * TODO: Handle false sharing detection instead of this hammer
+	 * Don't migrate file pages that are mapped in multiple processes
+	 * with execute permissions as they are probably shared libraries.
 	 */
-	if (page_mapcount(page) != 1)
+	if (page_mapcount(page) != 1 && page_is_file_cache(page) &&
+	    (vma->vm_flags & VM_EXEC))
 		goto out;
 
 	/*
@@ -1661,13 +1663,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	int page_lru = page_is_file_cache(page);
 
 	/*
-	 * Don't migrate pages that are mapped in multiple processes.
-	 * TODO: Handle false sharing detection instead of this hammer
-	 */
-	if (page_mapcount(page) != 1)
-		goto out_dropref;
-
-	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
 	 * Optimal placement is no good if the memory bus is saturated and
 	 * all the time is being spent migrating!
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 2da33dca6134..41e02923fcd9 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -69,9 +69,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				if (last_nid != this_nid)
 					all_same_node = false;
 
-				/* only check non-shared pages */
-				if (!pte_numa(oldpte) &&
-				    page_mapcount(page) == 1) {
+				if (!pte_numa(oldpte)) {
 					ptent = pte_mknuma(ptent);
 					updated = true;
 				}
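Mirroring the fault-side changes, change_pte_range() in the hunk above now
tags every normal PTE for NUMA hinting, shared or not. A minimal sketch of
the per-PTE step after this patch, using the real pte_numa()/pte_mknuma()
helpers (the standalone function is an illustrative extraction, not patch
code):

	/* Illustrative extraction of the per-pte step shown above. */
	static pte_t prot_numa_one_pte(pte_t ptent, pte_t oldpte, bool *updated)
	{
		/* No mapcount filter any more: any pte not already
		 * marked NUMA gets the hinting-fault treatment. */
		if (!pte_numa(oldpte)) {
			ptent = pte_mknuma(ptent);
			*updated = true;
		}
		return ptent;
	}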