path: root/mm/mempolicy.c
author     Hugh Dickins <hugh.dickins@tiscali.co.uk>  2009-12-14 20:59:33 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2009-12-15 11:53:20 -0500
commit     62b61f611eb5e20f7e9f8619bfd03bdfe8af6348 (patch)
tree       9f06fff7eb6530fbe90b4d998b91071133f6af25 /mm/mempolicy.c
parent     e9995ef978a7d5296fe04a9a2c5ca6e66d8bb4e5 (diff)
ksm: memory hotremove migration only
The previous patch enables page migration of ksm pages, but that soon gets into trouble: not surprising, since we're using the ksm page lock to lock operations on its stable_node, but page migration switches the page whose lock is to be used for that. Another layer of locking would fix it, but do we need that yet?

Do we actually need page migration of ksm pages? Yes, memory hotremove needs to offline sections of memory: and since we stopped allocating ksm pages with GFP_HIGHUSER, they will tend to be GFP_HIGHUSER_MOVABLE candidates for migration. But KSM is currently unconscious of NUMA issues, happily merging pages from different NUMA nodes: at present the rule must be, not to use MADV_MERGEABLE where you care about NUMA. So no, NUMA page migration of ksm pages does not make sense yet.

So, to complete support for ksm swapping we need to make hotremove safe. Make ksm_memory_callback() take ksm_thread_mutex when MEM_GOING_OFFLINE and release it when MEM_OFFLINE or MEM_CANCEL_OFFLINE. But if mapped pages are freed before migration reaches them, stable_nodes may be left still pointing to struct pages which have been removed from the system: the stable_node needs to identify a page by pfn rather than by page pointer, then it can safely prune them when MEM_OFFLINE.

And make NUMA migration skip PageKsm pages where it skips PageReserved. But it's only when we reach unmap_and_move() that the page lock is taken and we can be sure that the raised pagecount has prevented a PageAnon from being upgraded: so add an offlining arg to migrate_pages(), to migrate a ksm page when offlining (which has sufficient locking) but reject it otherwise.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Chris Wright <chrisw@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
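As a companion to the changelog above, here is a minimal sketch of the hotremove handshake it describes: take ksm_thread_mutex on MEM_GOING_OFFLINE, prune stale stable_nodes by pfn on MEM_OFFLINE, and release the mutex on MEM_OFFLINE or MEM_CANCEL_OFFLINE. This diff covers only mm/mempolicy.c, so the helper names ksm_check_stable_tree() and remove_node_from_stable_tree() are placeholders for whatever mm/ksm.c actually uses; treat this as an illustration of the pattern, not the patch body.

static int ksm_memory_callback(struct notifier_block *self,
			       unsigned long action, void *arg)
{
	struct memory_notify *mn = arg;
	struct stable_node *stable_node;

	switch (action) {
	case MEM_GOING_OFFLINE:
		/* Lock out ksmd and MADV_MERGEABLE/UNMERGEABLE while offlining. */
		mutex_lock(&ksm_thread_mutex);
		break;

	case MEM_OFFLINE:
		/*
		 * Page migration has already moved most ksm pages; any
		 * stable_node still pointing (by pfn) into the removed
		 * range refers to a freed page, so prune it here.
		 */
		while ((stable_node = ksm_check_stable_tree(mn->start_pfn,
				mn->start_pfn + mn->nr_pages)) != NULL)
			remove_node_from_stable_tree(stable_node);
		/* fall through to drop the mutex */

	case MEM_CANCEL_OFFLINE:
		mutex_unlock(&ksm_thread_mutex);
		break;
	}
	return NOTIFY_OK;
}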
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--  mm/mempolicy.c  19
1 file changed, 7 insertions(+), 12 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index f11fdad06204..290fb5bf0440 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -85,6 +85,7 @@
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
 #include <linux/migrate.h>
+#include <linux/ksm.h>
 #include <linux/rmap.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
@@ -413,17 +414,11 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		if (!page)
 			continue;
 		/*
-		 * The check for PageReserved here is important to avoid
-		 * handling zero pages and other pages that may have been
-		 * marked special by the system.
-		 *
-		 * If the PageReserved would not be checked here then f.e.
-		 * the location of the zero page could have an influence
-		 * on MPOL_MF_STRICT, zero pages would be counted for
-		 * the per node stats, and there would be useless attempts
-		 * to put zero pages on the migration list.
+		 * vm_normal_page() filters out zero pages, but there might
+		 * still be PageReserved pages to skip, perhaps in a VDSO.
+		 * And we cannot move PageKsm pages sensibly or safely yet.
 		 */
-		if (PageReserved(page))
+		if (PageReserved(page) || PageKsm(page))
 			continue;
 		nid = page_to_nid(page);
 		if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
@@ -839,7 +834,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 				flags | MPOL_MF_DISCONTIG_OK, &pagelist);
 
 	if (!list_empty(&pagelist))
-		err = migrate_pages(&pagelist, new_node_page, dest);
+		err = migrate_pages(&pagelist, new_node_page, dest, 0);
 
 	return err;
 }
@@ -1056,7 +1051,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 
 	if (!list_empty(&pagelist))
 		nr_failed = migrate_pages(&pagelist, new_vma_page,
-						(unsigned long)vma);
+						(unsigned long)vma, 0);
 
 	if (!err && nr_failed && (flags & MPOL_MF_STRICT))
 		err = -EIO;
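For context beyond this file: the new fourth argument (0 here, meaning "not offlining") is what the changelog says unmap_and_move() consults once it holds the page lock. A rough sketch of that consumer, assuming the general shape of mm/migrate.c rather than quoting it (that file is not part of this diff):

static int unmap_and_move(new_page_t get_new_page, unsigned long private,
			  struct page *page, int force, int offlining)
{
	int rc = -EAGAIN;

	/* ... allocate the target page, lock_page(page) ... */

	/*
	 * Only the hotremove path, which holds ksm_thread_mutex, may
	 * migrate a ksm page; other callers skipped PageKsm alongside
	 * PageReserved, but only under the page lock is the check final.
	 */
	if (PageKsm(page) && !offlining) {
		rc = -EBUSY;
		goto unlock;
	}

	/* ... try_to_unmap(), move_to_new_page(), remap on failure ... */
unlock:
	/* ... unlock_page(page), release pages ... */
	return rc;
}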