Diffstat (limited to 'mm')
-rw-r--r--  mm/ksm.c             84
-rw-r--r--  mm/memory_hotplug.c   2
-rw-r--r--  mm/mempolicy.c       19
-rw-r--r--  mm/migrate.c         27

4 files changed, 100 insertions(+), 32 deletions(-)
diff --git a/mm/ksm.c b/mm/ksm.c
index dfdc292d3626..d4c228a9d278 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -29,6 +29,7 @@
 #include <linux/wait.h>
 #include <linux/slab.h>
 #include <linux/rbtree.h>
+#include <linux/memory.h>
 #include <linux/mmu_notifier.h>
 #include <linux/swap.h>
 #include <linux/ksm.h>
@@ -108,14 +109,14 @@ struct ksm_scan {
 
 /**
  * struct stable_node - node of the stable rbtree
- * @page: pointer to struct page of the ksm page
  * @node: rb node of this ksm page in the stable tree
  * @hlist: hlist head of rmap_items using this ksm page
+ * @kpfn: page frame number of this ksm page
  */
 struct stable_node {
-	struct page *page;
 	struct rb_node node;
 	struct hlist_head hlist;
+	unsigned long kpfn;
 };
 
 /**
@@ -515,7 +516,7 @@ static struct page *get_ksm_page(struct stable_node *stable_node)
 	struct page *page;
 	void *expected_mapping;
 
-	page = stable_node->page;
+	page = pfn_to_page(stable_node->kpfn);
 	expected_mapping = (void *)stable_node +
 			(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
 	rcu_read_lock();
@@ -973,7 +974,7 @@ static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
  * This function returns the stable tree node of identical content if found,
  * NULL otherwise.
  */
-static struct stable_node *stable_tree_search(struct page *page)
+static struct page *stable_tree_search(struct page *page)
 {
 	struct rb_node *node = root_stable_tree.rb_node;
 	struct stable_node *stable_node;
@@ -981,7 +982,7 @@ static struct stable_node *stable_tree_search(struct page *page)
 	stable_node = page_stable_node(page);
 	if (stable_node) {			/* ksm page forked */
 		get_page(page);
-		return stable_node;
+		return page;
 	}
 
 	while (node) {
@@ -1003,7 +1004,7 @@ static struct stable_node *stable_tree_search(struct page *page)
 			put_page(tree_page);
 			node = node->rb_right;
 		} else
-			return stable_node;
+			return tree_page;
 	}
 
 	return NULL;
@@ -1059,7 +1060,7 @@ static struct stable_node *stable_tree_insert(struct page *kpage)
 
 	INIT_HLIST_HEAD(&stable_node->hlist);
 
-	stable_node->page = kpage;
+	stable_node->kpfn = page_to_pfn(kpage);
 	set_page_stable_node(kpage, stable_node);
 
 	return stable_node;
@@ -1170,9 +1171,8 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 	remove_rmap_item_from_tree(rmap_item);
 
 	/* We first start with searching the page inside the stable tree */
-	stable_node = stable_tree_search(page);
-	if (stable_node) {
-		kpage = stable_node->page;
+	kpage = stable_tree_search(page);
+	if (kpage) {
 		err = try_to_merge_with_ksm_page(rmap_item, page, kpage);
 		if (!err) {
 			/*
@@ -1180,7 +1180,7 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 			 * add its rmap_item to the stable tree.
 			 */
 			lock_page(kpage);
-			stable_tree_append(rmap_item, stable_node);
+			stable_tree_append(rmap_item, page_stable_node(kpage));
 			unlock_page(kpage);
 		}
 		put_page(kpage);
@@ -1715,12 +1715,63 @@ void ksm_migrate_page(struct page *newpage, struct page *oldpage)
 
 	stable_node = page_stable_node(newpage);
 	if (stable_node) {
-		VM_BUG_ON(stable_node->page != oldpage);
-		stable_node->page = newpage;
+		VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage));
+		stable_node->kpfn = page_to_pfn(newpage);
 	}
 }
 #endif /* CONFIG_MIGRATION */
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+static struct stable_node *ksm_check_stable_tree(unsigned long start_pfn,
+						 unsigned long end_pfn)
+{
+	struct rb_node *node;
+
+	for (node = rb_first(&root_stable_tree); node; node = rb_next(node)) {
+		struct stable_node *stable_node;
+
+		stable_node = rb_entry(node, struct stable_node, node);
+		if (stable_node->kpfn >= start_pfn &&
+		    stable_node->kpfn < end_pfn)
+			return stable_node;
+	}
+	return NULL;
+}
+
+static int ksm_memory_callback(struct notifier_block *self,
+			       unsigned long action, void *arg)
+{
+	struct memory_notify *mn = arg;
+	struct stable_node *stable_node;
+
+	switch (action) {
+	case MEM_GOING_OFFLINE:
+		/*
+		 * Keep it very simple for now: just lock out ksmd and
+		 * MADV_UNMERGEABLE while any memory is going offline.
+		 */
+		mutex_lock(&ksm_thread_mutex);
+		break;
+
+	case MEM_OFFLINE:
+		/*
+		 * Most of the work is done by page migration; but there might
+		 * be a few stable_nodes left over, still pointing to struct
+		 * pages which have been offlined: prune those from the tree.
+		 */
+		while ((stable_node = ksm_check_stable_tree(mn->start_pfn,
+				mn->start_pfn + mn->nr_pages)) != NULL)
+			remove_node_from_stable_tree(stable_node);
+		/* fallthrough */
+
+	case MEM_CANCEL_OFFLINE:
+		mutex_unlock(&ksm_thread_mutex);
+		break;
+	}
+	return NOTIFY_OK;
+}
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+
 #ifdef CONFIG_SYSFS
 /*
  * This all compiles without CONFIG_SYSFS, but is a waste of space.
@@ -1946,6 +1997,13 @@ static int __init ksm_init(void)
 
 #endif /* CONFIG_SYSFS */
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+	/*
+	 * Choose a high priority since the callback takes ksm_thread_mutex:
+	 * later callbacks could only be taking locks which nest within that.
+	 */
+	hotplug_memory_notifier(ksm_memory_callback, 100);
+#endif
 	return 0;
 
 out_free2:
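
The net effect of the ksm.c changes above: a stable_node now records a bare pfn instead of pinning a struct page pointer, so ksm_migrate_page() redirects it with a single assignment, and the MEM_OFFLINE pruning can test whether a node falls inside the offlined pfn range. Below is a minimal, hedged sketch of the memory-hotplug notifier pattern the patch relies on, reduced to a standalone form: the example_* names are invented for illustration, while hotplug_memory_notifier(), struct memory_notify, NOTIFY_OK and the MEM_* actions are the real interfaces used in the hunks above.

#include <linux/init.h>
#include <linux/memory.h>
#include <linux/mutex.h>
#include <linux/notifier.h>

static DEFINE_MUTEX(example_mutex);	/* stands in for ksm_thread_mutex */

static int example_memory_callback(struct notifier_block *self,
				   unsigned long action, void *arg)
{
	struct memory_notify *mn = arg;

	switch (action) {
	case MEM_GOING_OFFLINE:
		/* Lock out our worker before any pages can vanish. */
		mutex_lock(&example_mutex);
		break;

	case MEM_OFFLINE:
		/*
		 * Prune any state covering pfns in
		 * [mn->start_pfn, mn->start_pfn + mn->nr_pages) here,
		 * then fall through to drop the lock.
		 */
	case MEM_CANCEL_OFFLINE:
		mutex_unlock(&example_mutex);
		break;
	}
	return NOTIFY_OK;
}

static int __init example_init(void)
{
	/*
	 * High priority, as in ksm_init(): callbacks running after this
	 * one must only take locks which nest inside example_mutex.
	 */
	hotplug_memory_notifier(example_memory_callback, 100);
	return 0;
}
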
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index bc5a08138f1e..67e941d7882c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -698,7 +698,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 	if (list_empty(&source))
 		goto out;
 	/* this function returns # of failed pages */
-	ret = migrate_pages(&source, hotremove_migrate_alloc, 0);
+	ret = migrate_pages(&source, hotremove_migrate_alloc, 0, 1);
 
 out:
 	return ret;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index f11fdad06204..290fb5bf0440 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -85,6 +85,7 @@
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
 #include <linux/migrate.h>
+#include <linux/ksm.h>
 #include <linux/rmap.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
@@ -413,17 +414,11 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		if (!page)
 			continue;
 		/*
-		 * The check for PageReserved here is important to avoid
-		 * handling zero pages and other pages that may have been
-		 * marked special by the system.
-		 *
-		 * If the PageReserved would not be checked here then f.e.
-		 * the location of the zero page could have an influence
-		 * on MPOL_MF_STRICT, zero pages would be counted for
-		 * the per node stats, and there would be useless attempts
-		 * to put zero pages on the migration list.
+		 * vm_normal_page() filters out zero pages, but there might
+		 * still be PageReserved pages to skip, perhaps in a VDSO.
+		 * And we cannot move PageKsm pages sensibly or safely yet.
 		 */
-		if (PageReserved(page))
+		if (PageReserved(page) || PageKsm(page))
 			continue;
 		nid = page_to_nid(page);
 		if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
@@ -839,7 +834,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 					flags | MPOL_MF_DISCONTIG_OK, &pagelist);
 
 	if (!list_empty(&pagelist))
-		err = migrate_pages(&pagelist, new_node_page, dest);
+		err = migrate_pages(&pagelist, new_node_page, dest, 0);
 
 	return err;
 }
@@ -1056,7 +1051,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 
 	if (!list_empty(&pagelist))
 		nr_failed = migrate_pages(&pagelist, new_vma_page,
-						(unsigned long)vma);
+						(unsigned long)vma, 0);
 
 	if (!err && nr_failed && (flags & MPOL_MF_STRICT))
 		err = -EIO;
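
The mempolicy.c hunks tighten the page filter in check_pte_range() and route both migrate_pages() call sites through the new trailing 0 (not offlining). A condensed sketch of the filter logic follows; the helper name is hypothetical (the patch open-codes the test), while PageReserved() and PageKsm() are the real predicates it uses.

#include <linux/ksm.h>
#include <linux/mm.h>

/*
 * Hypothetical helper mirroring the open-coded test above:
 * vm_normal_page() has already filtered out the zero page, so two
 * classes remain to skip before queueing a page for migration.
 */
static bool mbind_skip_page(struct page *page)
{
	/* Reserved pages (perhaps in a VDSO) are never migrated here. */
	if (PageReserved(page))
		return true;
	/* KSM pages only move via the locked-down hot-remove path. */
	if (PageKsm(page))
		return true;
	return false;
}
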
diff --git a/mm/migrate.c b/mm/migrate.c
index 0b714747c028..2a0ea3ef509e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -543,7 +543,7 @@ static int move_to_new_page(struct page *newpage, struct page *page)
  * to the newly allocated page in newpage.
  */
 static int unmap_and_move(new_page_t get_new_page, unsigned long private,
-			struct page *page, int force)
+			struct page *page, int force, int offlining)
 {
 	int rc = 0;
 	int *result = NULL;
@@ -569,6 +569,20 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		lock_page(page);
 	}
 
+	/*
+	 * Only memory hotplug's offline_pages() caller has locked out KSM,
+	 * and can safely migrate a KSM page.  The other cases have skipped
+	 * PageKsm along with PageReserved - but it is only now when we have
+	 * the page lock that we can be certain it will not go KSM beneath us
+	 * (KSM will not upgrade a page from PageAnon to PageKsm when it sees
+	 * its pagecount raised, but only here do we take the page lock which
+	 * serializes that).
+	 */
+	if (PageKsm(page) && !offlining) {
+		rc = -EBUSY;
+		goto unlock;
+	}
+
 	/* charge against new page */
 	charge = mem_cgroup_prepare_migration(page, &mem);
 	if (charge == -ENOMEM) {
@@ -685,7 +699,7 @@ move_newpage:
  * Return: Number of pages not migrated or error code.
  */
 int migrate_pages(struct list_head *from,
-		new_page_t get_new_page, unsigned long private)
+		new_page_t get_new_page, unsigned long private, int offlining)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -705,7 +719,7 @@ int migrate_pages(struct list_head *from,
 		cond_resched();
 
 		rc = unmap_and_move(get_new_page, private,
-				page, pass > 2);
+				page, pass > 2, offlining);
 
 		switch(rc) {
 		case -ENOMEM:
@@ -801,7 +815,8 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
 		if (!page)
 			goto set_status;
 
-		if (PageReserved(page))		/* Check for zero page */
+		/* Use PageReserved to check for zero page */
+		if (PageReserved(page) || PageKsm(page))
 			goto put_and_set;
 
 		pp->page = page;
@@ -838,7 +853,7 @@ set_status:
 		err = 0;
 		if (!list_empty(&pagelist))
 			err = migrate_pages(&pagelist, new_page_node,
-				(unsigned long)pm);
+				(unsigned long)pm, 0);
 
 		up_read(&mm->mmap_sem);
 		return err;
@@ -959,7 +974,7 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
 
 		err = -ENOENT;
 		/* Use PageReserved to check for zero page */
-		if (!page || PageReserved(page))
+		if (!page || PageReserved(page) || PageKsm(page))
 			goto set_status;
 
 		err = page_to_nid(page);
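
The heart of the migrate.c change is the gate added in unmap_and_move(): PageKsm() can only be trusted once the page lock is held, because KSM declines to merge a page whose count has been raised, and the page lock is what serializes a PageAnon page becoming PageKsm. Only the hot-remove path, which has already taken ksm_thread_mutex via the notifier in ksm.c, passes offlining == 1; every other caller passes 0, as the call sites above show. A minimal sketch of the check, assuming the caller already holds the page lock as unmap_and_move() does; the helper name is invented for illustration.

#include <linux/ksm.h>
#include <linux/mm.h>

/*
 * Hypothetical distillation of the new check in unmap_and_move().
 * Must be called with the page locked: before that point, a PageAnon
 * page could still go KSM beneath us.
 */
static int ksm_migration_allowed(struct page *page, int offlining)
{
	VM_BUG_ON(!PageLocked(page));
	if (PageKsm(page) && !offlining)
		return -EBUSY;	/* migrate_pages() counts this as a failure */
	return 0;
}
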