diff options
-rw-r--r-- | include/linux/migrate.h | 8 | ||||
-rw-r--r-- | mm/ksm.c | 84 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 2 | ||||
-rw-r--r-- | mm/mempolicy.c | 19 | ||||
-rw-r--r-- | mm/migrate.c | 27 |
5 files changed, 103 insertions, 37 deletions
diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 527602cdea1..7f085c97c79 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h | |||
@@ -12,7 +12,8 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **); | |||
12 | extern int putback_lru_pages(struct list_head *l); | 12 | extern int putback_lru_pages(struct list_head *l); |
13 | extern int migrate_page(struct address_space *, | 13 | extern int migrate_page(struct address_space *, |
14 | struct page *, struct page *); | 14 | struct page *, struct page *); |
15 | extern int migrate_pages(struct list_head *l, new_page_t x, unsigned long); | 15 | extern int migrate_pages(struct list_head *l, new_page_t x, |
16 | unsigned long private, int offlining); | ||
16 | 17 | ||
17 | extern int fail_migrate_page(struct address_space *, | 18 | extern int fail_migrate_page(struct address_space *, |
18 | struct page *, struct page *); | 19 | struct page *, struct page *); |
@@ -26,10 +27,7 @@ extern int migrate_vmas(struct mm_struct *mm, | |||
26 | 27 | ||
27 | static inline int putback_lru_pages(struct list_head *l) { return 0; } | 28 | static inline int putback_lru_pages(struct list_head *l) { return 0; } |
28 | static inline int migrate_pages(struct list_head *l, new_page_t x, | 29 | static inline int migrate_pages(struct list_head *l, new_page_t x, |
29 | unsigned long private) { return -ENOSYS; } | 30 | unsigned long private, int offlining) { return -ENOSYS; } |
30 | |||
31 | static inline int migrate_pages_to(struct list_head *pagelist, | ||
32 | struct vm_area_struct *vma, int dest) { return 0; } | ||
33 | 31 | ||
34 | static inline int migrate_prep(void) { return -ENOSYS; } | 32 | static inline int migrate_prep(void) { return -ENOSYS; } |
35 | 33 | ||
diff --git a/mm/ksm.c b/mm/ksm.c --- a/mm/ksm.c +++ b/mm/ksm.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/wait.h> | 29 | #include <linux/wait.h> |
30 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/rbtree.h> | 31 | #include <linux/rbtree.h> |
32 | #include <linux/memory.h> | ||
32 | #include <linux/mmu_notifier.h> | 33 | #include <linux/mmu_notifier.h> |
33 | #include <linux/swap.h> | 34 | #include <linux/swap.h> |
34 | #include <linux/ksm.h> | 35 | #include <linux/ksm.h> |
@@ -108,14 +109,14 @@ struct ksm_scan { | |||
108 | 109 | ||
109 | /** | 110 | /** |
110 | * struct stable_node - node of the stable rbtree | 111 | * struct stable_node - node of the stable rbtree |
111 | * @page: pointer to struct page of the ksm page | ||
112 | * @node: rb node of this ksm page in the stable tree | 112 | * @node: rb node of this ksm page in the stable tree |
113 | * @hlist: hlist head of rmap_items using this ksm page | 113 | * @hlist: hlist head of rmap_items using this ksm page |
114 | * @kpfn: page frame number of this ksm page | ||
114 | */ | 115 | */ |
115 | struct stable_node { | 116 | struct stable_node { |
116 | struct page *page; | ||
117 | struct rb_node node; | 117 | struct rb_node node; |
118 | struct hlist_head hlist; | 118 | struct hlist_head hlist; |
119 | unsigned long kpfn; | ||
119 | }; | 120 | }; |
120 | 121 | ||
121 | /** | 122 | /** |
@@ -515,7 +516,7 @@ static struct page *get_ksm_page(struct stable_node *stable_node) | |||
515 | struct page *page; | 516 | struct page *page; |
516 | void *expected_mapping; | 517 | void *expected_mapping; |
517 | 518 | ||
518 | page = stable_node->page; | 519 | page = pfn_to_page(stable_node->kpfn); |
519 | expected_mapping = (void *)stable_node + | 520 | expected_mapping = (void *)stable_node + |
520 | (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM); | 521 | (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM); |
521 | rcu_read_lock(); | 522 | rcu_read_lock(); |
@@ -973,7 +974,7 @@ static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item, | |||
973 | * This function returns the stable tree node of identical content if found, | 974 | * This function returns the stable tree node of identical content if found, |
974 | * NULL otherwise. | 975 | * NULL otherwise. |
975 | */ | 976 | */ |
976 | static struct stable_node *stable_tree_search(struct page *page) | 977 | static struct page *stable_tree_search(struct page *page) |
977 | { | 978 | { |
978 | struct rb_node *node = root_stable_tree.rb_node; | 979 | struct rb_node *node = root_stable_tree.rb_node; |
979 | struct stable_node *stable_node; | 980 | struct stable_node *stable_node; |
@@ -981,7 +982,7 @@ static struct stable_node *stable_tree_search(struct page *page) | |||
981 | stable_node = page_stable_node(page); | 982 | stable_node = page_stable_node(page); |
982 | if (stable_node) { /* ksm page forked */ | 983 | if (stable_node) { /* ksm page forked */ |
983 | get_page(page); | 984 | get_page(page); |
984 | return stable_node; | 985 | return page; |
985 | } | 986 | } |
986 | 987 | ||
987 | while (node) { | 988 | while (node) { |
@@ -1003,7 +1004,7 @@ static struct stable_node *stable_tree_search(struct page *page) | |||
1003 | put_page(tree_page); | 1004 | put_page(tree_page); |
1004 | node = node->rb_right; | 1005 | node = node->rb_right; |
1005 | } else | 1006 | } else |
1006 | return stable_node; | 1007 | return tree_page; |
1007 | } | 1008 | } |
1008 | 1009 | ||
1009 | return NULL; | 1010 | return NULL; |
@@ -1059,7 +1060,7 @@ static struct stable_node *stable_tree_insert(struct page *kpage) | |||
1059 | 1060 | ||
1060 | INIT_HLIST_HEAD(&stable_node->hlist); | 1061 | INIT_HLIST_HEAD(&stable_node->hlist); |
1061 | 1062 | ||
1062 | stable_node->page = kpage; | 1063 | stable_node->kpfn = page_to_pfn(kpage); |
1063 | set_page_stable_node(kpage, stable_node); | 1064 | set_page_stable_node(kpage, stable_node); |
1064 | 1065 | ||
1065 | return stable_node; | 1066 | return stable_node; |
@@ -1170,9 +1171,8 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) | |||
1170 | remove_rmap_item_from_tree(rmap_item); | 1171 | remove_rmap_item_from_tree(rmap_item); |
1171 | 1172 | ||
1172 | /* We first start with searching the page inside the stable tree */ | 1173 | /* We first start with searching the page inside the stable tree */ |
1173 | stable_node = stable_tree_search(page); | 1174 | kpage = stable_tree_search(page); |
1174 | if (stable_node) { | 1175 | if (kpage) { |
1175 | kpage = stable_node->page; | ||
1176 | err = try_to_merge_with_ksm_page(rmap_item, page, kpage); | 1176 | err = try_to_merge_with_ksm_page(rmap_item, page, kpage); |
1177 | if (!err) { | 1177 | if (!err) { |
1178 | /* | 1178 | /* |
@@ -1180,7 +1180,7 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) | |||
1180 | * add its rmap_item to the stable tree. | 1180 | * add its rmap_item to the stable tree. |
1181 | */ | 1181 | */ |
1182 | lock_page(kpage); | 1182 | lock_page(kpage); |
1183 | stable_tree_append(rmap_item, stable_node); | 1183 | stable_tree_append(rmap_item, page_stable_node(kpage)); |
1184 | unlock_page(kpage); | 1184 | unlock_page(kpage); |
1185 | } | 1185 | } |
1186 | put_page(kpage); | 1186 | put_page(kpage); |
@@ -1715,12 +1715,63 @@ void ksm_migrate_page(struct page *newpage, struct page *oldpage) | |||
1715 | 1715 | ||
1716 | stable_node = page_stable_node(newpage); | 1716 | stable_node = page_stable_node(newpage); |
1717 | if (stable_node) { | 1717 | if (stable_node) { |
1718 | VM_BUG_ON(stable_node->page != oldpage); | 1718 | VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage)); |
1719 | stable_node->page = newpage; | 1719 | stable_node->kpfn = page_to_pfn(newpage); |
1720 | } | 1720 | } |
1721 | } | 1721 | } |
1722 | #endif /* CONFIG_MIGRATION */ | 1722 | #endif /* CONFIG_MIGRATION */ |
1723 | 1723 | ||
1724 | #ifdef CONFIG_MEMORY_HOTREMOVE | ||
1725 | static struct stable_node *ksm_check_stable_tree(unsigned long start_pfn, | ||
1726 | unsigned long end_pfn) | ||
1727 | { /* Find a stable_node whose kpfn lies in [start_pfn, end_pfn); NULL if there is none. */ | ||
1728 | struct rb_node *node; | ||
1729 | |||
1730 | for (node = rb_first(&root_stable_tree); node; node = rb_next(node)) { /* step through root_stable_tree from rb_first() via rb_next() */ | ||
1731 | struct stable_node *stable_node; | ||
1732 | |||
1733 | stable_node = rb_entry(node, struct stable_node, node); | ||
1734 | if (stable_node->kpfn >= start_pfn && | ||
1735 | stable_node->kpfn < end_pfn) | ||
1736 | return stable_node; /* first match in iteration order; end_pfn itself is excluded */ | ||
1737 | } | ||
1738 | return NULL; /* no node's kpfn falls inside the range */ | ||
1739 | } | ||
1740 | |||
1741 | static int ksm_memory_callback(struct notifier_block *self, | ||
1742 | unsigned long action, void *arg) | ||
1743 | { /* Memory hotplug notifier: holds ksm_thread_mutex while a range goes offline and prunes stable_nodes left in that range. */ | ||
1744 | struct memory_notify *mn = arg; /* supplies start_pfn and nr_pages of the affected range */ | ||
1745 | struct stable_node *stable_node; | ||
1746 | |||
1747 | switch (action) { | ||
1748 | case MEM_GOING_OFFLINE: | ||
1749 | /* | ||
1750 | * Keep it very simple for now: just lock out ksmd and | ||
1751 | * MADV_UNMERGEABLE while any memory is going offline. The mutex is deliberately left held on return: it is released by the MEM_OFFLINE or MEM_CANCEL_OFFLINE case below. | ||
1752 | */ | ||
1753 | mutex_lock(&ksm_thread_mutex); | ||
1754 | break; | ||
1755 | |||
1756 | case MEM_OFFLINE: | ||
1757 | /* | ||
1758 | * Most of the work is done by page migration; but there might | ||
1759 | * be a few stable_nodes left over, still pointing to struct | ||
1760 | * pages which have been offlined: prune those from the tree. The search is restarted after every removal, until no node in [start_pfn, start_pfn + nr_pages) remains. | ||
1761 | */ | ||
1762 | while ((stable_node = ksm_check_stable_tree(mn->start_pfn, | ||
1763 | mn->start_pfn + mn->nr_pages)) != NULL) | ||
1764 | remove_node_from_stable_tree(stable_node); | ||
1765 | /* fallthrough */ | ||
1766 | |||
1767 | case MEM_CANCEL_OFFLINE: | ||
1768 | mutex_unlock(&ksm_thread_mutex); /* pairs with the mutex_lock() in the MEM_GOING_OFFLINE case */ | ||
1769 | break; | ||
1770 | } | ||
1771 | return NOTIFY_OK; /* returned for every action, including those the switch does not handle */ | ||
1772 | } | ||
1773 | #endif /* CONFIG_MEMORY_HOTREMOVE */ | ||
1774 | |||
1724 | #ifdef CONFIG_SYSFS | 1775 | #ifdef CONFIG_SYSFS |
1725 | /* | 1776 | /* |
1726 | * This all compiles without CONFIG_SYSFS, but is a waste of space. | 1777 | * This all compiles without CONFIG_SYSFS, but is a waste of space. |
@@ -1946,6 +1997,13 @@ static int __init ksm_init(void) | |||
1946 | 1997 | ||
1947 | #endif /* CONFIG_SYSFS */ | 1998 | #endif /* CONFIG_SYSFS */ |
1948 | 1999 | ||
2000 | #ifdef CONFIG_MEMORY_HOTREMOVE | ||
2001 | /* | ||
2002 | * Choose a high priority since the callback takes ksm_thread_mutex: | ||
2003 | * later callbacks could only be taking locks which nest within that. | ||
2004 | */ | ||
2005 | hotplug_memory_notifier(ksm_memory_callback, 100); | ||
2006 | #endif | ||
1949 | return 0; | 2007 | return 0; |
1950 | 2008 | ||
1951 | out_free2: | 2009 | out_free2: |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index bc5a08138f1..67e941d7882 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -698,7 +698,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) | |||
698 | if (list_empty(&source)) | 698 | if (list_empty(&source)) |
699 | goto out; | 699 | goto out; |
700 | /* this function returns # of failed pages */ | 700 | /* this function returns # of failed pages */ |
701 | ret = migrate_pages(&source, hotremove_migrate_alloc, 0); | 701 | ret = migrate_pages(&source, hotremove_migrate_alloc, 0, 1); |
702 | 702 | ||
703 | out: | 703 | out: |
704 | return ret; | 704 | return ret; |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index f11fdad0620..290fb5bf044 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -85,6 +85,7 @@ | |||
85 | #include <linux/seq_file.h> | 85 | #include <linux/seq_file.h> |
86 | #include <linux/proc_fs.h> | 86 | #include <linux/proc_fs.h> |
87 | #include <linux/migrate.h> | 87 | #include <linux/migrate.h> |
88 | #include <linux/ksm.h> | ||
88 | #include <linux/rmap.h> | 89 | #include <linux/rmap.h> |
89 | #include <linux/security.h> | 90 | #include <linux/security.h> |
90 | #include <linux/syscalls.h> | 91 | #include <linux/syscalls.h> |
@@ -413,17 +414,11 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
413 | if (!page) | 414 | if (!page) |
414 | continue; | 415 | continue; |
415 | /* | 416 | /* |
416 | * The check for PageReserved here is important to avoid | 417 | * vm_normal_page() filters out zero pages, but there might |
417 | * handling zero pages and other pages that may have been | 418 | * still be PageReserved pages to skip, perhaps in a VDSO. |
418 | * marked special by the system. | 419 | * And we cannot move PageKsm pages sensibly or safely yet. |
419 | * | ||
420 | * If the PageReserved would not be checked here then f.e. | ||
421 | * the location of the zero page could have an influence | ||
422 | * on MPOL_MF_STRICT, zero pages would be counted for | ||
423 | * the per node stats, and there would be useless attempts | ||
424 | * to put zero pages on the migration list. | ||
425 | */ | 420 | */ |
426 | if (PageReserved(page)) | 421 | if (PageReserved(page) || PageKsm(page)) |
427 | continue; | 422 | continue; |
428 | nid = page_to_nid(page); | 423 | nid = page_to_nid(page); |
429 | if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT)) | 424 | if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT)) |
@@ -839,7 +834,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, | |||
839 | flags | MPOL_MF_DISCONTIG_OK, &pagelist); | 834 | flags | MPOL_MF_DISCONTIG_OK, &pagelist); |
840 | 835 | ||
841 | if (!list_empty(&pagelist)) | 836 | if (!list_empty(&pagelist)) |
842 | err = migrate_pages(&pagelist, new_node_page, dest); | 837 | err = migrate_pages(&pagelist, new_node_page, dest, 0); |
843 | 838 | ||
844 | return err; | 839 | return err; |
845 | } | 840 | } |
@@ -1056,7 +1051,7 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
1056 | 1051 | ||
1057 | if (!list_empty(&pagelist)) | 1052 | if (!list_empty(&pagelist)) |
1058 | nr_failed = migrate_pages(&pagelist, new_vma_page, | 1053 | nr_failed = migrate_pages(&pagelist, new_vma_page, |
1059 | (unsigned long)vma); | 1054 | (unsigned long)vma, 0); |
1060 | 1055 | ||
1061 | if (!err && nr_failed && (flags & MPOL_MF_STRICT)) | 1056 | if (!err && nr_failed && (flags & MPOL_MF_STRICT)) |
1062 | err = -EIO; | 1057 | err = -EIO; |
diff --git a/mm/migrate.c b/mm/migrate.c index 0b714747c02..2a0ea3ef509 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -543,7 +543,7 @@ static int move_to_new_page(struct page *newpage, struct page *page) | |||
543 | * to the newly allocated page in newpage. | 543 | * to the newly allocated page in newpage. |
544 | */ | 544 | */ |
545 | static int unmap_and_move(new_page_t get_new_page, unsigned long private, | 545 | static int unmap_and_move(new_page_t get_new_page, unsigned long private, |
546 | struct page *page, int force) | 546 | struct page *page, int force, int offlining) |
547 | { | 547 | { |
548 | int rc = 0; | 548 | int rc = 0; |
549 | int *result = NULL; | 549 | int *result = NULL; |
@@ -569,6 +569,20 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, | |||
569 | lock_page(page); | 569 | lock_page(page); |
570 | } | 570 | } |
571 | 571 | ||
572 | /* | ||
573 | * Only memory hotplug's offline_pages() caller has locked out KSM, | ||
574 | * and can safely migrate a KSM page. The other cases have skipped | ||
575 | * PageKsm along with PageReserved - but it is only now when we have | ||
576 | * the page lock that we can be certain it will not go KSM beneath us | ||
577 | * (KSM will not upgrade a page from PageAnon to PageKsm when it sees | ||
578 | * its pagecount raised, but only here do we take the page lock which | ||
579 | * serializes that). | ||
580 | */ | ||
581 | if (PageKsm(page) && !offlining) { | ||
582 | rc = -EBUSY; | ||
583 | goto unlock; | ||
584 | } | ||
585 | |||
572 | /* charge against new page */ | 586 | /* charge against new page */ |
573 | charge = mem_cgroup_prepare_migration(page, &mem); | 587 | charge = mem_cgroup_prepare_migration(page, &mem); |
574 | if (charge == -ENOMEM) { | 588 | if (charge == -ENOMEM) { |
@@ -685,7 +699,7 @@ move_newpage: | |||
685 | * Return: Number of pages not migrated or error code. | 699 | * Return: Number of pages not migrated or error code. |
686 | */ | 700 | */ |
687 | int migrate_pages(struct list_head *from, | 701 | int migrate_pages(struct list_head *from, |
688 | new_page_t get_new_page, unsigned long private) | 702 | new_page_t get_new_page, unsigned long private, int offlining) |
689 | { | 703 | { |
690 | int retry = 1; | 704 | int retry = 1; |
691 | int nr_failed = 0; | 705 | int nr_failed = 0; |
@@ -705,7 +719,7 @@ int migrate_pages(struct list_head *from, | |||
705 | cond_resched(); | 719 | cond_resched(); |
706 | 720 | ||
707 | rc = unmap_and_move(get_new_page, private, | 721 | rc = unmap_and_move(get_new_page, private, |
708 | page, pass > 2); | 722 | page, pass > 2, offlining); |
709 | 723 | ||
710 | switch(rc) { | 724 | switch(rc) { |
711 | case -ENOMEM: | 725 | case -ENOMEM: |
@@ -801,7 +815,8 @@ static int do_move_page_to_node_array(struct mm_struct *mm, | |||
801 | if (!page) | 815 | if (!page) |
802 | goto set_status; | 816 | goto set_status; |
803 | 817 | ||
804 | if (PageReserved(page)) /* Check for zero page */ | 818 | /* Use PageReserved to check for zero page */ |
819 | if (PageReserved(page) || PageKsm(page)) | ||
805 | goto put_and_set; | 820 | goto put_and_set; |
806 | 821 | ||
807 | pp->page = page; | 822 | pp->page = page; |
@@ -838,7 +853,7 @@ set_status: | |||
838 | err = 0; | 853 | err = 0; |
839 | if (!list_empty(&pagelist)) | 854 | if (!list_empty(&pagelist)) |
840 | err = migrate_pages(&pagelist, new_page_node, | 855 | err = migrate_pages(&pagelist, new_page_node, |
841 | (unsigned long)pm); | 856 | (unsigned long)pm, 0); |
842 | 857 | ||
843 | up_read(&mm->mmap_sem); | 858 | up_read(&mm->mmap_sem); |
844 | return err; | 859 | return err; |
@@ -959,7 +974,7 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages, | |||
959 | 974 | ||
960 | err = -ENOENT; | 975 | err = -ENOENT; |
961 | /* Use PageReserved to check for zero page */ | 976 | /* Use PageReserved to check for zero page */ |
962 | if (!page || PageReserved(page)) | 977 | if (!page || PageReserved(page) || PageKsm(page)) |
963 | goto set_status; | 978 | goto set_status; |
964 | 979 | ||
965 | err = page_to_nid(page); | 980 | err = page_to_nid(page); |