author		Hugh Dickins <hughd@google.com>	2013-02-22 19:35:08 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-02-23 20:50:19 -0500
commit		cbf86cfe04a66471f23b9e62e5eba4e525f38855 (patch)
tree		5572b238db52f238e3291354a4bba161f4495627 /mm/ksm.c
parent		8aafa6a485ae77ce4a49eb1280f3d2c6074a03fb (diff)
ksm: remove old stable nodes more thoroughly
Switching merge_across_nodes after running KSM is liable to oops on stale
nodes still left over from the previous stable tree.  It's not something
that people will often want to do, but it would be lame to demand a reboot
when they're trying to determine which merge_across_nodes setting is best.

How can this happen?  We only permit switching merge_across_nodes when
pages_shared is 0, and usually set run 2 to force that beforehand, which
ought to unmerge everything: yet oopses still occur when you then run 1.

Three causes:

1. The old stable tree (built according to the inverse merge_across_nodes)
   has not been fully torn down.  A stable node lingers until
   get_ksm_page() notices that the page it references no longer references
   it: but the page is not necessarily freed as soon as expected,
   particularly when swapcache.

   Fix this with a pass through the old stable tree, applying
   get_ksm_page() to each of the remaining nodes (most found stale and
   removed immediately), with forced removal of any left over.  Unless the
   page is still mapped: I've not seen that case, it shouldn't occur, but
   better to WARN_ON_ONCE and EBUSY than BUG.

2. __ksm_enter() has a nice little optimization, to insert the new mm just
   behind ksmd's cursor, so there's a full pass for it to stabilize (or be
   removed) before ksmd addresses it.  Nice when ksmd is running, but not
   so nice when we're trying to unmerge all mms: we were missing those mms
   forked and inserted behind the unmerge cursor.  Easily fixed by
   inserting at the end when KSM_RUN_UNMERGE.

3. It is possible for a KSM page to be faulted back from swapcache into an
   mm, just after unmerge_and_remove_all_rmap_items() scanned past it.
   Fix this by copying on fault when KSM_RUN_UNMERGE: but that is private
   to ksm.c, so dissolve the distinction between ksm_might_need_to_copy()
   and ksm_does_need_to_copy(), doing it all in the one call into ksm.c.

A long outstanding, unrelated bugfix sneaks in with that third fix:
ksm_does_need_to_copy() would copy from a !PageUptodate page (implying I/O
error when read in from swap) to a page which it then marks Uptodate.  Fix
this case by not copying, letting do_swap_page() discover the error.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Petr Holasek <pholasek@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Izik Eidus <izik.eidus@ravellosystems.com>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
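The tuning workflow at issue is driven entirely through sysfs.  As a
concrete illustration, here is a minimal userspace sketch of the switching
sequence described above (run 2 to unmerge, flip merge_across_nodes, run 1
to re-merge); the write_knob() helper is ours, but the /sys/kernel/mm/ksm
paths are the standard KSM knobs, and the sequence needs root:

	/* Minimal sketch of the merge_across_nodes switching sequence;
	 * write_knob() is a hypothetical helper, the sysfs paths are the
	 * standard KSM control files.  Run as root. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	static int write_knob(const char *path, const char *val)
	{
		int fd = open(path, O_WRONLY);
		ssize_t n;

		if (fd < 0) {
			perror(path);
			return -1;
		}
		n = write(fd, val, strlen(val));
		if (n < 0)
			perror(path);
		close(fd);
		return n < 0 ? -1 : 0;
	}

	int main(void)
	{
		/* Stop ksmd and unmerge everything (run = 2). */
		write_knob("/sys/kernel/mm/ksm/run", "2");

		/* Switching is only permitted once pages_shared is 0; before
		 * this patch, stale stable nodes could still oops later. */
		write_knob("/sys/kernel/mm/ksm/merge_across_nodes", "0");

		/* Restart merging under the new setting (run = 1). */
		write_knob("/sys/kernel/mm/ksm/run", "1");
		return 0;
	}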
Diffstat (limited to 'mm/ksm.c')
 mm/ksm.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 79 insertions(+), 4 deletions(-)
diff --git a/mm/ksm.c b/mm/ksm.c
index e02430fb26f6..4c22cdff02ad 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -644,6 +644,57 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
 /*
  * Only called through the sysfs control interface:
  */
+static int remove_stable_node(struct stable_node *stable_node)
+{
+	struct page *page;
+	int err;
+
+	page = get_ksm_page(stable_node, true);
+	if (!page) {
+		/*
+		 * get_ksm_page did remove_node_from_stable_tree itself.
+		 */
+		return 0;
+	}
+
+	if (WARN_ON_ONCE(page_mapped(page)))
+		err = -EBUSY;
+	else {
+		/*
+		 * This page might be in a pagevec waiting to be freed,
+		 * or it might be PageSwapCache (perhaps under writeback),
+		 * or it might have been removed from swapcache a moment ago.
+		 */
+		set_page_stable_node(page, NULL);
+		remove_node_from_stable_tree(stable_node);
+		err = 0;
+	}
+
+	unlock_page(page);
+	put_page(page);
+	return err;
+}
+
+static int remove_all_stable_nodes(void)
+{
+	struct stable_node *stable_node;
+	int nid;
+	int err = 0;
+
+	for (nid = 0; nid < nr_node_ids; nid++) {
+		while (root_stable_tree[nid].rb_node) {
+			stable_node = rb_entry(root_stable_tree[nid].rb_node,
+						struct stable_node, node);
+			if (remove_stable_node(stable_node)) {
+				err = -EBUSY;
+				break;	/* proceed to next nid */
+			}
+			cond_resched();
+		}
+	}
+	return err;
+}
+
 static int unmerge_and_remove_all_rmap_items(void)
 {
 	struct mm_slot *mm_slot;
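The forced-removal pass added above leans on get_ksm_page()'s staleness
test: a KSM page's ->mapping encodes the owning stable_node, so once the
page has been freed and reused, the back-reference no longer matches and
the node can be dropped from the stable tree.  A rough sketch of that test
follows; it is not the actual get_ksm_page(), which also takes a page
reference and optionally the page lock:

	/*
	 * Rough sketch, not the kernel's get_ksm_page(): a KSM page's
	 * ->mapping stores its stable_node pointer tagged with
	 * PAGE_MAPPING_ANON and PAGE_MAPPING_KSM, so a mismatch means the
	 * node is stale and may be removed.
	 */
	static bool stable_node_is_stale(struct stable_node *stable_node,
					 struct page *page)
	{
		void *expected_mapping = (void *)stable_node +
					 (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);

		return page->mapping != expected_mapping;
	}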
@@ -691,6 +742,8 @@ static int unmerge_and_remove_all_rmap_items(void)
 		}
 	}
 
+	/* Clean up stable nodes, but don't worry if some are still busy */
+	remove_all_stable_nodes();
 	ksm_scan.seqnr = 0;
 	return 0;
 
@@ -1586,11 +1639,19 @@ int __ksm_enter(struct mm_struct *mm)
 	spin_lock(&ksm_mmlist_lock);
 	insert_to_mm_slots_hash(mm, mm_slot);
 	/*
-	 * Insert just behind the scanning cursor, to let the area settle
+	 * When KSM_RUN_MERGE (or KSM_RUN_STOP),
+	 * insert just behind the scanning cursor, to let the area settle
 	 * down a little; when fork is followed by immediate exec, we don't
 	 * want ksmd to waste time setting up and tearing down an rmap_list.
+	 *
+	 * But when KSM_RUN_UNMERGE, it's important to insert ahead of its
+	 * scanning cursor, otherwise KSM pages in newly forked mms will be
+	 * missed: then we might as well insert at the end of the list.
 	 */
-	list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list);
+	if (ksm_run & KSM_RUN_UNMERGE)
+		list_add_tail(&mm_slot->mm_list, &ksm_mm_head.mm_list);
+	else
+		list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list);
 	spin_unlock(&ksm_mmlist_lock);
 
 	set_bit(MMF_VM_MERGEABLE, &mm->flags);
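A note on the list arithmetic in that hunk: the mm list is circular with
ksm_mm_head as its anchor, so list_add_tail() before the head appends at
the very end of the scan order, while list_add_tail() before the cursor's
slot tucks the new mm just behind the scanner.  A self-contained userspace
replica of the primitive (not kernel code) makes the two cases concrete:

	#include <stddef.h>
	#include <stdio.h>

	struct list_head { struct list_head *next, *prev; };

	struct slot {
		const char *name;
		struct list_head list;
	};

	/* Same semantics as the kernel's list_add_tail(): link @new just
	 * before @pos, so pos == head appends at the end of the ring. */
	static void list_add_tail(struct list_head *new, struct list_head *pos)
	{
		new->prev = pos->prev;
		new->next = pos;
		pos->prev->next = new;
		pos->prev = new;
	}

	int main(void)
	{
		struct list_head head = { &head, &head };
		struct slot a = { "a" }, cursor = { "cursor" }, b = { "b" };
		struct slot fresh = { "fresh" };
		struct list_head *p;

		list_add_tail(&a.list, &head);
		list_add_tail(&cursor.list, &head);
		list_add_tail(&b.list, &head);

		/* KSM_RUN_UNMERGE case: append before the head anchor, i.e.
		 * at the end, so the unmerge cursor cannot skip it. */
		list_add_tail(&fresh.list, &head);

		/* Normal running would instead do
		 * list_add_tail(&fresh.list, &cursor.list), tucking the new
		 * mm just behind the scanning cursor. */

		for (p = head.next; p != &head; p = p->next) {
			struct slot *s = (struct slot *)((char *)p -
						offsetof(struct slot, list));
			printf("%s\n", s->name);	/* a cursor b fresh */
		}
		return 0;
	}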
@@ -1640,11 +1701,25 @@ void __ksm_exit(struct mm_struct *mm)
 	}
 }
 
-struct page *ksm_does_need_to_copy(struct page *page,
+struct page *ksm_might_need_to_copy(struct page *page,
 			struct vm_area_struct *vma, unsigned long address)
 {
+	struct anon_vma *anon_vma = page_anon_vma(page);
 	struct page *new_page;
 
+	if (PageKsm(page)) {
+		if (page_stable_node(page) &&
+		    !(ksm_run & KSM_RUN_UNMERGE))
+			return page;	/* no need to copy it */
+	} else if (!anon_vma) {
+		return page;		/* no need to copy it */
+	} else if (anon_vma->root == vma->anon_vma->root &&
+		 page->index == linear_page_index(vma, address)) {
+		return page;		/* still no need to copy it */
+	}
+	if (!PageUptodate(page))
+		return page;		/* let do_swap_page report the error */
+
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 	if (new_page) {
 		copy_user_highpage(new_page, page, address, vma);
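With the might/does distinction dissolved, the swap-in path makes a single
call and acts on its result.  The mm/memory.c side of the patch is outside
this diffstat, so the following caller sketch is hedged: it follows the
commit description, and the error-path label and return code are
illustrative rather than quoted from the source:

	/*
	 * Hedged sketch of the do_swap_page() caller after this patch:
	 * ksm_might_need_to_copy() now returns the original page, a private
	 * copy, or NULL if allocating the copy failed.  Names beyond the
	 * KSM call itself are illustrative.
	 */
	swapcache = page;
	page = ksm_might_need_to_copy(page, vma, address);
	if (unlikely(!page)) {
		ret = VM_FAULT_OOM;	/* copy allocation failed */
		page = swapcache;	/* fall back to the original page */
		goto out_page;		/* illustrative error-path label */
	}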
@@ -2024,7 +2099,7 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj,
 
 	mutex_lock(&ksm_thread_mutex);
 	if (ksm_merge_across_nodes != knob) {
-		if (ksm_pages_shared)
+		if (ksm_pages_shared || remove_all_stable_nodes())
 			err = -EBUSY;
 		else
 			ksm_merge_across_nodes = knob;