summaryrefslogtreecommitdiffstats
path: root/mm/ksm.c
diff options
context:
space:
mode:
authorZhou Chengming <zhouchengming1@huawei.com>2016-05-12 18:42:21 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-05-12 18:52:50 -0400
commit7496fea9a6bf644afe360af795b121a77635b37d (patch)
treee4cdf6f223bef8091167650803a1468194e92d5f /mm/ksm.c
parentc25a1e0671fbca7b2c0d0757d533bd2650d6dc0c (diff)
ksm: fix conflict between mmput and scan_get_next_rmap_item
A concurrency issue about KSM in the function scan_get_next_rmap_item.

task A (ksmd):                          |task B (the mm's task):
                                        |
mm = slot->mm;                          |
down_read(&mm->mmap_sem);               |
                                        |
...                                     |
                                        |
spin_lock(&ksm_mmlist_lock);            |
ksm_scan.mm_slot go to the next slot;   |
spin_unlock(&ksm_mmlist_lock);          |
                                        |mmput() ->
                                        |        ksm_exit():
                                        |
                                        |spin_lock(&ksm_mmlist_lock);
                                        |if (mm_slot && ksm_scan.mm_slot != mm_slot) {
                                        |        if (!mm_slot->rmap_list) {
                                        |                easy_to_free = 1;
                                        |                ...
                                        |
                                        |if (easy_to_free) {
                                        |        mmdrop(mm);
                                        |        ...
                                        |
                                        |So this mm_struct may be freed in the mmput().
up_read(&mm->mmap_sem);                 |

As we can see above, the ksmd thread may access a mm_struct that has already been freed to the kmem_cache. Suppose a fork then gets this mm_struct from the kmem_cache: when the ksmd thread calls up_read(&mm->mmap_sem), it will cause mmap_sem.count to become -1.

As suggested by Andrea Arcangeli, unmerge_and_remove_all_rmap_items has the same SMP race condition, so fix it too. My previous fix in the function scan_get_next_rmap_item would introduce a different SMP race condition, so just invert the up_read/spin_unlock order, as Andrea Arcangeli said.

Link: http://lkml.kernel.org/r/1462708815-31301-1-git-send-email-zhouchengming1@huawei.com
Signed-off-by: Zhou Chengming <zhouchengming1@huawei.com>
Suggested-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Geliang Tang <geliangtang@163.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Hanjun Guo <guohanjun@huawei.com>
Cc: Ding Tianhong <dingtianhong@huawei.com>
Cc: Li Bin <huawei.libin@huawei.com>
Cc: Zhen Lei <thunder.leizhen@huawei.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/ksm.c')
-rw-r--r--mm/ksm.c15
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/mm/ksm.c b/mm/ksm.c
index b99e828172f6..4786b4150f62 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -783,6 +783,7 @@ static int unmerge_and_remove_all_rmap_items(void)
783 } 783 }
784 784
785 remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list); 785 remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);
786 up_read(&mm->mmap_sem);
786 787
787 spin_lock(&ksm_mmlist_lock); 788 spin_lock(&ksm_mmlist_lock);
788 ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next, 789 ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
@@ -794,12 +795,9 @@ static int unmerge_and_remove_all_rmap_items(void)
794 795
795 free_mm_slot(mm_slot); 796 free_mm_slot(mm_slot);
796 clear_bit(MMF_VM_MERGEABLE, &mm->flags); 797 clear_bit(MMF_VM_MERGEABLE, &mm->flags);
797 up_read(&mm->mmap_sem);
798 mmdrop(mm); 798 mmdrop(mm);
799 } else { 799 } else
800 spin_unlock(&ksm_mmlist_lock); 800 spin_unlock(&ksm_mmlist_lock);
801 up_read(&mm->mmap_sem);
802 }
803 } 801 }
804 802
805 /* Clean up stable nodes, but don't worry if some are still busy */ 803 /* Clean up stable nodes, but don't worry if some are still busy */
@@ -1663,8 +1661,15 @@ next_mm:
1663 up_read(&mm->mmap_sem); 1661 up_read(&mm->mmap_sem);
1664 mmdrop(mm); 1662 mmdrop(mm);
1665 } else { 1663 } else {
1666 spin_unlock(&ksm_mmlist_lock);
1667 up_read(&mm->mmap_sem); 1664 up_read(&mm->mmap_sem);
1665 /*
1666 * up_read(&mm->mmap_sem) first because after
1667 * spin_unlock(&ksm_mmlist_lock) run, the "mm" may
1668 * already have been freed under us by __ksm_exit()
1669 * because the "mm_slot" is still hashed and
1670 * ksm_scan.mm_slot doesn't point to it anymore.
1671 */
1672 spin_unlock(&ksm_mmlist_lock);
1668 } 1673 }
1669 1674
1670 /* Repeat until we've completed scanning the whole list */ 1675 /* Repeat until we've completed scanning the whole list */