diff options
author | Zhou Chengming <zhouchengming1@huawei.com> | 2016-05-12 18:42:21 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-12 18:52:50 -0400 |
commit | 7496fea9a6bf644afe360af795b121a77635b37d (patch) | |
tree | e4cdf6f223bef8091167650803a1468194e92d5f /mm/ksm.c | |
parent | c25a1e0671fbca7b2c0d0757d533bd2650d6dc0c (diff) |
ksm: fix conflict between mmput and scan_get_next_rmap_item
There is a concurrency issue in KSM, in the function scan_get_next_rmap_item:
task A (ksmd): |task B (the mm's task):
|
mm = slot->mm; |
down_read(&mm->mmap_sem); |
|
... |
|
spin_lock(&ksm_mmlist_lock); |
|
ksm_scan.mm_slot go to the next slot; |
|
spin_unlock(&ksm_mmlist_lock); |
|mmput() ->
| ksm_exit():
|
|spin_lock(&ksm_mmlist_lock);
|if (mm_slot && ksm_scan.mm_slot != mm_slot) {
| if (!mm_slot->rmap_list) {
| easy_to_free = 1;
| ...
|
|if (easy_to_free) {
| mmdrop(mm);
| ...
|
|So this mm_struct may be freed in the mmput().
|
up_read(&mm->mmap_sem); |
As we can see above, the ksmd thread may access a mm_struct that has
already been freed back to the kmem_cache. If a fork then obtains this
mm_struct from the kmem_cache, the ksmd thread's subsequent call to
up_read(&mm->mmap_sem) will cause mmap_sem.count to become -1.
As suggested by Andrea Arcangeli, unmerge_and_remove_all_rmap_items has
the same SMP race condition, so fix it too. My previous fix in
scan_get_next_rmap_item would have introduced a different SMP race
condition, so just invert the up_read/spin_unlock order, as Andrea
Arcangeli suggested.
Link: http://lkml.kernel.org/r/1462708815-31301-1-git-send-email-zhouchengming1@huawei.com
Signed-off-by: Zhou Chengming <zhouchengming1@huawei.com>
Suggested-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Geliang Tang <geliangtang@163.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Hanjun Guo <guohanjun@huawei.com>
Cc: Ding Tianhong <dingtianhong@huawei.com>
Cc: Li Bin <huawei.libin@huawei.com>
Cc: Zhen Lei <thunder.leizhen@huawei.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/ksm.c')
-rw-r--r-- | mm/ksm.c | 15 |
1 files changed, 10 insertions, 5 deletions
@@ -783,6 +783,7 @@ static int unmerge_and_remove_all_rmap_items(void) | |||
783 | } | 783 | } |
784 | 784 | ||
785 | remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list); | 785 | remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list); |
786 | up_read(&mm->mmap_sem); | ||
786 | 787 | ||
787 | spin_lock(&ksm_mmlist_lock); | 788 | spin_lock(&ksm_mmlist_lock); |
788 | ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next, | 789 | ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next, |
@@ -794,12 +795,9 @@ static int unmerge_and_remove_all_rmap_items(void) | |||
794 | 795 | ||
795 | free_mm_slot(mm_slot); | 796 | free_mm_slot(mm_slot); |
796 | clear_bit(MMF_VM_MERGEABLE, &mm->flags); | 797 | clear_bit(MMF_VM_MERGEABLE, &mm->flags); |
797 | up_read(&mm->mmap_sem); | ||
798 | mmdrop(mm); | 798 | mmdrop(mm); |
799 | } else { | 799 | } else |
800 | spin_unlock(&ksm_mmlist_lock); | 800 | spin_unlock(&ksm_mmlist_lock); |
801 | up_read(&mm->mmap_sem); | ||
802 | } | ||
803 | } | 801 | } |
804 | 802 | ||
805 | /* Clean up stable nodes, but don't worry if some are still busy */ | 803 | /* Clean up stable nodes, but don't worry if some are still busy */ |
@@ -1663,8 +1661,15 @@ next_mm: | |||
1663 | up_read(&mm->mmap_sem); | 1661 | up_read(&mm->mmap_sem); |
1664 | mmdrop(mm); | 1662 | mmdrop(mm); |
1665 | } else { | 1663 | } else { |
1666 | spin_unlock(&ksm_mmlist_lock); | ||
1667 | up_read(&mm->mmap_sem); | 1664 | up_read(&mm->mmap_sem); |
1665 | /* | ||
1666 | * up_read(&mm->mmap_sem) first because after | ||
1667 | * spin_unlock(&ksm_mmlist_lock) run, the "mm" may | ||
1668 | * already have been freed under us by __ksm_exit() | ||
1669 | * because the "mm_slot" is still hashed and | ||
1670 | * ksm_scan.mm_slot doesn't point to it anymore. | ||
1671 | */ | ||
1672 | spin_unlock(&ksm_mmlist_lock); | ||
1668 | } | 1673 | } |
1669 | 1674 | ||
1670 | /* Repeat until we've completed scanning the whole list */ | 1675 | /* Repeat until we've completed scanning the whole list */ |