author	Andrea Arcangeli <aarcange@redhat.com>	2010-09-09 19:37:52 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-09-09 21:57:24 -0400
commit	4969c1192d15afa3389e7ae3302096ff684ba655
tree	abe560c8f293191be65488c49f4db3f3a626e63c
parent	7c5367f205f7d53659fb19b9fdf65b7bc1a592c6
mm: fix swapin race condition
The pte_same check is reliable only if the swap entry remains pinned (by the page lock on swapcache). We also have to ensure the swapcache isn't removed before we take the lock, as try_to_free_swap won't care about the page pin.

One of the possible impacts of this patch is that a KSM-shared page can point to the anon_vma of another process, which could exit before the page is freed.

This can leave a page with a pointer to a recycled anon_vma object, or worse, a pointer to something that is no longer an anon_vma.

[riel@redhat.com: changelog help]
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	include/linux/ksm.h	20
-rw-r--r--	mm/ksm.c	3
-rw-r--r--	mm/memory.c	39
3 files changed, 43 insertions(+), 19 deletions(-)
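For orientation, here is a condensed view of the swapin path in do_swap_page() as it reads with this patch applied, assembled from the mm/memory.c hunks below. Error handling and unrelated context are elided (the "..." markers are elisions, not code), so treat it as a sketch rather than compilable code.

	struct page *page, *swapcache = NULL;
	...
	lock_page(page);
	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);

	/*
	 * Make sure try_to_free_swap didn't release the swapcache
	 * from under us.  The page pin isn't enough to prevent that.
	 */
	if (unlikely(!PageSwapCache(page)))
		goto out_page;

	if (ksm_might_need_to_copy(page, vma, address)) {
		swapcache = page;	/* keep the original swapcache page locked */
		page = ksm_does_need_to_copy(page, vma, address);
		if (unlikely(!page)) {
			ret = VM_FAULT_OOM;
			page = swapcache;
			swapcache = NULL;
			goto out_page;
		}
	}

	/* ... pte_same() check and pte setup under the page table lock ... */

	unlock_page(page);
	if (swapcache) {
		/* released only after the pte_same()/swap_free() work above */
		unlock_page(swapcache);
		page_cache_release(swapcache);
	}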
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 74d691ee9121..3319a6967626 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -16,6 +16,9 @@
 struct stable_node;
 struct mem_cgroup;
 
+struct page *ksm_does_need_to_copy(struct page *page,
+			struct vm_area_struct *vma, unsigned long address);
+
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		unsigned long end, int advice, unsigned long *vm_flags);
@@ -70,19 +73,14 @@ static inline void set_page_stable_node(struct page *page,
  * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE,
  * but what if the vma was unmerged while the page was swapped out?
  */
-struct page *ksm_does_need_to_copy(struct page *page,
-			struct vm_area_struct *vma, unsigned long address);
-static inline struct page *ksm_might_need_to_copy(struct page *page,
+static inline int ksm_might_need_to_copy(struct page *page,
 		struct vm_area_struct *vma, unsigned long address)
 {
 	struct anon_vma *anon_vma = page_anon_vma(page);
 
-	if (!anon_vma ||
-	    (anon_vma->root == vma->anon_vma->root &&
-	     page->index == linear_page_index(vma, address)))
-		return page;
-
-	return ksm_does_need_to_copy(page, vma, address);
+	return anon_vma &&
+		(anon_vma->root != vma->anon_vma->root ||
+		 page->index != linear_page_index(vma, address));
 }
 
 int page_referenced_ksm(struct page *page,
@@ -115,10 +113,10 @@ static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 	return 0;
 }
 
-static inline struct page *ksm_might_need_to_copy(struct page *page,
+static inline int ksm_might_need_to_copy(struct page *page,
 		struct vm_area_struct *vma, unsigned long address)
 {
-	return page;
+	return 0;
 }
 
 static inline int page_referenced_ksm(struct page *page,
diff --git a/mm/ksm.c b/mm/ksm.c
index e2ae00458320..b1873cf03ed9 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1504,8 +1504,6 @@ struct page *ksm_does_need_to_copy(struct page *page,
 {
 	struct page *new_page;
 
-	unlock_page(page);	/* any racers will COW it, not modify it */
-
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 	if (new_page) {
 		copy_user_highpage(new_page, page, address, vma);
@@ -1521,7 +1519,6 @@ struct page *ksm_does_need_to_copy(struct page *page,
 		add_page_to_unevictable_list(new_page);
 	}
 
-	page_cache_release(page);
 	return new_page;
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index 6b2ab1051851..71b161b73bb5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2623,7 +2623,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned int flags, pte_t orig_pte)
 {
 	spinlock_t *ptl;
-	struct page *page;
+	struct page *page, *swapcache = NULL;
 	swp_entry_t entry;
 	pte_t pte;
 	struct mem_cgroup *ptr = NULL;
@@ -2679,10 +2679,23 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	lock_page(page);
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 
-	page = ksm_might_need_to_copy(page, vma, address);
-	if (!page) {
-		ret = VM_FAULT_OOM;
-		goto out;
+	/*
+	 * Make sure try_to_free_swap didn't release the swapcache
+	 * from under us. The page pin isn't enough to prevent that.
+	 */
+	if (unlikely(!PageSwapCache(page)))
+		goto out_page;
+
+	if (ksm_might_need_to_copy(page, vma, address)) {
+		swapcache = page;
+		page = ksm_does_need_to_copy(page, vma, address);
+
+		if (unlikely(!page)) {
+			ret = VM_FAULT_OOM;
+			page = swapcache;
+			swapcache = NULL;
+			goto out_page;
+		}
 	}
 
 	if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
@@ -2735,6 +2748,18 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
 		try_to_free_swap(page);
 	unlock_page(page);
+	if (swapcache) {
+		/*
+		 * Hold the lock to avoid the swap entry to be reused
+		 * until we take the PT lock for the pte_same() check
+		 * (to avoid false positives from pte_same). For
+		 * further safety release the lock after the swap_free
+		 * so that the swap count won't change under a
+		 * parallel locked swapcache.
+		 */
+		unlock_page(swapcache);
+		page_cache_release(swapcache);
+	}
 
 	if (flags & FAULT_FLAG_WRITE) {
 		ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte);
@@ -2756,6 +2781,10 @@ out_page:
 	unlock_page(page);
 out_release:
 	page_cache_release(page);
+	if (swapcache) {
+		unlock_page(swapcache);
+		page_cache_release(swapcache);
+	}
 	return ret;
 }
 