diff options
author | Andrea Arcangeli <aarcange@redhat.com> | 2010-09-09 19:37:52 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-09-09 21:57:24 -0400 |
commit | 4969c1192d15afa3389e7ae3302096ff684ba655 (patch) | |
tree | abe560c8f293191be65488c49f4db3f3a626e63c | |
parent | 7c5367f205f7d53659fb19b9fdf65b7bc1a592c6 (diff) |
mm: fix swapin race condition
The pte_same check is reliable only if the swap entry remains pinned (by
the page lock on swapcache). We've also to ensure the swapcache isn't
removed before we take the lock as try_to_free_swap won't care about the
page pin.
One of the possible impacts of this patch is that a KSM-shared page can
point to the anon_vma of another process, which could exit before the page
is freed.
This can leave a page with a pointer to a recycled anon_vma object, or
worse, a pointer to something that is no longer an anon_vma.
[riel@redhat.com: changelog help]
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/ksm.h | 20 | ||||
-rw-r--r-- | mm/ksm.c | 3 | ||||
-rw-r--r-- | mm/memory.c | 39 |
3 files changed, 43 insertions, 19 deletions
diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 74d691ee9121..3319a6967626 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h | |||
@@ -16,6 +16,9 @@ | |||
16 | struct stable_node; | 16 | struct stable_node; |
17 | struct mem_cgroup; | 17 | struct mem_cgroup; |
18 | 18 | ||
19 | struct page *ksm_does_need_to_copy(struct page *page, | ||
20 | struct vm_area_struct *vma, unsigned long address); | ||
21 | |||
19 | #ifdef CONFIG_KSM | 22 | #ifdef CONFIG_KSM |
20 | int ksm_madvise(struct vm_area_struct *vma, unsigned long start, | 23 | int ksm_madvise(struct vm_area_struct *vma, unsigned long start, |
21 | unsigned long end, int advice, unsigned long *vm_flags); | 24 | unsigned long end, int advice, unsigned long *vm_flags); |
@@ -70,19 +73,14 @@ static inline void set_page_stable_node(struct page *page, | |||
70 | * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE, | 73 | * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE, |
71 | * but what if the vma was unmerged while the page was swapped out? | 74 | * but what if the vma was unmerged while the page was swapped out? |
72 | */ | 75 | */ |
73 | struct page *ksm_does_need_to_copy(struct page *page, | 76 | static inline int ksm_might_need_to_copy(struct page *page, |
74 | struct vm_area_struct *vma, unsigned long address); | ||
75 | static inline struct page *ksm_might_need_to_copy(struct page *page, | ||
76 | struct vm_area_struct *vma, unsigned long address) | 77 | struct vm_area_struct *vma, unsigned long address) |
77 | { | 78 | { |
78 | struct anon_vma *anon_vma = page_anon_vma(page); | 79 | struct anon_vma *anon_vma = page_anon_vma(page); |
79 | 80 | ||
80 | if (!anon_vma || | 81 | return anon_vma && |
81 | (anon_vma->root == vma->anon_vma->root && | 82 | (anon_vma->root != vma->anon_vma->root || |
82 | page->index == linear_page_index(vma, address))) | 83 | page->index != linear_page_index(vma, address)); |
83 | return page; | ||
84 | |||
85 | return ksm_does_need_to_copy(page, vma, address); | ||
86 | } | 84 | } |
87 | 85 | ||
88 | int page_referenced_ksm(struct page *page, | 86 | int page_referenced_ksm(struct page *page, |
@@ -115,10 +113,10 @@ static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start, | |||
115 | return 0; | 113 | return 0; |
116 | } | 114 | } |
117 | 115 | ||
118 | static inline struct page *ksm_might_need_to_copy(struct page *page, | 116 | static inline int ksm_might_need_to_copy(struct page *page, |
119 | struct vm_area_struct *vma, unsigned long address) | 117 | struct vm_area_struct *vma, unsigned long address) |
120 | { | 118 | { |
121 | return page; | 119 | return 0; |
122 | } | 120 | } |
123 | 121 | ||
124 | static inline int page_referenced_ksm(struct page *page, | 122 | static inline int page_referenced_ksm(struct page *page, |
@@ -1504,8 +1504,6 @@ struct page *ksm_does_need_to_copy(struct page *page, | |||
1504 | { | 1504 | { |
1505 | struct page *new_page; | 1505 | struct page *new_page; |
1506 | 1506 | ||
1507 | unlock_page(page); /* any racers will COW it, not modify it */ | ||
1508 | |||
1509 | new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); | 1507 | new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); |
1510 | if (new_page) { | 1508 | if (new_page) { |
1511 | copy_user_highpage(new_page, page, address, vma); | 1509 | copy_user_highpage(new_page, page, address, vma); |
@@ -1521,7 +1519,6 @@ struct page *ksm_does_need_to_copy(struct page *page, | |||
1521 | add_page_to_unevictable_list(new_page); | 1519 | add_page_to_unevictable_list(new_page); |
1522 | } | 1520 | } |
1523 | 1521 | ||
1524 | page_cache_release(page); | ||
1525 | return new_page; | 1522 | return new_page; |
1526 | } | 1523 | } |
1527 | 1524 | ||
diff --git a/mm/memory.c b/mm/memory.c index 6b2ab1051851..71b161b73bb5 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -2623,7 +2623,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2623 | unsigned int flags, pte_t orig_pte) | 2623 | unsigned int flags, pte_t orig_pte) |
2624 | { | 2624 | { |
2625 | spinlock_t *ptl; | 2625 | spinlock_t *ptl; |
2626 | struct page *page; | 2626 | struct page *page, *swapcache = NULL; |
2627 | swp_entry_t entry; | 2627 | swp_entry_t entry; |
2628 | pte_t pte; | 2628 | pte_t pte; |
2629 | struct mem_cgroup *ptr = NULL; | 2629 | struct mem_cgroup *ptr = NULL; |
@@ -2679,10 +2679,23 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2679 | lock_page(page); | 2679 | lock_page(page); |
2680 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); | 2680 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); |
2681 | 2681 | ||
2682 | page = ksm_might_need_to_copy(page, vma, address); | 2682 | /* |
2683 | if (!page) { | 2683 | * Make sure try_to_free_swap didn't release the swapcache |
2684 | ret = VM_FAULT_OOM; | 2684 | * from under us. The page pin isn't enough to prevent that. |
2685 | goto out; | 2685 | */ |
2686 | if (unlikely(!PageSwapCache(page))) | ||
2687 | goto out_page; | ||
2688 | |||
2689 | if (ksm_might_need_to_copy(page, vma, address)) { | ||
2690 | swapcache = page; | ||
2691 | page = ksm_does_need_to_copy(page, vma, address); | ||
2692 | |||
2693 | if (unlikely(!page)) { | ||
2694 | ret = VM_FAULT_OOM; | ||
2695 | page = swapcache; | ||
2696 | swapcache = NULL; | ||
2697 | goto out_page; | ||
2698 | } | ||
2686 | } | 2699 | } |
2687 | 2700 | ||
2688 | if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) { | 2701 | if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) { |
@@ -2735,6 +2748,18 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2735 | if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) | 2748 | if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) |
2736 | try_to_free_swap(page); | 2749 | try_to_free_swap(page); |
2737 | unlock_page(page); | 2750 | unlock_page(page); |
2751 | if (swapcache) { | ||
2752 | /* | ||
2753 | * Hold the lock to avoid the swap entry to be reused | ||
2754 | * until we take the PT lock for the pte_same() check | ||
2755 | * (to avoid false positives from pte_same). For | ||
2756 | * further safety release the lock after the swap_free | ||
2757 | * so that the swap count won't change under a | ||
2758 | * parallel locked swapcache. | ||
2759 | */ | ||
2760 | unlock_page(swapcache); | ||
2761 | page_cache_release(swapcache); | ||
2762 | } | ||
2738 | 2763 | ||
2739 | if (flags & FAULT_FLAG_WRITE) { | 2764 | if (flags & FAULT_FLAG_WRITE) { |
2740 | ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte); | 2765 | ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte); |
@@ -2756,6 +2781,10 @@ out_page: | |||
2756 | unlock_page(page); | 2781 | unlock_page(page); |
2757 | out_release: | 2782 | out_release: |
2758 | page_cache_release(page); | 2783 | page_cache_release(page); |
2784 | if (swapcache) { | ||
2785 | unlock_page(swapcache); | ||
2786 | page_cache_release(swapcache); | ||
2787 | } | ||
2759 | return ret; | 2788 | return ret; |
2760 | } | 2789 | } |
2761 | 2790 | ||