diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-19 13:32:20 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-19 14:50:35 -0400 |
commit | d9d332e0874f46b91d8ac4604b68ee42b8a7a2c6 (patch) | |
tree | 070023e76343c4713c352aba31faae042ad3d4a6 | |
parent | 0cfd81031a26717fe14380d18275f8e217571615 (diff) |
anon_vma_prepare: properly lock even newly allocated entries
The anon_vma code is very subtle, and we end up doing optimistic lookups
of anon_vmas under RCU in page_lock_anon_vma() with no locking. Other
CPUs can also see the newly allocated entry immediately after we've
exposed it by setting "vma->anon_vma" to the new value.
We protect against the anon_vma being destroyed by having the SLAB
marked as SLAB_DESTROY_BY_RCU, so the RCU lookup can depend on the
allocation not being destroyed - but it might still be freed and
re-allocated here to a new vma.
As a result, we should not do the anon_vma list ops on a newly allocated
anon_vma without proper locking.
Acked-by: Nick Piggin <npiggin@suse.de>
Acked-by: Hugh Dickins <hugh@veritas.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | mm/rmap.c | 42 |
1 files changed, 32 insertions, 10 deletions
@@ -55,7 +55,33 @@ | |||
55 | 55 | ||
56 | struct kmem_cache *anon_vma_cachep; | 56 | struct kmem_cache *anon_vma_cachep; |
57 | 57 | ||
58 | /* This must be called under the mmap_sem. */ | 58 | /** |
59 | * anon_vma_prepare - attach an anon_vma to a memory region | ||
60 | * @vma: the memory region in question | ||
61 | * | ||
62 | * This makes sure the memory mapping described by 'vma' has | ||
63 | * an 'anon_vma' attached to it, so that we can associate the | ||
64 | * anonymous pages mapped into it with that anon_vma. | ||
65 | * | ||
66 | * The common case will be that we already have one, but if | ||
67 | * not we either need to find an adjacent mapping that we | ||
68 | * can re-use the anon_vma from (very common when the only | ||
69 | * reason for splitting a vma has been mprotect()), or we | ||
70 | * allocate a new one. | ||
71 | * | ||
72 | * Anon-vma allocations are very subtle, because we may have | ||
73 | * optimistically looked up an anon_vma in page_lock_anon_vma() | ||
74 | * and that may actually touch the spinlock even in the newly | ||
75 | * allocated vma (it depends on RCU to make sure that the | ||
76 | * anon_vma isn't actually destroyed). | ||
77 | * | ||
78 | * As a result, we need to do proper anon_vma locking even | ||
79 | * for the new allocation. At the same time, we do not want | ||
80 | * to do any locking for the common case of already having | ||
81 | * an anon_vma. | ||
82 | * | ||
83 | * This must be called with the mmap_sem held for reading. | ||
84 | */ | ||
59 | int anon_vma_prepare(struct vm_area_struct *vma) | 85 | int anon_vma_prepare(struct vm_area_struct *vma) |
60 | { | 86 | { |
61 | struct anon_vma *anon_vma = vma->anon_vma; | 87 | struct anon_vma *anon_vma = vma->anon_vma; |
@@ -63,20 +89,17 @@ int anon_vma_prepare(struct vm_area_struct *vma) | |||
63 | might_sleep(); | 89 | might_sleep(); |
64 | if (unlikely(!anon_vma)) { | 90 | if (unlikely(!anon_vma)) { |
65 | struct mm_struct *mm = vma->vm_mm; | 91 | struct mm_struct *mm = vma->vm_mm; |
66 | struct anon_vma *allocated, *locked; | 92 | struct anon_vma *allocated; |
67 | 93 | ||
68 | anon_vma = find_mergeable_anon_vma(vma); | 94 | anon_vma = find_mergeable_anon_vma(vma); |
69 | if (anon_vma) { | 95 | allocated = NULL; |
70 | allocated = NULL; | 96 | if (!anon_vma) { |
71 | locked = anon_vma; | ||
72 | spin_lock(&locked->lock); | ||
73 | } else { | ||
74 | anon_vma = anon_vma_alloc(); | 97 | anon_vma = anon_vma_alloc(); |
75 | if (unlikely(!anon_vma)) | 98 | if (unlikely(!anon_vma)) |
76 | return -ENOMEM; | 99 | return -ENOMEM; |
77 | allocated = anon_vma; | 100 | allocated = anon_vma; |
78 | locked = NULL; | ||
79 | } | 101 | } |
102 | spin_lock(&anon_vma->lock); | ||
80 | 103 | ||
81 | /* page_table_lock to protect against threads */ | 104 | /* page_table_lock to protect against threads */ |
82 | spin_lock(&mm->page_table_lock); | 105 | spin_lock(&mm->page_table_lock); |
@@ -87,8 +110,7 @@ int anon_vma_prepare(struct vm_area_struct *vma) | |||
87 | } | 110 | } |
88 | spin_unlock(&mm->page_table_lock); | 111 | spin_unlock(&mm->page_table_lock); |
89 | 112 | ||
90 | if (locked) | 113 | spin_unlock(&anon_vma->lock); |
91 | spin_unlock(&locked->lock); | ||
92 | if (unlikely(allocated)) | 114 | if (unlikely(allocated)) |
93 | anon_vma_free(allocated); | 115 | anon_vma_free(allocated); |
94 | } | 116 | } |