diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-19 13:32:20 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-19 14:50:35 -0400 |
| commit | d9d332e0874f46b91d8ac4604b68ee42b8a7a2c6 (patch) | |
| tree | 070023e76343c4713c352aba31faae042ad3d4a6 /mm | |
| parent | 0cfd81031a26717fe14380d18275f8e217571615 (diff) | |
anon_vma_prepare: properly lock even newly allocated entries
The anon_vma code is very subtle, and we end up doing optimistic lookups
of anon_vmas under RCU in page_lock_anon_vma() with no locking. Other
CPU's can also see the newly allocated entry immediately after we've
exposed it by setting "vma->anon_vma" to the new value.
We protect against the anon_vma being destroyed by having the SLAB
marked as SLAB_DESTROY_BY_RCU, so the RCU lookup can depend on the
allocation not being destroyed - but it might still be free'd and
re-allocated here to a new vma.
As a result, we should not do the anon_vma list ops on a newly allocated
vma without proper locking.
Acked-by: Nick Piggin <npiggin@suse.de>
Acked-by: Hugh Dickins <hugh@veritas.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/rmap.c | 42 |
1 file changed, 32 insertions, 10 deletions
| @@ -55,7 +55,33 @@ | |||
| 55 | 55 | ||
| 56 | struct kmem_cache *anon_vma_cachep; | 56 | struct kmem_cache *anon_vma_cachep; |
| 57 | 57 | ||
| 58 | /* This must be called under the mmap_sem. */ | 58 | /** |
| 59 | * anon_vma_prepare - attach an anon_vma to a memory region | ||
| 60 | * @vma: the memory region in question | ||
| 61 | * | ||
| 62 | * This makes sure the memory mapping described by 'vma' has | ||
| 63 | * an 'anon_vma' attached to it, so that we can associate the | ||
| 64 | * anonymous pages mapped into it with that anon_vma. | ||
| 65 | * | ||
| 66 | * The common case will be that we already have one, but if | ||
| 67 | * not we either need to find an adjacent mapping that we | ||
| 68 | * can re-use the anon_vma from (very common when the only | ||
| 69 | * reason for splitting a vma has been mprotect()), or we | ||
| 70 | * allocate a new one. | ||
| 71 | * | ||
| 72 | * Anon-vma allocations are very subtle, because we may have | ||
| 73 | * optimistically looked up an anon_vma in page_lock_anon_vma() | ||
| 74 | * and that may actually touch the spinlock even in the newly | ||
| 75 | * allocated vma (it depends on RCU to make sure that the | ||
| 76 | * anon_vma isn't actually destroyed). | ||
| 77 | * | ||
| 78 | * As a result, we need to do proper anon_vma locking even | ||
| 79 | * for the new allocation. At the same time, we do not want | ||
| 80 | * to do any locking for the common case of already having | ||
| 81 | * an anon_vma. | ||
| 82 | * | ||
| 83 | * This must be called with the mmap_sem held for reading. | ||
| 84 | */ | ||
| 59 | int anon_vma_prepare(struct vm_area_struct *vma) | 85 | int anon_vma_prepare(struct vm_area_struct *vma) |
| 60 | { | 86 | { |
| 61 | struct anon_vma *anon_vma = vma->anon_vma; | 87 | struct anon_vma *anon_vma = vma->anon_vma; |
| @@ -63,20 +89,17 @@ int anon_vma_prepare(struct vm_area_struct *vma) | |||
| 63 | might_sleep(); | 89 | might_sleep(); |
| 64 | if (unlikely(!anon_vma)) { | 90 | if (unlikely(!anon_vma)) { |
| 65 | struct mm_struct *mm = vma->vm_mm; | 91 | struct mm_struct *mm = vma->vm_mm; |
| 66 | struct anon_vma *allocated, *locked; | 92 | struct anon_vma *allocated; |
| 67 | 93 | ||
| 68 | anon_vma = find_mergeable_anon_vma(vma); | 94 | anon_vma = find_mergeable_anon_vma(vma); |
| 69 | if (anon_vma) { | 95 | allocated = NULL; |
| 70 | allocated = NULL; | 96 | if (!anon_vma) { |
| 71 | locked = anon_vma; | ||
| 72 | spin_lock(&locked->lock); | ||
| 73 | } else { | ||
| 74 | anon_vma = anon_vma_alloc(); | 97 | anon_vma = anon_vma_alloc(); |
| 75 | if (unlikely(!anon_vma)) | 98 | if (unlikely(!anon_vma)) |
| 76 | return -ENOMEM; | 99 | return -ENOMEM; |
| 77 | allocated = anon_vma; | 100 | allocated = anon_vma; |
| 78 | locked = NULL; | ||
| 79 | } | 101 | } |
| 102 | spin_lock(&anon_vma->lock); | ||
| 80 | 103 | ||
| 81 | /* page_table_lock to protect against threads */ | 104 | /* page_table_lock to protect against threads */ |
| 82 | spin_lock(&mm->page_table_lock); | 105 | spin_lock(&mm->page_table_lock); |
| @@ -87,8 +110,7 @@ int anon_vma_prepare(struct vm_area_struct *vma) | |||
| 87 | } | 110 | } |
| 88 | spin_unlock(&mm->page_table_lock); | 111 | spin_unlock(&mm->page_table_lock); |
| 89 | 112 | ||
| 90 | if (locked) | 113 | spin_unlock(&anon_vma->lock); |
| 91 | spin_unlock(&locked->lock); | ||
| 92 | if (unlikely(allocated)) | 114 | if (unlikely(allocated)) |
| 93 | anon_vma_free(allocated); | 115 | anon_vma_free(allocated); |
| 94 | } | 116 | } |
