aboutsummaryrefslogtreecommitdiffstats
path: root/mm/rmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/rmap.c')
-rw-r--r--mm/rmap.c156
1 files changed, 130 insertions, 26 deletions
diff --git a/mm/rmap.c b/mm/rmap.c
index 5cb47111f79e..be34094e4595 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -62,6 +62,7 @@
62#include "internal.h" 62#include "internal.h"
63 63
64static struct kmem_cache *anon_vma_cachep; 64static struct kmem_cache *anon_vma_cachep;
65static struct kmem_cache *anon_vma_chain_cachep;
65 66
66static inline struct anon_vma *anon_vma_alloc(void) 67static inline struct anon_vma *anon_vma_alloc(void)
67{ 68{
@@ -73,6 +74,16 @@ void anon_vma_free(struct anon_vma *anon_vma)
73 kmem_cache_free(anon_vma_cachep, anon_vma); 74 kmem_cache_free(anon_vma_cachep, anon_vma);
74} 75}
75 76
77static inline struct anon_vma_chain *anon_vma_chain_alloc(void)
78{
79 return kmem_cache_alloc(anon_vma_chain_cachep, GFP_KERNEL);
80}
81
82void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
83{
84 kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
85}
86
76/** 87/**
77 * anon_vma_prepare - attach an anon_vma to a memory region 88 * anon_vma_prepare - attach an anon_vma to a memory region
78 * @vma: the memory region in question 89 * @vma: the memory region in question
@@ -103,18 +114,23 @@ void anon_vma_free(struct anon_vma *anon_vma)
103int anon_vma_prepare(struct vm_area_struct *vma) 114int anon_vma_prepare(struct vm_area_struct *vma)
104{ 115{
105 struct anon_vma *anon_vma = vma->anon_vma; 116 struct anon_vma *anon_vma = vma->anon_vma;
117 struct anon_vma_chain *avc;
106 118
107 might_sleep(); 119 might_sleep();
108 if (unlikely(!anon_vma)) { 120 if (unlikely(!anon_vma)) {
109 struct mm_struct *mm = vma->vm_mm; 121 struct mm_struct *mm = vma->vm_mm;
110 struct anon_vma *allocated; 122 struct anon_vma *allocated;
111 123
124 avc = anon_vma_chain_alloc();
125 if (!avc)
126 goto out_enomem;
127
112 anon_vma = find_mergeable_anon_vma(vma); 128 anon_vma = find_mergeable_anon_vma(vma);
113 allocated = NULL; 129 allocated = NULL;
114 if (!anon_vma) { 130 if (!anon_vma) {
115 anon_vma = anon_vma_alloc(); 131 anon_vma = anon_vma_alloc();
116 if (unlikely(!anon_vma)) 132 if (unlikely(!anon_vma))
117 return -ENOMEM; 133 goto out_enomem_free_avc;
118 allocated = anon_vma; 134 allocated = anon_vma;
119 } 135 }
120 spin_lock(&anon_vma->lock); 136 spin_lock(&anon_vma->lock);
@@ -123,53 +139,113 @@ int anon_vma_prepare(struct vm_area_struct *vma)
123 spin_lock(&mm->page_table_lock); 139 spin_lock(&mm->page_table_lock);
124 if (likely(!vma->anon_vma)) { 140 if (likely(!vma->anon_vma)) {
125 vma->anon_vma = anon_vma; 141 vma->anon_vma = anon_vma;
126 list_add_tail(&vma->anon_vma_node, &anon_vma->head); 142 avc->anon_vma = anon_vma;
143 avc->vma = vma;
144 list_add(&avc->same_vma, &vma->anon_vma_chain);
145 list_add(&avc->same_anon_vma, &anon_vma->head);
127 allocated = NULL; 146 allocated = NULL;
128 } 147 }
129 spin_unlock(&mm->page_table_lock); 148 spin_unlock(&mm->page_table_lock);
130 149
131 spin_unlock(&anon_vma->lock); 150 spin_unlock(&anon_vma->lock);
132 if (unlikely(allocated)) 151 if (unlikely(allocated)) {
133 anon_vma_free(allocated); 152 anon_vma_free(allocated);
153 anon_vma_chain_free(avc);
154 }
134 } 155 }
135 return 0; 156 return 0;
157
158 out_enomem_free_avc:
159 anon_vma_chain_free(avc);
160 out_enomem:
161 return -ENOMEM;
136} 162}
137 163
138void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next) 164static void anon_vma_chain_link(struct vm_area_struct *vma,
165 struct anon_vma_chain *avc,
166 struct anon_vma *anon_vma)
139{ 167{
140 BUG_ON(vma->anon_vma != next->anon_vma); 168 avc->vma = vma;
141 list_del(&next->anon_vma_node); 169 avc->anon_vma = anon_vma;
170 list_add(&avc->same_vma, &vma->anon_vma_chain);
171
172 spin_lock(&anon_vma->lock);
173 list_add_tail(&avc->same_anon_vma, &anon_vma->head);
174 spin_unlock(&anon_vma->lock);
142} 175}
143 176
144void __anon_vma_link(struct vm_area_struct *vma) 177/*
178 * Attach the anon_vmas from src to dst.
179 * Returns 0 on success, -ENOMEM on failure.
180 */
181int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
145{ 182{
146 struct anon_vma *anon_vma = vma->anon_vma; 183 struct anon_vma_chain *avc, *pavc;
184
185 list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
186 avc = anon_vma_chain_alloc();
187 if (!avc)
188 goto enomem_failure;
189 anon_vma_chain_link(dst, avc, pavc->anon_vma);
190 }
191 return 0;
147 192
148 if (anon_vma) 193 enomem_failure:
149 list_add_tail(&vma->anon_vma_node, &anon_vma->head); 194 unlink_anon_vmas(dst);
195 return -ENOMEM;
150} 196}
151 197
152void anon_vma_link(struct vm_area_struct *vma) 198/*
199 * Attach vma to its own anon_vma, as well as to the anon_vmas that
200 * the corresponding VMA in the parent process is attached to.
201 * Returns 0 on success, non-zero on failure.
202 */
203int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
153{ 204{
154 struct anon_vma *anon_vma = vma->anon_vma; 205 struct anon_vma_chain *avc;
206 struct anon_vma *anon_vma;
155 207
156 if (anon_vma) { 208 /* Don't bother if the parent process has no anon_vma here. */
157 spin_lock(&anon_vma->lock); 209 if (!pvma->anon_vma)
158 list_add_tail(&vma->anon_vma_node, &anon_vma->head); 210 return 0;
159 spin_unlock(&anon_vma->lock); 211
160 } 212 /*
213 * First, attach the new VMA to the parent VMA's anon_vmas,
214 * so rmap can find non-COWed pages in child processes.
215 */
216 if (anon_vma_clone(vma, pvma))
217 return -ENOMEM;
218
219 /* Then add our own anon_vma. */
220 anon_vma = anon_vma_alloc();
221 if (!anon_vma)
222 goto out_error;
223 avc = anon_vma_chain_alloc();
224 if (!avc)
225 goto out_error_free_anon_vma;
226 anon_vma_chain_link(vma, avc, anon_vma);
227 /* Mark this anon_vma as the one where our new (COWed) pages go. */
228 vma->anon_vma = anon_vma;
229
230 return 0;
231
232 out_error_free_anon_vma:
233 anon_vma_free(anon_vma);
234 out_error:
235 return -ENOMEM;
161} 236}
162 237
163void anon_vma_unlink(struct vm_area_struct *vma) 238static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
164{ 239{
165 struct anon_vma *anon_vma = vma->anon_vma; 240 struct anon_vma *anon_vma = anon_vma_chain->anon_vma;
166 int empty; 241 int empty;
167 242
243 /* If anon_vma_fork fails, we can get an empty anon_vma_chain. */
168 if (!anon_vma) 244 if (!anon_vma)
169 return; 245 return;
170 246
171 spin_lock(&anon_vma->lock); 247 spin_lock(&anon_vma->lock);
172 list_del(&vma->anon_vma_node); 248 list_del(&anon_vma_chain->same_anon_vma);
173 249
174 /* We must garbage collect the anon_vma if it's empty */ 250 /* We must garbage collect the anon_vma if it's empty */
175 empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma); 251 empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma);
@@ -179,6 +255,18 @@ void anon_vma_unlink(struct vm_area_struct *vma)
179 anon_vma_free(anon_vma); 255 anon_vma_free(anon_vma);
180} 256}
181 257
258void unlink_anon_vmas(struct vm_area_struct *vma)
259{
260 struct anon_vma_chain *avc, *next;
261
262 /* Unlink each anon_vma chained to the VMA. */
263 list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
264 anon_vma_unlink(avc);
265 list_del(&avc->same_vma);
266 anon_vma_chain_free(avc);
267 }
268}
269
182static void anon_vma_ctor(void *data) 270static void anon_vma_ctor(void *data)
183{ 271{
184 struct anon_vma *anon_vma = data; 272 struct anon_vma *anon_vma = data;
@@ -192,6 +280,7 @@ void __init anon_vma_init(void)
192{ 280{
193 anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma), 281 anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
194 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor); 282 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
283 anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
195} 284}
196 285
197/* 286/*
@@ -240,6 +329,18 @@ vma_address(struct page *page, struct vm_area_struct *vma)
240 /* page should be within @vma mapping range */ 329 /* page should be within @vma mapping range */
241 return -EFAULT; 330 return -EFAULT;
242 } 331 }
332 if (unlikely(vma->vm_flags & VM_LOCK_RMAP)) {
333 /*
334 * This VMA is being unlinked or is not yet linked into the
335 * VMA tree. Do not try to follow this rmap. This race
336 * condition can result in page_referenced() ignoring a
337 * reference or in try_to_unmap() failing to unmap a page.
338 * The VMA cannot be freed under us because we hold the
339 * anon_vma->lock, which the munmap code takes while
340 * unlinking the anon_vmas from the VMA.
341 */
342 return -EFAULT;
343 }
243 return address; 344 return address;
244} 345}
245 346
@@ -396,7 +497,7 @@ static int page_referenced_anon(struct page *page,
396{ 497{
397 unsigned int mapcount; 498 unsigned int mapcount;
398 struct anon_vma *anon_vma; 499 struct anon_vma *anon_vma;
399 struct vm_area_struct *vma; 500 struct anon_vma_chain *avc;
400 int referenced = 0; 501 int referenced = 0;
401 502
402 anon_vma = page_lock_anon_vma(page); 503 anon_vma = page_lock_anon_vma(page);
@@ -404,7 +505,8 @@ static int page_referenced_anon(struct page *page,
404 return referenced; 505 return referenced;
405 506
406 mapcount = page_mapcount(page); 507 mapcount = page_mapcount(page);
407 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { 508 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
509 struct vm_area_struct *vma = avc->vma;
408 unsigned long address = vma_address(page, vma); 510 unsigned long address = vma_address(page, vma);
409 if (address == -EFAULT) 511 if (address == -EFAULT)
410 continue; 512 continue;
@@ -1025,14 +1127,15 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
1025static int try_to_unmap_anon(struct page *page, enum ttu_flags flags) 1127static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
1026{ 1128{
1027 struct anon_vma *anon_vma; 1129 struct anon_vma *anon_vma;
1028 struct vm_area_struct *vma; 1130 struct anon_vma_chain *avc;
1029 int ret = SWAP_AGAIN; 1131 int ret = SWAP_AGAIN;
1030 1132
1031 anon_vma = page_lock_anon_vma(page); 1133 anon_vma = page_lock_anon_vma(page);
1032 if (!anon_vma) 1134 if (!anon_vma)
1033 return ret; 1135 return ret;
1034 1136
1035 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { 1137 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
1138 struct vm_area_struct *vma = avc->vma;
1036 unsigned long address = vma_address(page, vma); 1139 unsigned long address = vma_address(page, vma);
1037 if (address == -EFAULT) 1140 if (address == -EFAULT)
1038 continue; 1141 continue;
@@ -1223,7 +1326,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
1223 struct vm_area_struct *, unsigned long, void *), void *arg) 1326 struct vm_area_struct *, unsigned long, void *), void *arg)
1224{ 1327{
1225 struct anon_vma *anon_vma; 1328 struct anon_vma *anon_vma;
1226 struct vm_area_struct *vma; 1329 struct anon_vma_chain *avc;
1227 int ret = SWAP_AGAIN; 1330 int ret = SWAP_AGAIN;
1228 1331
1229 /* 1332 /*
@@ -1238,7 +1341,8 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
1238 if (!anon_vma) 1341 if (!anon_vma)
1239 return ret; 1342 return ret;
1240 spin_lock(&anon_vma->lock); 1343 spin_lock(&anon_vma->lock);
1241 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { 1344 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
1345 struct vm_area_struct *vma = avc->vma;
1242 unsigned long address = vma_address(page, vma); 1346 unsigned long address = vma_address(page, vma);
1243 if (address == -EFAULT) 1347 if (address == -EFAULT)
1244 continue; 1348 continue;