aboutsummaryrefslogtreecommitdiffstats
path: root/mm/rmap.c
diff options
context:
space:
mode:
authorKonstantin Khlebnikov <koct9i@gmail.com>2015-01-08 17:32:15 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2015-01-08 18:10:51 -0500
commit7a3ef208e662f4b63d43a23f61a64a129c525bbc (patch)
tree130f561d487db5224bf6501fdd7182c92eb7ff8e /mm/rmap.c
parent3245d6acab981a2388ffb877c7ecc97e763c59d4 (diff)
mm: prevent endless growth of anon_vma hierarchy
Constantly forking task causes unlimited grow of anon_vma chain. Each next child allocates new level of anon_vmas and links vma to all previous levels because pages might be inherited from any level. This patch adds heuristic which decides to reuse existing anon_vma instead of forking new one. It adds counter anon_vma->degree which counts linked vmas and directly descending anon_vmas and reuses anon_vma if counter is lower than two. As a result each anon_vma has either vma or at least two descending anon_vmas. In such trees half of nodes are leafs with alive vmas, thus count of anon_vmas is no more than two times bigger than count of vmas. This heuristic reuses anon_vmas as few as possible because each reuse adds false aliasing among vmas and rmap walker ought to scan more ptes when it searches where page is might be mapped. Link: http://lkml.kernel.org/r/20120816024610.GA5350@evergreen.ssec.wisc.edu Fixes: 5beb49305251 ("mm: change anon_vma linking to fix multi-process server scalability issue") [akpm@linux-foundation.org: fix typo, per Rik] Signed-off-by: Konstantin Khlebnikov <koct9i@gmail.com> Reported-by: Daniel Forrest <dan.forrest@ssec.wisc.edu> Tested-by: Michal Hocko <mhocko@suse.cz> Tested-by: Jerome Marchand <jmarchan@redhat.com> Reviewed-by: Michal Hocko <mhocko@suse.cz> Reviewed-by: Rik van Riel <riel@redhat.com> Cc: <stable@vger.kernel.org> [2.6.34+] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/rmap.c')
-rw-r--r--mm/rmap.c42
1 files changed, 41 insertions, 1 deletions
diff --git a/mm/rmap.c b/mm/rmap.c
index c5bc241127b2..71cd5bd0c17d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -72,6 +72,8 @@ static inline struct anon_vma *anon_vma_alloc(void)
72 anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL); 72 anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
73 if (anon_vma) { 73 if (anon_vma) {
74 atomic_set(&anon_vma->refcount, 1); 74 atomic_set(&anon_vma->refcount, 1);
75 anon_vma->degree = 1; /* Reference for first vma */
76 anon_vma->parent = anon_vma;
75 /* 77 /*
76 * Initialise the anon_vma root to point to itself. If called 78 * Initialise the anon_vma root to point to itself. If called
77 * from fork, the root will be reset to the parents anon_vma. 79 * from fork, the root will be reset to the parents anon_vma.
@@ -188,6 +190,8 @@ int anon_vma_prepare(struct vm_area_struct *vma)
188 if (likely(!vma->anon_vma)) { 190 if (likely(!vma->anon_vma)) {
189 vma->anon_vma = anon_vma; 191 vma->anon_vma = anon_vma;
190 anon_vma_chain_link(vma, avc, anon_vma); 192 anon_vma_chain_link(vma, avc, anon_vma);
193 /* vma reference or self-parent link for new root */
194 anon_vma->degree++;
191 allocated = NULL; 195 allocated = NULL;
192 avc = NULL; 196 avc = NULL;
193 } 197 }
@@ -236,6 +240,14 @@ static inline void unlock_anon_vma_root(struct anon_vma *root)
236/* 240/*
237 * Attach the anon_vmas from src to dst. 241 * Attach the anon_vmas from src to dst.
238 * Returns 0 on success, -ENOMEM on failure. 242 * Returns 0 on success, -ENOMEM on failure.
243 *
244 * If dst->anon_vma is NULL this function tries to find and reuse existing
245 * anon_vma which has no vmas and only one child anon_vma. This prevents
246 * degradation of anon_vma hierarchy to endless linear chain in case of
247 * constantly forking task. On the other hand, an anon_vma with more than one
248 * child isn't reused even if there was no alive vma, thus rmap walker has a
249 * good chance of avoiding scanning the whole hierarchy when it searches where
250 * page is mapped.
239 */ 251 */
240int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) 252int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
241{ 253{
@@ -256,7 +268,21 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
256 anon_vma = pavc->anon_vma; 268 anon_vma = pavc->anon_vma;
257 root = lock_anon_vma_root(root, anon_vma); 269 root = lock_anon_vma_root(root, anon_vma);
258 anon_vma_chain_link(dst, avc, anon_vma); 270 anon_vma_chain_link(dst, avc, anon_vma);
271
272 /*
273 * Reuse existing anon_vma if its degree lower than two,
274 * that means it has no vma and only one anon_vma child.
275 *
276 * Do not chose parent anon_vma, otherwise first child
277 * will always reuse it. Root anon_vma is never reused:
278 * it has self-parent reference and at least one child.
279 */
280 if (!dst->anon_vma && anon_vma != src->anon_vma &&
281 anon_vma->degree < 2)
282 dst->anon_vma = anon_vma;
259 } 283 }
284 if (dst->anon_vma)
285 dst->anon_vma->degree++;
260 unlock_anon_vma_root(root); 286 unlock_anon_vma_root(root);
261 return 0; 287 return 0;
262 288
@@ -280,6 +306,9 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
280 if (!pvma->anon_vma) 306 if (!pvma->anon_vma)
281 return 0; 307 return 0;
282 308
309 /* Drop inherited anon_vma, we'll reuse existing or allocate new. */
310 vma->anon_vma = NULL;
311
283 /* 312 /*
284 * First, attach the new VMA to the parent VMA's anon_vmas, 313 * First, attach the new VMA to the parent VMA's anon_vmas,
285 * so rmap can find non-COWed pages in child processes. 314 * so rmap can find non-COWed pages in child processes.
@@ -288,6 +317,10 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
288 if (error) 317 if (error)
289 return error; 318 return error;
290 319
320 /* An existing anon_vma has been reused, all done then. */
321 if (vma->anon_vma)
322 return 0;
323
291 /* Then add our own anon_vma. */ 324 /* Then add our own anon_vma. */
292 anon_vma = anon_vma_alloc(); 325 anon_vma = anon_vma_alloc();
293 if (!anon_vma) 326 if (!anon_vma)
@@ -301,6 +334,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
301 * lock any of the anon_vmas in this anon_vma tree. 334 * lock any of the anon_vmas in this anon_vma tree.
302 */ 335 */
303 anon_vma->root = pvma->anon_vma->root; 336 anon_vma->root = pvma->anon_vma->root;
337 anon_vma->parent = pvma->anon_vma;
304 /* 338 /*
305 * With refcounts, an anon_vma can stay around longer than the 339 * With refcounts, an anon_vma can stay around longer than the
306 * process it belongs to. The root anon_vma needs to be pinned until 340 * process it belongs to. The root anon_vma needs to be pinned until
@@ -311,6 +345,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
311 vma->anon_vma = anon_vma; 345 vma->anon_vma = anon_vma;
312 anon_vma_lock_write(anon_vma); 346 anon_vma_lock_write(anon_vma);
313 anon_vma_chain_link(vma, avc, anon_vma); 347 anon_vma_chain_link(vma, avc, anon_vma);
348 anon_vma->parent->degree++;
314 anon_vma_unlock_write(anon_vma); 349 anon_vma_unlock_write(anon_vma);
315 350
316 return 0; 351 return 0;
@@ -341,12 +376,16 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
341 * Leave empty anon_vmas on the list - we'll need 376 * Leave empty anon_vmas on the list - we'll need
342 * to free them outside the lock. 377 * to free them outside the lock.
343 */ 378 */
344 if (RB_EMPTY_ROOT(&anon_vma->rb_root)) 379 if (RB_EMPTY_ROOT(&anon_vma->rb_root)) {
380 anon_vma->parent->degree--;
345 continue; 381 continue;
382 }
346 383
347 list_del(&avc->same_vma); 384 list_del(&avc->same_vma);
348 anon_vma_chain_free(avc); 385 anon_vma_chain_free(avc);
349 } 386 }
387 if (vma->anon_vma)
388 vma->anon_vma->degree--;
350 unlock_anon_vma_root(root); 389 unlock_anon_vma_root(root);
351 390
352 /* 391 /*
@@ -357,6 +396,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
357 list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) { 396 list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
358 struct anon_vma *anon_vma = avc->anon_vma; 397 struct anon_vma *anon_vma = avc->anon_vma;
359 398
399 BUG_ON(anon_vma->degree);
360 put_anon_vma(anon_vma); 400 put_anon_vma(anon_vma);
361 401
362 list_del(&avc->same_vma); 402 list_del(&avc->same_vma);