aboutsummaryrefslogtreecommitdiffstats
path: root/mm/rmap.c
diff options
context:
space:
mode:
authorMichel Lespinasse <walken@google.com>2012-10-08 19:31:39 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-09 03:22:41 -0400
commitbf181b9f9d8dfbba58b23441ad60d0bc33806d64 (patch)
tree7ad0caaf8998f31c5d910dcbb768f5a1d381b5f4 /mm/rmap.c
parent108d6642ad81bb1d62b401490a334d2c12397517 (diff)
mm anon rmap: replace same_anon_vma linked list with an interval tree.
When a large VMA (anon or private file mapping) is first touched, which will populate its anon_vma field, and then split into many regions through the use of mprotect(), the original anon_vma ends up linking all of the vmas on a linked list. This can cause rmap to become inefficient, as we have to walk potentially thousands of irrelevent vmas before finding the one a given anon page might fall into. By replacing the same_anon_vma linked list with an interval tree (where each avc's interval is determined by its vma's start and last pgoffs), we can make rmap efficient for this use case again. While the change is large, all of its pieces are fairly simple. Most places that were walking the same_anon_vma list were looking for a known pgoff, so they can just use the anon_vma_interval_tree_foreach() interval tree iterator instead. The exception here is ksm, where the page's index is not known. It would probably be possible to rework ksm so that the index would be known, but for now I have decided to keep things simple and just walk the entirety of the interval tree there. When updating vma's that already have an anon_vma assigned, we must take care to re-index the corresponding avc's on their interval tree. This is done through the use of anon_vma_interval_tree_pre_update_vma() and anon_vma_interval_tree_post_update_vma(), which remove the avc's from their interval tree before the update and re-insert them after the update. The anon_vma stays locked during the update, so there is no chance that rmap would miss the vmas that are being updated. Signed-off-by: Michel Lespinasse <walken@google.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Rik van Riel <riel@redhat.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Daniel Santos <daniel.santos@pobox.com> Cc: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/rmap.c')
-rw-r--r--mm/rmap.c24
1 files changed, 12 insertions, 12 deletions
diff --git a/mm/rmap.c b/mm/rmap.c
index 8cbd62fde0f1..9c61bf387fd1 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -127,12 +127,7 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
127 avc->vma = vma; 127 avc->vma = vma;
128 avc->anon_vma = anon_vma; 128 avc->anon_vma = anon_vma;
129 list_add(&avc->same_vma, &vma->anon_vma_chain); 129 list_add(&avc->same_vma, &vma->anon_vma_chain);
130 130 anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
131 /*
132 * It's critical to add new vmas to the tail of the anon_vma,
133 * see comment in huge_memory.c:__split_huge_page().
134 */
135 list_add_tail(&avc->same_anon_vma, &anon_vma->head);
136} 131}
137 132
138/** 133/**
@@ -336,13 +331,13 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
336 struct anon_vma *anon_vma = avc->anon_vma; 331 struct anon_vma *anon_vma = avc->anon_vma;
337 332
338 root = lock_anon_vma_root(root, anon_vma); 333 root = lock_anon_vma_root(root, anon_vma);
339 list_del(&avc->same_anon_vma); 334 anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
340 335
341 /* 336 /*
342 * Leave empty anon_vmas on the list - we'll need 337 * Leave empty anon_vmas on the list - we'll need
343 * to free them outside the lock. 338 * to free them outside the lock.
344 */ 339 */
345 if (list_empty(&anon_vma->head)) 340 if (RB_EMPTY_ROOT(&anon_vma->rb_root))
346 continue; 341 continue;
347 342
348 list_del(&avc->same_vma); 343 list_del(&avc->same_vma);
@@ -371,7 +366,7 @@ static void anon_vma_ctor(void *data)
371 366
372 mutex_init(&anon_vma->mutex); 367 mutex_init(&anon_vma->mutex);
373 atomic_set(&anon_vma->refcount, 0); 368 atomic_set(&anon_vma->refcount, 0);
374 INIT_LIST_HEAD(&anon_vma->head); 369 anon_vma->rb_root = RB_ROOT;
375} 370}
376 371
377void __init anon_vma_init(void) 372void __init anon_vma_init(void)
@@ -724,6 +719,7 @@ static int page_referenced_anon(struct page *page,
724{ 719{
725 unsigned int mapcount; 720 unsigned int mapcount;
726 struct anon_vma *anon_vma; 721 struct anon_vma *anon_vma;
722 pgoff_t pgoff;
727 struct anon_vma_chain *avc; 723 struct anon_vma_chain *avc;
728 int referenced = 0; 724 int referenced = 0;
729 725
@@ -732,7 +728,8 @@ static int page_referenced_anon(struct page *page,
732 return referenced; 728 return referenced;
733 729
734 mapcount = page_mapcount(page); 730 mapcount = page_mapcount(page);
735 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) { 731 pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
732 anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
736 struct vm_area_struct *vma = avc->vma; 733 struct vm_area_struct *vma = avc->vma;
737 unsigned long address = vma_address(page, vma); 734 unsigned long address = vma_address(page, vma);
738 if (address == -EFAULT) 735 if (address == -EFAULT)
@@ -1445,6 +1442,7 @@ bool is_vma_temporary_stack(struct vm_area_struct *vma)
1445static int try_to_unmap_anon(struct page *page, enum ttu_flags flags) 1442static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
1446{ 1443{
1447 struct anon_vma *anon_vma; 1444 struct anon_vma *anon_vma;
1445 pgoff_t pgoff;
1448 struct anon_vma_chain *avc; 1446 struct anon_vma_chain *avc;
1449 int ret = SWAP_AGAIN; 1447 int ret = SWAP_AGAIN;
1450 1448
@@ -1452,7 +1450,8 @@ static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
1452 if (!anon_vma) 1450 if (!anon_vma)
1453 return ret; 1451 return ret;
1454 1452
1455 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) { 1453 pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1454 anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
1456 struct vm_area_struct *vma = avc->vma; 1455 struct vm_area_struct *vma = avc->vma;
1457 unsigned long address; 1456 unsigned long address;
1458 1457
@@ -1668,6 +1667,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
1668 struct vm_area_struct *, unsigned long, void *), void *arg) 1667 struct vm_area_struct *, unsigned long, void *), void *arg)
1669{ 1668{
1670 struct anon_vma *anon_vma; 1669 struct anon_vma *anon_vma;
1670 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1671 struct anon_vma_chain *avc; 1671 struct anon_vma_chain *avc;
1672 int ret = SWAP_AGAIN; 1672 int ret = SWAP_AGAIN;
1673 1673
@@ -1681,7 +1681,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
1681 if (!anon_vma) 1681 if (!anon_vma)
1682 return ret; 1682 return ret;
1683 anon_vma_lock(anon_vma); 1683 anon_vma_lock(anon_vma);
1684 list_for_each_entry(avc, &anon_vma->head, same_anon_vma) { 1684 anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
1685 struct vm_area_struct *vma = avc->vma; 1685 struct vm_area_struct *vma = avc->vma;
1686 unsigned long address = vma_address(page, vma); 1686 unsigned long address = vma_address(page, vma);
1687 if (address == -EFAULT) 1687 if (address == -EFAULT)