author     Michel Lespinasse <walken@google.com>           2012-10-08 19:31:39 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-10-09 03:22:41 -0400
commit     bf181b9f9d8dfbba58b23441ad60d0bc33806d64 (patch)
tree       7ad0caaf8998f31c5d910dcbb768f5a1d381b5f4
parent     108d6642ad81bb1d62b401490a334d2c12397517 (diff)
mm anon rmap: replace same_anon_vma linked list with an interval tree.
When a large VMA (anon or private file mapping) is first touched, which
will populate its anon_vma field, and then split into many regions through
the use of mprotect(), the original anon_vma ends up linking all of the
vmas on a linked list.  This can cause rmap to become inefficient, as we
have to walk potentially thousands of irrelevant vmas before finding the
one a given anon page might fall into.

By replacing the same_anon_vma linked list with an interval tree (where
each avc's interval is determined by its vma's start and last pgoffs), we
can make rmap efficient for this use case again.

While the change is large, all of its pieces are fairly simple.

Most places that were walking the same_anon_vma list were looking for a
known pgoff, so they can just use the anon_vma_interval_tree_foreach()
interval tree iterator instead.  The exception here is ksm, where the
page's index is not known.  It would probably be possible to rework ksm so
that the index would be known, but for now I have decided to keep things
simple and just walk the entirety of the interval tree there.

When updating vmas that already have an anon_vma assigned, we must take
care to re-index the corresponding avcs on their interval tree.  This is
done through the use of anon_vma_interval_tree_pre_update_vma() and
anon_vma_interval_tree_post_update_vma(), which remove the avcs from their
interval tree before the update and re-insert them after the update.  The
anon_vma stays locked during the update, so there is no chance that rmap
would miss the vmas that are being updated.

Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Daniel Santos <daniel.santos@pobox.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
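For orientation, the two usage patterns this patch introduces look roughly as
follows.  This is a schematic sketch of the hunks below, not additional kernel
API; the surrounding context (page, vma, anon_vma, and the locking the caller
already holds) is assumed from the code being patched.

/* Sketch 1: rmap walkers with a known page offset stab the interval tree
 * instead of scanning every vma on the old same_anon_vma list.
 */
pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
struct anon_vma_chain *avc;

anon_vma_lock(anon_vma);
anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
        struct vm_area_struct *vma = avc->vma;
        unsigned long address = vma_address(page, vma);
        /* ... process the page's mapping in this vma at 'address' ... */
}
anon_vma_unlock(anon_vma);

/* Sketch 2: before changing vm_start / vm_end / vm_pgoff of a vma that
 * already has an anon_vma, drop its avcs from the interval tree, update
 * the fields, then re-insert them; the anon_vma lock is held across the
 * whole update so rmap never misses the vma.
 */
anon_vma_interval_tree_pre_update_vma(vma);
vma->vm_end = address;          /* or any vm_start / vm_pgoff update */
anon_vma_interval_tree_post_update_vma(vma);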
-rw-r--r--  include/linux/mm.h    | 14
-rw-r--r--  include/linux/rmap.h  | 11
-rw-r--r--  mm/huge_memory.c      |  5
-rw-r--r--  mm/interval_tree.c    | 14
-rw-r--r--  mm/ksm.c              |  9
-rw-r--r--  mm/memory-failure.c   |  5
-rw-r--r--  mm/mmap.c             | 73
-rw-r--r--  mm/rmap.c             | 24
8 files changed, 114 insertions(+), 41 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f1d9aaadb566..0cdab4e0f814 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -20,6 +20,7 @@
 
 struct mempolicy;
 struct anon_vma;
+struct anon_vma_chain;
 struct file_ra_state;
 struct user_struct;
 struct writeback_control;
@@ -1377,6 +1378,19 @@ static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
         list_add_tail(&vma->shared.nonlinear, list);
 }
 
+void anon_vma_interval_tree_insert(struct anon_vma_chain *node,
+                                   struct rb_root *root);
+void anon_vma_interval_tree_remove(struct anon_vma_chain *node,
+                                   struct rb_root *root);
+struct anon_vma_chain *anon_vma_interval_tree_iter_first(
+        struct rb_root *root, unsigned long start, unsigned long last);
+struct anon_vma_chain *anon_vma_interval_tree_iter_next(
+        struct anon_vma_chain *node, unsigned long start, unsigned long last);
+
+#define anon_vma_interval_tree_foreach(avc, root, start, last)          \
+        for (avc = anon_vma_interval_tree_iter_first(root, start, last); \
+             avc; avc = anon_vma_interval_tree_iter_next(avc, start, last))
+
 /* mmap.c */
 extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
 extern int vma_adjust(struct vm_area_struct *vma, unsigned long start,
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 7f32cec57e67..dce44f7d3ed8 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -37,14 +37,14 @@ struct anon_vma {
         atomic_t refcount;
 
         /*
-         * NOTE: the LSB of the head.next is set by
+         * NOTE: the LSB of the rb_root.rb_node is set by
          * mm_take_all_locks() _after_ taking the above lock. So the
-         * head must only be read/written after taking the above lock
+         * rb_root must only be read/written after taking the above lock
          * to be sure to see a valid next pointer. The LSB bit itself
          * is serialized by a system wide lock only visible to
          * mm_take_all_locks() (mm_all_locks_mutex).
          */
-        struct list_head head;  /* Chain of private "related" vmas */
+        struct rb_root rb_root; /* Interval tree of private "related" vmas */
 };
 
 /*
@@ -57,14 +57,15 @@ struct anon_vma {
  * with a VMA, or the VMAs associated with an anon_vma.
  * The "same_vma" list contains the anon_vma_chains linking
  * all the anon_vmas associated with this VMA.
- * The "same_anon_vma" list contains the anon_vma_chains
+ * The "rb" field indexes on an interval tree the anon_vma_chains
  * which link all the VMAs associated with this anon_vma.
  */
 struct anon_vma_chain {
         struct vm_area_struct *vma;
         struct anon_vma *anon_vma;
         struct list_head same_vma;      /* locked by mmap_sem & page_table_lock */
-        struct list_head same_anon_vma; /* locked by anon_vma->mutex */
+        struct rb_node rb;              /* locked by anon_vma->mutex */
+        unsigned long rb_subtree_last;
 };
 
 #ifdef CONFIG_MMU
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 010d32944d14..ce59ada09462 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1375,13 +1375,14 @@ static void __split_huge_page(struct page *page,
                               struct anon_vma *anon_vma)
 {
         int mapcount, mapcount2;
+        pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
         struct anon_vma_chain *avc;
 
         BUG_ON(!PageHead(page));
         BUG_ON(PageTail(page));
 
         mapcount = 0;
-        list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+        anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
                 struct vm_area_struct *vma = avc->vma;
                 unsigned long addr = vma_address(page, vma);
                 BUG_ON(is_vma_temporary_stack(vma));
@@ -1407,7 +1408,7 @@ static void __split_huge_page(struct page *page,
         __split_huge_page_refcount(page);
 
         mapcount2 = 0;
-        list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+        anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
                 struct vm_area_struct *vma = avc->vma;
                 unsigned long addr = vma_address(page, vma);
                 BUG_ON(is_vma_temporary_stack(vma));
diff --git a/mm/interval_tree.c b/mm/interval_tree.c
index 4ab7b9ec3a56..f7c72cd35e1d 100644
--- a/mm/interval_tree.c
+++ b/mm/interval_tree.c
@@ -8,6 +8,7 @@
 
 #include <linux/mm.h>
 #include <linux/fs.h>
+#include <linux/rmap.h>
 #include <linux/interval_tree_generic.h>
 
 static inline unsigned long vma_start_pgoff(struct vm_area_struct *v)
@@ -57,3 +58,16 @@ void vma_interval_tree_insert_after(struct vm_area_struct *node,
         rb_insert_augmented(&node->shared.linear.rb, root,
                             &vma_interval_tree_augment);
 }
+
+static inline unsigned long avc_start_pgoff(struct anon_vma_chain *avc)
+{
+        return vma_start_pgoff(avc->vma);
+}
+
+static inline unsigned long avc_last_pgoff(struct anon_vma_chain *avc)
+{
+        return vma_last_pgoff(avc->vma);
+}
+
+INTERVAL_TREE_DEFINE(struct anon_vma_chain, rb, unsigned long, rb_subtree_last,
+                     avc_start_pgoff, avc_last_pgoff,, anon_vma_interval_tree)
diff --git a/mm/ksm.c b/mm/ksm.c
index 9638620a7530..14ee5cf8a513 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1618,7 +1618,8 @@ again:
                 struct vm_area_struct *vma;
 
                 anon_vma_lock(anon_vma);
-                list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
+                anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
+                                               0, ULONG_MAX) {
                         vma = vmac->vma;
                         if (rmap_item->address < vma->vm_start ||
                             rmap_item->address >= vma->vm_end)
@@ -1671,7 +1672,8 @@ again:
                 struct vm_area_struct *vma;
 
                 anon_vma_lock(anon_vma);
-                list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
+                anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
+                                               0, ULONG_MAX) {
                         vma = vmac->vma;
                         if (rmap_item->address < vma->vm_start ||
                             rmap_item->address >= vma->vm_end)
@@ -1723,7 +1725,8 @@ again:
                 struct vm_area_struct *vma;
 
                 anon_vma_lock(anon_vma);
-                list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
+                anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
+                                               0, ULONG_MAX) {
                         vma = vmac->vma;
                         if (rmap_item->address < vma->vm_start ||
                             rmap_item->address >= vma->vm_end)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index c38a6257d082..6c5899b9034a 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -400,18 +400,21 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
         struct vm_area_struct *vma;
         struct task_struct *tsk;
         struct anon_vma *av;
+        pgoff_t pgoff;
 
         av = page_lock_anon_vma(page);
         if (av == NULL) /* Not actually mapped anymore */
                 return;
 
+        pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
         read_lock(&tasklist_lock);
         for_each_process (tsk) {
                 struct anon_vma_chain *vmac;
 
                 if (!task_early_kill(tsk))
                         continue;
-                list_for_each_entry(vmac, &av->head, same_anon_vma) {
+                anon_vma_interval_tree_foreach(vmac, &av->rb_root,
+                                               pgoff, pgoff) {
                         vma = vmac->vma;
                         if (!page_mapped_in_vma(page, vma))
                                 continue;
diff --git a/mm/mmap.c b/mm/mmap.c
index 66984aab7915..2e580ed79211 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -353,6 +353,38 @@ void validate_mm(struct mm_struct *mm)
 #define validate_mm(mm) do { } while (0)
 #endif
 
+/*
+ * vma has some anon_vma assigned, and is already inserted on that
+ * anon_vma's interval trees.
+ *
+ * Before updating the vma's vm_start / vm_end / vm_pgoff fields, the
+ * vma must be removed from the anon_vma's interval trees using
+ * anon_vma_interval_tree_pre_update_vma().
+ *
+ * After the update, the vma will be reinserted using
+ * anon_vma_interval_tree_post_update_vma().
+ *
+ * The entire update must be protected by exclusive mmap_sem and by
+ * the root anon_vma's mutex.
+ */
+static inline void
+anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
+{
+        struct anon_vma_chain *avc;
+
+        list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+                anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
+}
+
+static inline void
+anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
+{
+        struct anon_vma_chain *avc;
+
+        list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+                anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
+}
+
 static int find_vma_links(struct mm_struct *mm, unsigned long addr,
                           unsigned long end, struct vm_area_struct **pprev,
                           struct rb_node ***rb_link, struct rb_node **rb_parent)
@@ -565,20 +597,17 @@ again: remove_next = 1 + (end > next->vm_end);
 
         vma_adjust_trans_huge(vma, start, end, adjust_next);
 
-        /*
-         * When changing only vma->vm_end, we don't really need anon_vma
-         * lock. This is a fairly rare case by itself, but the anon_vma
-         * lock may be shared between many sibling processes. Skipping
-         * the lock for brk adjustments makes a difference sometimes.
-         */
-        if (vma->anon_vma && (importer || start != vma->vm_start)) {
-                anon_vma = vma->anon_vma;
+        anon_vma = vma->anon_vma;
+        if (!anon_vma && adjust_next)
+                anon_vma = next->anon_vma;
+        if (anon_vma) {
                 VM_BUG_ON(adjust_next && next->anon_vma &&
                           anon_vma != next->anon_vma);
-        } else if (adjust_next && next->anon_vma)
-                anon_vma = next->anon_vma;
-        if (anon_vma)
                 anon_vma_lock(anon_vma);
+                anon_vma_interval_tree_pre_update_vma(vma);
+                if (adjust_next)
+                        anon_vma_interval_tree_pre_update_vma(next);
+        }
 
         if (root) {
                 flush_dcache_mmap_lock(mapping);
@@ -619,8 +648,12 @@ again: remove_next = 1 + (end > next->vm_end);
                 __insert_vm_struct(mm, insert);
         }
 
-        if (anon_vma)
+        if (anon_vma) {
+                anon_vma_interval_tree_post_update_vma(vma);
+                if (adjust_next)
+                        anon_vma_interval_tree_post_update_vma(next);
                 anon_vma_unlock(anon_vma);
+        }
         if (mapping)
                 mutex_unlock(&mapping->i_mmap_mutex);
 
@@ -1748,7 +1781,9 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                 if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
                         error = acct_stack_growth(vma, size, grow);
                         if (!error) {
+                                anon_vma_interval_tree_pre_update_vma(vma);
                                 vma->vm_end = address;
+                                anon_vma_interval_tree_post_update_vma(vma);
                                 perf_event_mmap(vma);
                         }
                 }
@@ -1798,8 +1833,10 @@ int expand_downwards(struct vm_area_struct *vma,
                 if (grow <= vma->vm_pgoff) {
                         error = acct_stack_growth(vma, size, grow);
                         if (!error) {
+                                anon_vma_interval_tree_pre_update_vma(vma);
                                 vma->vm_start = address;
                                 vma->vm_pgoff -= grow;
+                                anon_vma_interval_tree_post_update_vma(vma);
                                 perf_event_mmap(vma);
                         }
                 }
@@ -2515,7 +2552,7 @@ static DEFINE_MUTEX(mm_all_locks_mutex);
 
 static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
 {
-        if (!test_bit(0, (unsigned long *) &anon_vma->root->head.next)) {
+        if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
                 /*
                  * The LSB of head.next can't change from under us
                  * because we hold the mm_all_locks_mutex.
@@ -2531,7 +2568,7 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
                  * anon_vma->root->mutex.
                  */
                 if (__test_and_set_bit(0, (unsigned long *)
-                                       &anon_vma->root->head.next))
+                                       &anon_vma->root->rb_root.rb_node))
                         BUG();
         }
 }
@@ -2572,7 +2609,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
  * A single task can't take more than one mm_take_all_locks() in a row
  * or it would deadlock.
  *
- * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in
+ * The LSB in anon_vma->rb_root.rb_node and the AS_MM_ALL_LOCKS bitflag in
  * mapping->flags avoid to take the same lock twice, if more than one
  * vma in this mm is backed by the same anon_vma or address_space.
  *
@@ -2619,13 +2656,13 @@ out_unlock:
 
 static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
 {
-        if (test_bit(0, (unsigned long *) &anon_vma->root->head.next)) {
+        if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) {
                 /*
                  * The LSB of head.next can't change to 0 from under
                  * us because we hold the mm_all_locks_mutex.
                  *
                  * We must however clear the bitflag before unlocking
-                 * the vma so the users using the anon_vma->head will
+                 * the vma so the users using the anon_vma->rb_root will
                  * never see our bitflag.
                  *
                  * No need of atomic instructions here, head.next
@@ -2633,7 +2670,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
                  * anon_vma->root->mutex.
                  */
                 if (!__test_and_clear_bit(0, (unsigned long *)
-                                          &anon_vma->root->head.next))
+                                          &anon_vma->root->rb_root.rb_node))
                         BUG();
                 anon_vma_unlock(anon_vma);
         }
diff --git a/mm/rmap.c b/mm/rmap.c
index 8cbd62fde0f1..9c61bf387fd1 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -127,12 +127,7 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
         avc->vma = vma;
         avc->anon_vma = anon_vma;
         list_add(&avc->same_vma, &vma->anon_vma_chain);
-
-        /*
-         * It's critical to add new vmas to the tail of the anon_vma,
-         * see comment in huge_memory.c:__split_huge_page().
-         */
-        list_add_tail(&avc->same_anon_vma, &anon_vma->head);
+        anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
 }
 
 /**
@@ -336,13 +331,13 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
                 struct anon_vma *anon_vma = avc->anon_vma;
 
                 root = lock_anon_vma_root(root, anon_vma);
-                list_del(&avc->same_anon_vma);
+                anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
 
                 /*
                  * Leave empty anon_vmas on the list - we'll need
                  * to free them outside the lock.
                  */
-                if (list_empty(&anon_vma->head))
+                if (RB_EMPTY_ROOT(&anon_vma->rb_root))
                         continue;
 
                 list_del(&avc->same_vma);
@@ -371,7 +366,7 @@ static void anon_vma_ctor(void *data)
 
         mutex_init(&anon_vma->mutex);
         atomic_set(&anon_vma->refcount, 0);
-        INIT_LIST_HEAD(&anon_vma->head);
+        anon_vma->rb_root = RB_ROOT;
 }
 
 void __init anon_vma_init(void)
@@ -724,6 +719,7 @@ static int page_referenced_anon(struct page *page,
 {
         unsigned int mapcount;
         struct anon_vma *anon_vma;
+        pgoff_t pgoff;
         struct anon_vma_chain *avc;
         int referenced = 0;
 
@@ -732,7 +728,8 @@ static int page_referenced_anon(struct page *page,
                 return referenced;
 
         mapcount = page_mapcount(page);
-        list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+        pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+        anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
                 struct vm_area_struct *vma = avc->vma;
                 unsigned long address = vma_address(page, vma);
                 if (address == -EFAULT)
@@ -1445,6 +1442,7 @@ bool is_vma_temporary_stack(struct vm_area_struct *vma)
 static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
 {
         struct anon_vma *anon_vma;
+        pgoff_t pgoff;
         struct anon_vma_chain *avc;
         int ret = SWAP_AGAIN;
 
@@ -1452,7 +1450,8 @@ static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
         if (!anon_vma)
                 return ret;
 
-        list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+        pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+        anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
                 struct vm_area_struct *vma = avc->vma;
                 unsigned long address;
 
@@ -1668,6 +1667,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
                           struct vm_area_struct *, unsigned long, void *), void *arg)
 {
         struct anon_vma *anon_vma;
+        pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
         struct anon_vma_chain *avc;
         int ret = SWAP_AGAIN;
 
@@ -1681,7 +1681,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
         if (!anon_vma)
                 return ret;
         anon_vma_lock(anon_vma);
-        list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+        anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
                 struct vm_area_struct *vma = avc->vma;
                 unsigned long address = vma_address(page, vma);
                 if (address == -EFAULT)