aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorMichel Lespinasse <walken@google.com>2012-10-08 19:31:39 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-09 03:22:41 -0400
commitbf181b9f9d8dfbba58b23441ad60d0bc33806d64 (patch)
tree7ad0caaf8998f31c5d910dcbb768f5a1d381b5f4 /include
parent108d6642ad81bb1d62b401490a334d2c12397517 (diff)
mm anon rmap: replace same_anon_vma linked list with an interval tree.
When a large VMA (anon or private file mapping) is first touched, which will populate its anon_vma field, and then split into many regions through the use of mprotect(), the original anon_vma ends up linking all of the vmas on a linked list. This can cause rmap to become inefficient, as we have to walk potentially thousands of irrelevent vmas before finding the one a given anon page might fall into. By replacing the same_anon_vma linked list with an interval tree (where each avc's interval is determined by its vma's start and last pgoffs), we can make rmap efficient for this use case again. While the change is large, all of its pieces are fairly simple. Most places that were walking the same_anon_vma list were looking for a known pgoff, so they can just use the anon_vma_interval_tree_foreach() interval tree iterator instead. The exception here is ksm, where the page's index is not known. It would probably be possible to rework ksm so that the index would be known, but for now I have decided to keep things simple and just walk the entirety of the interval tree there. When updating vma's that already have an anon_vma assigned, we must take care to re-index the corresponding avc's on their interval tree. This is done through the use of anon_vma_interval_tree_pre_update_vma() and anon_vma_interval_tree_post_update_vma(), which remove the avc's from their interval tree before the update and re-insert them after the update. The anon_vma stays locked during the update, so there is no chance that rmap would miss the vmas that are being updated. Signed-off-by: Michel Lespinasse <walken@google.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Rik van Riel <riel@redhat.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Daniel Santos <daniel.santos@pobox.com> Cc: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/mm.h14
-rw-r--r--include/linux/rmap.h11
2 files changed, 20 insertions, 5 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f1d9aaadb566..0cdab4e0f814 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -20,6 +20,7 @@
20 20
21struct mempolicy; 21struct mempolicy;
22struct anon_vma; 22struct anon_vma;
23struct anon_vma_chain;
23struct file_ra_state; 24struct file_ra_state;
24struct user_struct; 25struct user_struct;
25struct writeback_control; 26struct writeback_control;
@@ -1377,6 +1378,19 @@ static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
1377 list_add_tail(&vma->shared.nonlinear, list); 1378 list_add_tail(&vma->shared.nonlinear, list);
1378} 1379}
1379 1380
1381void anon_vma_interval_tree_insert(struct anon_vma_chain *node,
1382 struct rb_root *root);
1383void anon_vma_interval_tree_remove(struct anon_vma_chain *node,
1384 struct rb_root *root);
1385struct anon_vma_chain *anon_vma_interval_tree_iter_first(
1386 struct rb_root *root, unsigned long start, unsigned long last);
1387struct anon_vma_chain *anon_vma_interval_tree_iter_next(
1388 struct anon_vma_chain *node, unsigned long start, unsigned long last);
1389
1390#define anon_vma_interval_tree_foreach(avc, root, start, last) \
1391 for (avc = anon_vma_interval_tree_iter_first(root, start, last); \
1392 avc; avc = anon_vma_interval_tree_iter_next(avc, start, last))
1393
1380/* mmap.c */ 1394/* mmap.c */
1381extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); 1395extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
1382extern int vma_adjust(struct vm_area_struct *vma, unsigned long start, 1396extern int vma_adjust(struct vm_area_struct *vma, unsigned long start,
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 7f32cec57e67..dce44f7d3ed8 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -37,14 +37,14 @@ struct anon_vma {
37 atomic_t refcount; 37 atomic_t refcount;
38 38
39 /* 39 /*
40 * NOTE: the LSB of the head.next is set by 40 * NOTE: the LSB of the rb_root.rb_node is set by
41 * mm_take_all_locks() _after_ taking the above lock. So the 41 * mm_take_all_locks() _after_ taking the above lock. So the
42 * head must only be read/written after taking the above lock 42 * rb_root must only be read/written after taking the above lock
43 * to be sure to see a valid next pointer. The LSB bit itself 43 * to be sure to see a valid next pointer. The LSB bit itself
44 * is serialized by a system wide lock only visible to 44 * is serialized by a system wide lock only visible to
45 * mm_take_all_locks() (mm_all_locks_mutex). 45 * mm_take_all_locks() (mm_all_locks_mutex).
46 */ 46 */
47 struct list_head head; /* Chain of private "related" vmas */ 47 struct rb_root rb_root; /* Interval tree of private "related" vmas */
48}; 48};
49 49
50/* 50/*
@@ -57,14 +57,15 @@ struct anon_vma {
57 * with a VMA, or the VMAs associated with an anon_vma. 57 * with a VMA, or the VMAs associated with an anon_vma.
58 * The "same_vma" list contains the anon_vma_chains linking 58 * The "same_vma" list contains the anon_vma_chains linking
59 * all the anon_vmas associated with this VMA. 59 * all the anon_vmas associated with this VMA.
60 * The "same_anon_vma" list contains the anon_vma_chains 60 * The "rb" field indexes on an interval tree the anon_vma_chains
61 * which link all the VMAs associated with this anon_vma. 61 * which link all the VMAs associated with this anon_vma.
62 */ 62 */
63struct anon_vma_chain { 63struct anon_vma_chain {
64 struct vm_area_struct *vma; 64 struct vm_area_struct *vma;
65 struct anon_vma *anon_vma; 65 struct anon_vma *anon_vma;
66 struct list_head same_vma; /* locked by mmap_sem & page_table_lock */ 66 struct list_head same_vma; /* locked by mmap_sem & page_table_lock */
67 struct list_head same_anon_vma; /* locked by anon_vma->mutex */ 67 struct rb_node rb; /* locked by anon_vma->mutex */
68 unsigned long rb_subtree_last;
68}; 69};
69 70
70#ifdef CONFIG_MMU 71#ifdef CONFIG_MMU