-rw-r--r--  arch/arm/mm/fault-armv.c               3
-rw-r--r--  arch/arm/mm/flush.c                    3
-rw-r--r--  arch/parisc/kernel/cache.c             3
-rw-r--r--  arch/x86/mm/hugetlbpage.c              3
-rw-r--r--  fs/hugetlbfs/inode.c                   9
-rw-r--r--  fs/inode.c                             2
-rw-r--r--  include/linux/fs.h                     6
-rw-r--r--  include/linux/interval_tree_tmpl.h   215
-rw-r--r--  include/linux/mm.h                    30
-rw-r--r--  include/linux/mm_types.h              14
-rw-r--r--  kernel/events/uprobes.c                3
-rw-r--r--  kernel/fork.c                          2
-rw-r--r--  lib/interval_tree.c                  166
-rw-r--r--  lib/prio_tree.c                       19
-rw-r--r--  mm/Makefile                            4
-rw-r--r--  mm/filemap_xip.c                       3
-rw-r--r--  mm/fremap.c                            2
-rw-r--r--  mm/hugetlb.c                           3
-rw-r--r--  mm/interval_tree.c                    61
-rw-r--r--  mm/memory-failure.c                    3
-rw-r--r--  mm/memory.c                            9
-rw-r--r--  mm/mmap.c                             22
-rw-r--r--  mm/nommu.c                            12
-rw-r--r--  mm/prio_tree.c                       208
-rw-r--r--  mm/rmap.c                             18
25 files changed, 357 insertions(+), 466 deletions(-)
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index 7599e2625c7d..2a5907b5c8d2 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -134,7 +134,6 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
134{ 134{
135 struct mm_struct *mm = vma->vm_mm; 135 struct mm_struct *mm = vma->vm_mm;
136 struct vm_area_struct *mpnt; 136 struct vm_area_struct *mpnt;
137 struct prio_tree_iter iter;
138 unsigned long offset; 137 unsigned long offset;
139 pgoff_t pgoff; 138 pgoff_t pgoff;
140 int aliases = 0; 139 int aliases = 0;
@@ -147,7 +146,7 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
147 * cache coherency. 146 * cache coherency.
148 */ 147 */
149 flush_dcache_mmap_lock(mapping); 148 flush_dcache_mmap_lock(mapping);
150 vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) { 149 vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
151 /* 150 /*
152 * If this VMA is not in our MM, we can ignore it. 151 * If this VMA is not in our MM, we can ignore it.
153 * Note that we intentionally mask out the VMA 152 * Note that we intentionally mask out the VMA
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 40ca11ed6e5f..1c8f7f564175 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -196,7 +196,6 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
196{ 196{
197 struct mm_struct *mm = current->active_mm; 197 struct mm_struct *mm = current->active_mm;
198 struct vm_area_struct *mpnt; 198 struct vm_area_struct *mpnt;
199 struct prio_tree_iter iter;
200 pgoff_t pgoff; 199 pgoff_t pgoff;
201 200
202 /* 201 /*
@@ -208,7 +207,7 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
208 pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); 207 pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
209 208
210 flush_dcache_mmap_lock(mapping); 209 flush_dcache_mmap_lock(mapping);
211 vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) { 210 vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
212 unsigned long offset; 211 unsigned long offset;
213 212
214 /* 213 /*
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 9d181890a7e3..48e16dc20102 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -276,7 +276,6 @@ void flush_dcache_page(struct page *page)
276{ 276{
277 struct address_space *mapping = page_mapping(page); 277 struct address_space *mapping = page_mapping(page);
278 struct vm_area_struct *mpnt; 278 struct vm_area_struct *mpnt;
279 struct prio_tree_iter iter;
280 unsigned long offset; 279 unsigned long offset;
281 unsigned long addr, old_addr = 0; 280 unsigned long addr, old_addr = 0;
282 pgoff_t pgoff; 281 pgoff_t pgoff;
@@ -299,7 +298,7 @@ void flush_dcache_page(struct page *page)
299 * to flush one address here for them all to become coherent */ 298 * to flush one address here for them all to become coherent */
300 299
301 flush_dcache_mmap_lock(mapping); 300 flush_dcache_mmap_lock(mapping);
302 vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) { 301 vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
303 offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; 302 offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
304 addr = mpnt->vm_start + offset; 303 addr = mpnt->vm_start + offset;
305 304
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index b91e48512425..937bff5cdaa7 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -71,7 +71,6 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
71 struct address_space *mapping = vma->vm_file->f_mapping; 71 struct address_space *mapping = vma->vm_file->f_mapping;
72 pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + 72 pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
73 vma->vm_pgoff; 73 vma->vm_pgoff;
74 struct prio_tree_iter iter;
75 struct vm_area_struct *svma; 74 struct vm_area_struct *svma;
76 unsigned long saddr; 75 unsigned long saddr;
77 pte_t *spte = NULL; 76 pte_t *spte = NULL;
@@ -81,7 +80,7 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
81 return (pte_t *)pmd_alloc(mm, pud, addr); 80 return (pte_t *)pmd_alloc(mm, pud, addr);
82 81
83 mutex_lock(&mapping->i_mmap_mutex); 82 mutex_lock(&mapping->i_mmap_mutex);
84 vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { 83 vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
85 if (svma == vma) 84 if (svma == vma)
86 continue; 85 continue;
87 86
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 0a0ab8e21b19..c5bc355d8243 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -397,17 +397,16 @@ static void hugetlbfs_evict_inode(struct inode *inode)
397} 397}
398 398
399static inline void 399static inline void
400hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff) 400hugetlb_vmtruncate_list(struct rb_root *root, pgoff_t pgoff)
401{ 401{
402 struct vm_area_struct *vma; 402 struct vm_area_struct *vma;
403 struct prio_tree_iter iter;
404 403
405 vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) { 404 vma_interval_tree_foreach(vma, root, pgoff, ULONG_MAX) {
406 unsigned long v_offset; 405 unsigned long v_offset;
407 406
408 /* 407 /*
409 * Can the expression below overflow on 32-bit arches? 408 * Can the expression below overflow on 32-bit arches?
410 * No, because the prio_tree returns us only those vmas 409 * No, because the interval tree returns us only those vmas
411 * which overlap the truncated area starting at pgoff, 410 * which overlap the truncated area starting at pgoff,
412 * and no vma on a 32-bit arch can span beyond the 4GB. 411 * and no vma on a 32-bit arch can span beyond the 4GB.
413 */ 412 */
@@ -432,7 +431,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
432 431
433 i_size_write(inode, offset); 432 i_size_write(inode, offset);
434 mutex_lock(&mapping->i_mmap_mutex); 433 mutex_lock(&mapping->i_mmap_mutex);
435 if (!prio_tree_empty(&mapping->i_mmap)) 434 if (!RB_EMPTY_ROOT(&mapping->i_mmap))
436 hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); 435 hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
437 mutex_unlock(&mapping->i_mmap_mutex); 436 mutex_unlock(&mapping->i_mmap_mutex);
438 truncate_hugepages(inode, offset); 437 truncate_hugepages(inode, offset);
diff --git a/fs/inode.c b/fs/inode.c
index ac8d904b3f16..b03c71957246 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -348,7 +348,7 @@ void address_space_init_once(struct address_space *mapping)
348 mutex_init(&mapping->i_mmap_mutex); 348 mutex_init(&mapping->i_mmap_mutex);
349 INIT_LIST_HEAD(&mapping->private_list); 349 INIT_LIST_HEAD(&mapping->private_list);
350 spin_lock_init(&mapping->private_lock); 350 spin_lock_init(&mapping->private_lock);
351 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); 351 mapping->i_mmap = RB_ROOT;
352 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); 352 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
353} 353}
354EXPORT_SYMBOL(address_space_init_once); 354EXPORT_SYMBOL(address_space_init_once);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5a8a273d5b2f..c617ed024df8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -401,7 +401,7 @@ struct inodes_stat_t {
401#include <linux/cache.h> 401#include <linux/cache.h>
402#include <linux/list.h> 402#include <linux/list.h>
403#include <linux/radix-tree.h> 403#include <linux/radix-tree.h>
404#include <linux/prio_tree.h> 404#include <linux/rbtree.h>
405#include <linux/init.h> 405#include <linux/init.h>
406#include <linux/pid.h> 406#include <linux/pid.h>
407#include <linux/bug.h> 407#include <linux/bug.h>
@@ -669,7 +669,7 @@ struct address_space {
669 struct radix_tree_root page_tree; /* radix tree of all pages */ 669 struct radix_tree_root page_tree; /* radix tree of all pages */
670 spinlock_t tree_lock; /* and lock protecting it */ 670 spinlock_t tree_lock; /* and lock protecting it */
671 unsigned int i_mmap_writable;/* count VM_SHARED mappings */ 671 unsigned int i_mmap_writable;/* count VM_SHARED mappings */
672 struct prio_tree_root i_mmap; /* tree of private and shared mappings */ 672 struct rb_root i_mmap; /* tree of private and shared mappings */
673 struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ 673 struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
674 struct mutex i_mmap_mutex; /* protect tree, count, list */ 674 struct mutex i_mmap_mutex; /* protect tree, count, list */
675 /* Protected by tree_lock together with the radix tree */ 675 /* Protected by tree_lock together with the radix tree */
@@ -741,7 +741,7 @@ int mapping_tagged(struct address_space *mapping, int tag);
741 */ 741 */
742static inline int mapping_mapped(struct address_space *mapping) 742static inline int mapping_mapped(struct address_space *mapping)
743{ 743{
744 return !prio_tree_empty(&mapping->i_mmap) || 744 return !RB_EMPTY_ROOT(&mapping->i_mmap) ||
745 !list_empty(&mapping->i_mmap_nonlinear); 745 !list_empty(&mapping->i_mmap_nonlinear);
746} 746}
747 747
diff --git a/include/linux/interval_tree_tmpl.h b/include/linux/interval_tree_tmpl.h
new file mode 100644
index 000000000000..c65deda31413
--- /dev/null
+++ b/include/linux/interval_tree_tmpl.h
@@ -0,0 +1,215 @@
1/*
2 Interval Trees
3 (C) 2012 Michel Lespinasse <walken@google.com>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
19 include/linux/interval_tree_tmpl.h
20*/
21
22/*
23 * Template for implementing interval trees
24 *
25 * ITSTRUCT: struct type of the interval tree nodes
26 * ITRB: name of struct rb_node field within ITSTRUCT
27 * ITTYPE: type of the interval endpoints
28 * ITSUBTREE: name of ITTYPE field within ITSTRUCT holding last-in-subtree
29 * ITSTART(n): start endpoint of ITSTRUCT node n
 30 ITLAST(n): last endpoint of ITSTRUCT node n
31 * ITSTATIC: 'static' or empty
32 * ITPREFIX: prefix to use for the inline tree definitions
33 */
34
35/* IT(name) -> ITPREFIX_name */
36#define _ITNAME(prefix, name) prefix ## _ ## name
37#define ITNAME(prefix, name) _ITNAME(prefix, name)
38#define IT(name) ITNAME(ITPREFIX, name)
39
40/* Callbacks for augmented rbtree insert and remove */
41
42static inline ITTYPE IT(compute_subtree_last)(ITSTRUCT *node)
43{
44 ITTYPE max = ITLAST(node), subtree_last;
45 if (node->ITRB.rb_left) {
46 subtree_last = rb_entry(node->ITRB.rb_left,
47 ITSTRUCT, ITRB)->ITSUBTREE;
48 if (max < subtree_last)
49 max = subtree_last;
50 }
51 if (node->ITRB.rb_right) {
52 subtree_last = rb_entry(node->ITRB.rb_right,
53 ITSTRUCT, ITRB)->ITSUBTREE;
54 if (max < subtree_last)
55 max = subtree_last;
56 }
57 return max;
58}
59
60static void IT(augment_propagate)(struct rb_node *rb, struct rb_node *stop)
61{
62 while (rb != stop) {
63 ITSTRUCT *node = rb_entry(rb, ITSTRUCT, ITRB);
64 ITTYPE subtree_last = IT(compute_subtree_last)(node);
65 if (node->ITSUBTREE == subtree_last)
66 break;
67 node->ITSUBTREE = subtree_last;
68 rb = rb_parent(&node->ITRB);
69 }
70}
71
72static void IT(augment_copy)(struct rb_node *rb_old, struct rb_node *rb_new)
73{
74 ITSTRUCT *old = rb_entry(rb_old, ITSTRUCT, ITRB);
75 ITSTRUCT *new = rb_entry(rb_new, ITSTRUCT, ITRB);
76
77 new->ITSUBTREE = old->ITSUBTREE;
78}
79
80static void IT(augment_rotate)(struct rb_node *rb_old, struct rb_node *rb_new)
81{
82 ITSTRUCT *old = rb_entry(rb_old, ITSTRUCT, ITRB);
83 ITSTRUCT *new = rb_entry(rb_new, ITSTRUCT, ITRB);
84
85 new->ITSUBTREE = old->ITSUBTREE;
86 old->ITSUBTREE = IT(compute_subtree_last)(old);
87}
88
89static const struct rb_augment_callbacks IT(augment_callbacks) = {
90 IT(augment_propagate), IT(augment_copy), IT(augment_rotate)
91};
92
93/* Insert / remove interval nodes from the tree */
94
95ITSTATIC void IT(insert)(ITSTRUCT *node, struct rb_root *root)
96{
97 struct rb_node **link = &root->rb_node, *rb_parent = NULL;
98 ITTYPE start = ITSTART(node), last = ITLAST(node);
99 ITSTRUCT *parent;
100
101 while (*link) {
102 rb_parent = *link;
103 parent = rb_entry(rb_parent, ITSTRUCT, ITRB);
104 if (parent->ITSUBTREE < last)
105 parent->ITSUBTREE = last;
106 if (start < ITSTART(parent))
107 link = &parent->ITRB.rb_left;
108 else
109 link = &parent->ITRB.rb_right;
110 }
111
112 node->ITSUBTREE = last;
113 rb_link_node(&node->ITRB, rb_parent, link);
114 rb_insert_augmented(&node->ITRB, root, &IT(augment_callbacks));
115}
116
117ITSTATIC void IT(remove)(ITSTRUCT *node, struct rb_root *root)
118{
119 rb_erase_augmented(&node->ITRB, root, &IT(augment_callbacks));
120}
121
122/*
123 * Iterate over intervals intersecting [start;last]
124 *
125 * Note that a node's interval intersects [start;last] iff:
126 * Cond1: ITSTART(node) <= last
127 * and
128 * Cond2: start <= ITLAST(node)
129 */
130
131static ITSTRUCT *IT(subtree_search)(ITSTRUCT *node, ITTYPE start, ITTYPE last)
132{
133 while (true) {
134 /*
135 * Loop invariant: start <= node->ITSUBTREE
136 * (Cond2 is satisfied by one of the subtree nodes)
137 */
138 if (node->ITRB.rb_left) {
139 ITSTRUCT *left = rb_entry(node->ITRB.rb_left,
140 ITSTRUCT, ITRB);
141 if (start <= left->ITSUBTREE) {
142 /*
143 * Some nodes in left subtree satisfy Cond2.
144 * Iterate to find the leftmost such node N.
145 * If it also satisfies Cond1, that's the match
146 * we are looking for. Otherwise, there is no
147 * matching interval as nodes to the right of N
148 * can't satisfy Cond1 either.
149 */
150 node = left;
151 continue;
152 }
153 }
154 if (ITSTART(node) <= last) { /* Cond1 */
155 if (start <= ITLAST(node)) /* Cond2 */
156 return node; /* node is leftmost match */
157 if (node->ITRB.rb_right) {
158 node = rb_entry(node->ITRB.rb_right,
159 ITSTRUCT, ITRB);
160 if (start <= node->ITSUBTREE)
161 continue;
162 }
163 }
164 return NULL; /* No match */
165 }
166}
167
168ITSTATIC ITSTRUCT *IT(iter_first)(struct rb_root *root,
169 ITTYPE start, ITTYPE last)
170{
171 ITSTRUCT *node;
172
173 if (!root->rb_node)
174 return NULL;
175 node = rb_entry(root->rb_node, ITSTRUCT, ITRB);
176 if (node->ITSUBTREE < start)
177 return NULL;
178 return IT(subtree_search)(node, start, last);
179}
180
181ITSTATIC ITSTRUCT *IT(iter_next)(ITSTRUCT *node, ITTYPE start, ITTYPE last)
182{
183 struct rb_node *rb = node->ITRB.rb_right, *prev;
184
185 while (true) {
186 /*
187 * Loop invariants:
188 * Cond1: ITSTART(node) <= last
189 * rb == node->ITRB.rb_right
190 *
191 * First, search right subtree if suitable
192 */
193 if (rb) {
194 ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB);
195 if (start <= right->ITSUBTREE)
196 return IT(subtree_search)(right, start, last);
197 }
198
199 /* Move up the tree until we come from a node's left child */
200 do {
201 rb = rb_parent(&node->ITRB);
202 if (!rb)
203 return NULL;
204 prev = &node->ITRB;
205 node = rb_entry(rb, ITSTRUCT, ITRB);
206 rb = node->ITRB.rb_right;
207 } while (prev == rb);
208
209 /* Check if the node intersects [start;last] */
210 if (last < ITSTART(node)) /* !Cond1 */
211 return NULL;
212 else if (start <= ITLAST(node)) /* Cond2 */
213 return node;
214 }
215}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5ddb11b2b4bb..0f671ef09eba 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -10,7 +10,6 @@
10#include <linux/list.h> 10#include <linux/list.h>
11#include <linux/mmzone.h> 11#include <linux/mmzone.h>
12#include <linux/rbtree.h> 12#include <linux/rbtree.h>
13#include <linux/prio_tree.h>
14#include <linux/atomic.h> 13#include <linux/atomic.h>
15#include <linux/debug_locks.h> 14#include <linux/debug_locks.h>
16#include <linux/mm_types.h> 15#include <linux/mm_types.h>
@@ -1355,22 +1354,27 @@ extern void zone_pcp_reset(struct zone *zone);
1355extern atomic_long_t mmap_pages_allocated; 1354extern atomic_long_t mmap_pages_allocated;
1356extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); 1355extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
1357 1356
1358/* prio_tree.c */ 1357/* interval_tree.c */
1359void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); 1358void vma_interval_tree_add(struct vm_area_struct *vma,
1360void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *); 1359 struct vm_area_struct *old,
1361void vma_prio_tree_remove(struct vm_area_struct *, struct prio_tree_root *); 1360 struct address_space *mapping);
1362struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma, 1361void vma_interval_tree_insert(struct vm_area_struct *node,
1363 struct prio_tree_iter *iter); 1362 struct rb_root *root);
1364 1363void vma_interval_tree_remove(struct vm_area_struct *node,
1365#define vma_prio_tree_foreach(vma, iter, root, begin, end) \ 1364 struct rb_root *root);
1366 for (prio_tree_iter_init(iter, root, begin, end), vma = NULL; \ 1365struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root,
1367 (vma = vma_prio_tree_next(vma, iter)); ) 1366 unsigned long start, unsigned long last);
1367struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
1368 unsigned long start, unsigned long last);
1369
1370#define vma_interval_tree_foreach(vma, root, start, last) \
1371 for (vma = vma_interval_tree_iter_first(root, start, last); \
1372 vma; vma = vma_interval_tree_iter_next(vma, start, last))
1368 1373
1369static inline void vma_nonlinear_insert(struct vm_area_struct *vma, 1374static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
1370 struct list_head *list) 1375 struct list_head *list)
1371{ 1376{
1372 vma->shared.vm_set.parent = NULL; 1377 list_add_tail(&vma->shared.nonlinear, list);
1373 list_add_tail(&vma->shared.vm_set.list, list);
1374} 1378}
1375 1379
1376/* mmap.c */ 1380/* mmap.c */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index a57a43f5ca7c..31f8a3af7d94 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -6,7 +6,6 @@
6#include <linux/threads.h> 6#include <linux/threads.h>
7#include <linux/list.h> 7#include <linux/list.h>
8#include <linux/spinlock.h> 8#include <linux/spinlock.h>
9#include <linux/prio_tree.h>
10#include <linux/rbtree.h> 9#include <linux/rbtree.h>
11#include <linux/rwsem.h> 10#include <linux/rwsem.h>
12#include <linux/completion.h> 11#include <linux/completion.h>
@@ -240,18 +239,15 @@ struct vm_area_struct {
240 239
241 /* 240 /*
242 * For areas with an address space and backing store, 241 * For areas with an address space and backing store,
243 * linkage into the address_space->i_mmap prio tree, or 242 * linkage into the address_space->i_mmap interval tree, or
244 * linkage to the list of like vmas hanging off its node, or
245 * linkage of vma in the address_space->i_mmap_nonlinear list. 243 * linkage of vma in the address_space->i_mmap_nonlinear list.
246 */ 244 */
247 union { 245 union {
248 struct { 246 struct {
249 struct list_head list; 247 struct rb_node rb;
250 void *parent; /* aligns with prio_tree_node parent */ 248 unsigned long rb_subtree_last;
251 struct vm_area_struct *head; 249 } linear;
252 } vm_set; 250 struct list_head nonlinear;
253
254 struct raw_prio_tree_node prio_tree_node;
255 } shared; 251 } shared;
256 252
257 /* 253 /*
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 912ef48d28ab..1d9c0a985960 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -735,7 +735,6 @@ static struct map_info *
735build_map_info(struct address_space *mapping, loff_t offset, bool is_register) 735build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
736{ 736{
737 unsigned long pgoff = offset >> PAGE_SHIFT; 737 unsigned long pgoff = offset >> PAGE_SHIFT;
738 struct prio_tree_iter iter;
739 struct vm_area_struct *vma; 738 struct vm_area_struct *vma;
740 struct map_info *curr = NULL; 739 struct map_info *curr = NULL;
741 struct map_info *prev = NULL; 740 struct map_info *prev = NULL;
@@ -744,7 +743,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
744 743
745 again: 744 again:
746 mutex_lock(&mapping->i_mmap_mutex); 745 mutex_lock(&mapping->i_mmap_mutex);
747 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 746 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
748 if (!valid_vma(vma, is_register)) 747 if (!valid_vma(vma, is_register))
749 continue; 748 continue;
750 749
diff --git a/kernel/fork.c b/kernel/fork.c
index 972762e01024..90dace52715e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -423,7 +423,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
423 mapping->i_mmap_writable++; 423 mapping->i_mmap_writable++;
424 flush_dcache_mmap_lock(mapping); 424 flush_dcache_mmap_lock(mapping);
425 /* insert tmp into the share list, just after mpnt */ 425 /* insert tmp into the share list, just after mpnt */
426 vma_prio_tree_add(tmp, mpnt); 426 vma_interval_tree_add(tmp, mpnt, mapping);
427 flush_dcache_mmap_unlock(mapping); 427 flush_dcache_mmap_unlock(mapping);
428 mutex_unlock(&mapping->i_mmap_mutex); 428 mutex_unlock(&mapping->i_mmap_mutex);
429 } 429 }
diff --git a/lib/interval_tree.c b/lib/interval_tree.c
index 6fd540b1e499..77a793e0644b 100644
--- a/lib/interval_tree.c
+++ b/lib/interval_tree.c
@@ -1,159 +1,13 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <linux/interval_tree.h> 2#include <linux/interval_tree.h>
3 3
4/* Callbacks for augmented rbtree insert and remove */ 4#define ITSTRUCT struct interval_tree_node
5 5#define ITRB rb
6static inline unsigned long 6#define ITTYPE unsigned long
7compute_subtree_last(struct interval_tree_node *node) 7#define ITSUBTREE __subtree_last
8{ 8#define ITSTART(n) ((n)->start)
9 unsigned long max = node->last, subtree_last; 9#define ITLAST(n) ((n)->last)
10 if (node->rb.rb_left) { 10#define ITSTATIC
11 subtree_last = rb_entry(node->rb.rb_left, 11#define ITPREFIX interval_tree
12 struct interval_tree_node, rb)->__subtree_last; 12
13 if (max < subtree_last) 13#include <linux/interval_tree_tmpl.h>
14 max = subtree_last;
15 }
16 if (node->rb.rb_right) {
17 subtree_last = rb_entry(node->rb.rb_right,
18 struct interval_tree_node, rb)->__subtree_last;
19 if (max < subtree_last)
20 max = subtree_last;
21 }
22 return max;
23}
24
25RB_DECLARE_CALLBACKS(static, augment_callbacks, struct interval_tree_node, rb,
26 unsigned long, __subtree_last, compute_subtree_last)
27
28/* Insert / remove interval nodes from the tree */
29
30void interval_tree_insert(struct interval_tree_node *node,
31 struct rb_root *root)
32{
33 struct rb_node **link = &root->rb_node, *rb_parent = NULL;
34 unsigned long start = node->start, last = node->last;
35 struct interval_tree_node *parent;
36
37 while (*link) {
38 rb_parent = *link;
39 parent = rb_entry(rb_parent, struct interval_tree_node, rb);
40 if (parent->__subtree_last < last)
41 parent->__subtree_last = last;
42 if (start < parent->start)
43 link = &parent->rb.rb_left;
44 else
45 link = &parent->rb.rb_right;
46 }
47
48 node->__subtree_last = last;
49 rb_link_node(&node->rb, rb_parent, link);
50 rb_insert_augmented(&node->rb, root, &augment_callbacks);
51}
52
53void interval_tree_remove(struct interval_tree_node *node,
54 struct rb_root *root)
55{
56 rb_erase_augmented(&node->rb, root, &augment_callbacks);
57}
58
59/*
60 * Iterate over intervals intersecting [start;last]
61 *
62 * Note that a node's interval intersects [start;last] iff:
63 * Cond1: node->start <= last
64 * and
65 * Cond2: start <= node->last
66 */
67
68static struct interval_tree_node *
69subtree_search(struct interval_tree_node *node,
70 unsigned long start, unsigned long last)
71{
72 while (true) {
73 /*
74 * Loop invariant: start <= node->__subtree_last
75 * (Cond2 is satisfied by one of the subtree nodes)
76 */
77 if (node->rb.rb_left) {
78 struct interval_tree_node *left =
79 rb_entry(node->rb.rb_left,
80 struct interval_tree_node, rb);
81 if (start <= left->__subtree_last) {
82 /*
83 * Some nodes in left subtree satisfy Cond2.
84 * Iterate to find the leftmost such node N.
85 * If it also satisfies Cond1, that's the match
86 * we are looking for. Otherwise, there is no
87 * matching interval as nodes to the right of N
88 * can't satisfy Cond1 either.
89 */
90 node = left;
91 continue;
92 }
93 }
94 if (node->start <= last) { /* Cond1 */
95 if (start <= node->last) /* Cond2 */
96 return node; /* node is leftmost match */
97 if (node->rb.rb_right) {
98 node = rb_entry(node->rb.rb_right,
99 struct interval_tree_node, rb);
100 if (start <= node->__subtree_last)
101 continue;
102 }
103 }
104 return NULL; /* No match */
105 }
106}
107
108struct interval_tree_node *
109interval_tree_iter_first(struct rb_root *root,
110 unsigned long start, unsigned long last)
111{
112 struct interval_tree_node *node;
113
114 if (!root->rb_node)
115 return NULL;
116 node = rb_entry(root->rb_node, struct interval_tree_node, rb);
117 if (node->__subtree_last < start)
118 return NULL;
119 return subtree_search(node, start, last);
120}
121
122struct interval_tree_node *
123interval_tree_iter_next(struct interval_tree_node *node,
124 unsigned long start, unsigned long last)
125{
126 struct rb_node *rb = node->rb.rb_right, *prev;
127
128 while (true) {
129 /*
130 * Loop invariants:
131 * Cond1: node->start <= last
132 * rb == node->rb.rb_right
133 *
134 * First, search right subtree if suitable
135 */
136 if (rb) {
137 struct interval_tree_node *right =
138 rb_entry(rb, struct interval_tree_node, rb);
139 if (start <= right->__subtree_last)
140 return subtree_search(right, start, last);
141 }
142
143 /* Move up the tree until we come from a node's left child */
144 do {
145 rb = rb_parent(&node->rb);
146 if (!rb)
147 return NULL;
148 prev = &node->rb;
149 node = rb_entry(rb, struct interval_tree_node, rb);
150 rb = node->rb.rb_right;
151 } while (prev == rb);
152
153 /* Check if the node intersects [start;last] */
154 if (last < node->start) /* !Cond1 */
155 return NULL;
156 else if (start <= node->last) /* Cond2 */
157 return node;
158 }
159}
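[Editor's note] The four external entry points keep their signatures across this rewrite: interval_tree_insert(node, root), interval_tree_remove(node, root), interval_tree_iter_first(root, start, last) and interval_tree_iter_next(node, start, last). A minimal query loop over a closed range, sketched under the assumption of an already populated tree and a hypothetical handle_overlap() consumer:

    struct interval_tree_node *node;

    for (node = interval_tree_iter_first(&root, start, last);
         node;
         node = interval_tree_iter_next(node, start, last)) {
            /* here node->start <= last and start <= node->last */
            handle_overlap(node);   /* hypothetical consumer */
    }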
diff --git a/lib/prio_tree.c b/lib/prio_tree.c
index 4e0d2edff2b4..bba37148c15e 100644
--- a/lib/prio_tree.c
+++ b/lib/prio_tree.c
@@ -44,27 +44,12 @@
44 * The following macros are used for implementing prio_tree for i_mmap 44 * The following macros are used for implementing prio_tree for i_mmap
45 */ 45 */
46 46
47#define RADIX_INDEX(vma) ((vma)->vm_pgoff)
48#define VMA_SIZE(vma) (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT)
49/* avoid overflow */
50#define HEAP_INDEX(vma) ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1))
51
52
53static void get_index(const struct prio_tree_root *root, 47static void get_index(const struct prio_tree_root *root,
54 const struct prio_tree_node *node, 48 const struct prio_tree_node *node,
55 unsigned long *radix, unsigned long *heap) 49 unsigned long *radix, unsigned long *heap)
56{ 50{
57 if (root->raw) { 51 *radix = node->start;
58 struct vm_area_struct *vma = prio_tree_entry( 52 *heap = node->last;
59 node, struct vm_area_struct, shared.prio_tree_node);
60
61 *radix = RADIX_INDEX(vma);
62 *heap = HEAP_INDEX(vma);
63 }
64 else {
65 *radix = node->start;
66 *heap = node->last;
67 }
68} 53}
69 54
70static unsigned long index_bits_to_maxindex[BITS_PER_LONG]; 55static unsigned long index_bits_to_maxindex[BITS_PER_LONG];
diff --git a/mm/Makefile b/mm/Makefile
index 92753e2d82da..6b025f80af34 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -14,9 +14,9 @@ endif
14obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ 14obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
15 maccess.o page_alloc.o page-writeback.o \ 15 maccess.o page_alloc.o page-writeback.o \
16 readahead.o swap.o truncate.o vmscan.o shmem.o \ 16 readahead.o swap.o truncate.o vmscan.o shmem.o \
17 prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ 17 util.o mmzone.o vmstat.o backing-dev.o \
18 mm_init.o mmu_context.o percpu.o slab_common.o \ 18 mm_init.o mmu_context.o percpu.o slab_common.o \
19 compaction.o $(mmu-y) 19 compaction.o interval_tree.o $(mmu-y)
20 20
21obj-y += init-mm.o 21obj-y += init-mm.o
22 22
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 91750227a191..a52daee11d3f 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -167,7 +167,6 @@ __xip_unmap (struct address_space * mapping,
167{ 167{
168 struct vm_area_struct *vma; 168 struct vm_area_struct *vma;
169 struct mm_struct *mm; 169 struct mm_struct *mm;
170 struct prio_tree_iter iter;
171 unsigned long address; 170 unsigned long address;
172 pte_t *pte; 171 pte_t *pte;
173 pte_t pteval; 172 pte_t pteval;
@@ -184,7 +183,7 @@ __xip_unmap (struct address_space * mapping,
184 183
185retry: 184retry:
186 mutex_lock(&mapping->i_mmap_mutex); 185 mutex_lock(&mapping->i_mmap_mutex);
187 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 186 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
188 mm = vma->vm_mm; 187 mm = vma->vm_mm;
189 address = vma->vm_start + 188 address = vma->vm_start +
190 ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); 189 ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
diff --git a/mm/fremap.c b/mm/fremap.c
index 3d731a498788..3899a86851ce 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -214,7 +214,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
214 mutex_lock(&mapping->i_mmap_mutex); 214 mutex_lock(&mapping->i_mmap_mutex);
215 flush_dcache_mmap_lock(mapping); 215 flush_dcache_mmap_lock(mapping);
216 vma->vm_flags |= VM_NONLINEAR; 216 vma->vm_flags |= VM_NONLINEAR;
217 vma_prio_tree_remove(vma, &mapping->i_mmap); 217 vma_interval_tree_remove(vma, &mapping->i_mmap);
218 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear); 218 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
219 flush_dcache_mmap_unlock(mapping); 219 flush_dcache_mmap_unlock(mapping);
220 mutex_unlock(&mapping->i_mmap_mutex); 220 mutex_unlock(&mapping->i_mmap_mutex);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f1bb534254f6..c9b40e3a9936 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2474,7 +2474,6 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
2474 struct hstate *h = hstate_vma(vma); 2474 struct hstate *h = hstate_vma(vma);
2475 struct vm_area_struct *iter_vma; 2475 struct vm_area_struct *iter_vma;
2476 struct address_space *mapping; 2476 struct address_space *mapping;
2477 struct prio_tree_iter iter;
2478 pgoff_t pgoff; 2477 pgoff_t pgoff;
2479 2478
2480 /* 2479 /*
@@ -2491,7 +2490,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
2491 * __unmap_hugepage_range() is called as the lock is already held 2490 * __unmap_hugepage_range() is called as the lock is already held
2492 */ 2491 */
2493 mutex_lock(&mapping->i_mmap_mutex); 2492 mutex_lock(&mapping->i_mmap_mutex);
2494 vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 2493 vma_interval_tree_foreach(iter_vma, &mapping->i_mmap, pgoff, pgoff) {
2495 /* Do not unmap the current VMA */ 2494 /* Do not unmap the current VMA */
2496 if (iter_vma == vma) 2495 if (iter_vma == vma)
2497 continue; 2496 continue;
diff --git a/mm/interval_tree.c b/mm/interval_tree.c
new file mode 100644
index 000000000000..7dc565660e56
--- /dev/null
+++ b/mm/interval_tree.c
@@ -0,0 +1,61 @@
1/*
2 * mm/interval_tree.c - interval tree for mapping->i_mmap
3 *
4 * Copyright (C) 2012, Michel Lespinasse <walken@google.com>
5 *
6 * This file is released under the GPL v2.
7 */
8
9#include <linux/mm.h>
10#include <linux/fs.h>
11
12#define ITSTRUCT struct vm_area_struct
13#define ITRB shared.linear.rb
14#define ITTYPE unsigned long
15#define ITSUBTREE shared.linear.rb_subtree_last
16#define ITSTART(n) ((n)->vm_pgoff)
17#define ITLAST(n) ((n)->vm_pgoff + \
18 (((n)->vm_end - (n)->vm_start) >> PAGE_SHIFT) - 1)
19#define ITSTATIC
20#define ITPREFIX vma_interval_tree
21
22#include <linux/interval_tree_tmpl.h>
23
24/* Insert old immediately after vma in the interval tree */
25void vma_interval_tree_add(struct vm_area_struct *vma,
26 struct vm_area_struct *old,
27 struct address_space *mapping)
28{
29 struct rb_node **link;
30 struct vm_area_struct *parent;
31 unsigned long last;
32
33 if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
34 list_add(&vma->shared.nonlinear, &old->shared.nonlinear);
35 return;
36 }
37
38 last = ITLAST(vma);
39
40 if (!old->shared.linear.rb.rb_right) {
41 parent = old;
42 link = &old->shared.linear.rb.rb_right;
43 } else {
44 parent = rb_entry(old->shared.linear.rb.rb_right,
45 struct vm_area_struct, shared.linear.rb);
46 if (parent->shared.linear.rb_subtree_last < last)
47 parent->shared.linear.rb_subtree_last = last;
48 while (parent->shared.linear.rb.rb_left) {
49 parent = rb_entry(parent->shared.linear.rb.rb_left,
50 struct vm_area_struct, shared.linear.rb);
51 if (parent->shared.linear.rb_subtree_last < last)
52 parent->shared.linear.rb_subtree_last = last;
53 }
54 link = &parent->shared.linear.rb.rb_left;
55 }
56
57 vma->shared.linear.rb_subtree_last = last;
58 rb_link_node(&vma->shared.linear.rb, &parent->shared.linear.rb, link);
59 rb_insert_augmented(&vma->shared.linear.rb, &mapping->i_mmap,
60 &vma_interval_tree_augment_callbacks);
61}
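[Editor's note] A worked example of the endpoint mapping defined above (numbers are illustrative): a vma with vm_pgoff == 10 that spans 3 pages gets ITSTART == 10 and ITLAST == 10 + 3 - 1 == 12, i.e. the closed pgoff interval [10;12]. A vma_interval_tree_foreach(vma, root, 12, ULONG_MAX) query, as hugetlb_vmtruncate_list() issues when truncating at page offset 12, therefore still visits that vma, while a query starting at 13 does not.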
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a6e2141a6610..c38a6257d082 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -431,7 +431,6 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
431{ 431{
432 struct vm_area_struct *vma; 432 struct vm_area_struct *vma;
433 struct task_struct *tsk; 433 struct task_struct *tsk;
434 struct prio_tree_iter iter;
435 struct address_space *mapping = page->mapping; 434 struct address_space *mapping = page->mapping;
436 435
437 mutex_lock(&mapping->i_mmap_mutex); 436 mutex_lock(&mapping->i_mmap_mutex);
@@ -442,7 +441,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
442 if (!task_early_kill(tsk)) 441 if (!task_early_kill(tsk))
443 continue; 442 continue;
444 443
445 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, 444 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff,
446 pgoff) { 445 pgoff) {
447 /* 446 /*
448 * Send early kill signal to tasks where a vma covers 447 * Send early kill signal to tasks where a vma covers
diff --git a/mm/memory.c b/mm/memory.c
index e09c04813186..d205e4381a34 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2801,14 +2801,13 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma,
2801 zap_page_range_single(vma, start_addr, end_addr - start_addr, details); 2801 zap_page_range_single(vma, start_addr, end_addr - start_addr, details);
2802} 2802}
2803 2803
2804static inline void unmap_mapping_range_tree(struct prio_tree_root *root, 2804static inline void unmap_mapping_range_tree(struct rb_root *root,
2805 struct zap_details *details) 2805 struct zap_details *details)
2806{ 2806{
2807 struct vm_area_struct *vma; 2807 struct vm_area_struct *vma;
2808 struct prio_tree_iter iter;
2809 pgoff_t vba, vea, zba, zea; 2808 pgoff_t vba, vea, zba, zea;
2810 2809
2811 vma_prio_tree_foreach(vma, &iter, root, 2810 vma_interval_tree_foreach(vma, root,
2812 details->first_index, details->last_index) { 2811 details->first_index, details->last_index) {
2813 2812
2814 vba = vma->vm_pgoff; 2813 vba = vma->vm_pgoff;
@@ -2839,7 +2838,7 @@ static inline void unmap_mapping_range_list(struct list_head *head,
2839 * across *all* the pages in each nonlinear VMA, not just the pages 2838 * across *all* the pages in each nonlinear VMA, not just the pages
2840 * whose virtual address lies outside the file truncation point. 2839 * whose virtual address lies outside the file truncation point.
2841 */ 2840 */
2842 list_for_each_entry(vma, head, shared.vm_set.list) { 2841 list_for_each_entry(vma, head, shared.nonlinear) {
2843 details->nonlinear_vma = vma; 2842 details->nonlinear_vma = vma;
2844 unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details); 2843 unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details);
2845 } 2844 }
@@ -2883,7 +2882,7 @@ void unmap_mapping_range(struct address_space *mapping,
2883 2882
2884 2883
2885 mutex_lock(&mapping->i_mmap_mutex); 2884 mutex_lock(&mapping->i_mmap_mutex);
2886 if (unlikely(!prio_tree_empty(&mapping->i_mmap))) 2885 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
2887 unmap_mapping_range_tree(&mapping->i_mmap, &details); 2886 unmap_mapping_range_tree(&mapping->i_mmap, &details);
2888 if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) 2887 if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
2889 unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details); 2888 unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
diff --git a/mm/mmap.c b/mm/mmap.c
index e3c365ff1b6a..5ac533f88e99 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -199,14 +199,14 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
199 199
200 flush_dcache_mmap_lock(mapping); 200 flush_dcache_mmap_lock(mapping);
201 if (unlikely(vma->vm_flags & VM_NONLINEAR)) 201 if (unlikely(vma->vm_flags & VM_NONLINEAR))
202 list_del_init(&vma->shared.vm_set.list); 202 list_del_init(&vma->shared.nonlinear);
203 else 203 else
204 vma_prio_tree_remove(vma, &mapping->i_mmap); 204 vma_interval_tree_remove(vma, &mapping->i_mmap);
205 flush_dcache_mmap_unlock(mapping); 205 flush_dcache_mmap_unlock(mapping);
206} 206}
207 207
208/* 208/*
209 * Unlink a file-based vm structure from its prio_tree, to hide 209 * Unlink a file-based vm structure from its interval tree, to hide
210 * vma from rmap and vmtruncate before freeing its page tables. 210 * vma from rmap and vmtruncate before freeing its page tables.
211 */ 211 */
212void unlink_file_vma(struct vm_area_struct *vma) 212void unlink_file_vma(struct vm_area_struct *vma)
@@ -411,7 +411,7 @@ static void __vma_link_file(struct vm_area_struct *vma)
411 if (unlikely(vma->vm_flags & VM_NONLINEAR)) 411 if (unlikely(vma->vm_flags & VM_NONLINEAR))
412 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear); 412 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
413 else 413 else
414 vma_prio_tree_insert(vma, &mapping->i_mmap); 414 vma_interval_tree_insert(vma, &mapping->i_mmap);
415 flush_dcache_mmap_unlock(mapping); 415 flush_dcache_mmap_unlock(mapping);
416 } 416 }
417} 417}
@@ -449,7 +449,7 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
449 449
450/* 450/*
451 * Helper for vma_adjust() in the split_vma insert case: insert a vma into the 451 * Helper for vma_adjust() in the split_vma insert case: insert a vma into the
452 * mm's list and rbtree. It has already been inserted into the prio_tree. 452 * mm's list and rbtree. It has already been inserted into the interval tree.
453 */ 453 */
454static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) 454static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
455{ 455{
@@ -491,7 +491,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
491 struct vm_area_struct *next = vma->vm_next; 491 struct vm_area_struct *next = vma->vm_next;
492 struct vm_area_struct *importer = NULL; 492 struct vm_area_struct *importer = NULL;
493 struct address_space *mapping = NULL; 493 struct address_space *mapping = NULL;
494 struct prio_tree_root *root = NULL; 494 struct rb_root *root = NULL;
495 struct anon_vma *anon_vma = NULL; 495 struct anon_vma *anon_vma = NULL;
496 struct file *file = vma->vm_file; 496 struct file *file = vma->vm_file;
497 long adjust_next = 0; 497 long adjust_next = 0;
@@ -554,7 +554,7 @@ again: remove_next = 1 + (end > next->vm_end);
554 mutex_lock(&mapping->i_mmap_mutex); 554 mutex_lock(&mapping->i_mmap_mutex);
555 if (insert) { 555 if (insert) {
556 /* 556 /*
557 * Put into prio_tree now, so instantiated pages 557 * Put into interval tree now, so instantiated pages
558 * are visible to arm/parisc __flush_dcache_page 558 * are visible to arm/parisc __flush_dcache_page
559 * throughout; but we cannot insert into address 559 * throughout; but we cannot insert into address
560 * space until vma start or end is updated. 560 * space until vma start or end is updated.
@@ -582,9 +582,9 @@ again: remove_next = 1 + (end > next->vm_end);
582 582
583 if (root) { 583 if (root) {
584 flush_dcache_mmap_lock(mapping); 584 flush_dcache_mmap_lock(mapping);
585 vma_prio_tree_remove(vma, root); 585 vma_interval_tree_remove(vma, root);
586 if (adjust_next) 586 if (adjust_next)
587 vma_prio_tree_remove(next, root); 587 vma_interval_tree_remove(next, root);
588 } 588 }
589 589
590 vma->vm_start = start; 590 vma->vm_start = start;
@@ -597,8 +597,8 @@ again: remove_next = 1 + (end > next->vm_end);
597 597
598 if (root) { 598 if (root) {
599 if (adjust_next) 599 if (adjust_next)
600 vma_prio_tree_insert(next, root); 600 vma_interval_tree_insert(next, root);
601 vma_prio_tree_insert(vma, root); 601 vma_interval_tree_insert(vma, root);
602 flush_dcache_mmap_unlock(mapping); 602 flush_dcache_mmap_unlock(mapping);
603 } 603 }
604 604
diff --git a/mm/nommu.c b/mm/nommu.c
index 12e84e69dd06..45131b41bcdb 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -698,7 +698,7 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
698 698
699 mutex_lock(&mapping->i_mmap_mutex); 699 mutex_lock(&mapping->i_mmap_mutex);
700 flush_dcache_mmap_lock(mapping); 700 flush_dcache_mmap_lock(mapping);
701 vma_prio_tree_insert(vma, &mapping->i_mmap); 701 vma_interval_tree_insert(vma, &mapping->i_mmap);
702 flush_dcache_mmap_unlock(mapping); 702 flush_dcache_mmap_unlock(mapping);
703 mutex_unlock(&mapping->i_mmap_mutex); 703 mutex_unlock(&mapping->i_mmap_mutex);
704 } 704 }
@@ -764,7 +764,7 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
764 764
765 mutex_lock(&mapping->i_mmap_mutex); 765 mutex_lock(&mapping->i_mmap_mutex);
766 flush_dcache_mmap_lock(mapping); 766 flush_dcache_mmap_lock(mapping);
767 vma_prio_tree_remove(vma, &mapping->i_mmap); 767 vma_interval_tree_remove(vma, &mapping->i_mmap);
768 flush_dcache_mmap_unlock(mapping); 768 flush_dcache_mmap_unlock(mapping);
769 mutex_unlock(&mapping->i_mmap_mutex); 769 mutex_unlock(&mapping->i_mmap_mutex);
770 } 770 }
@@ -2044,7 +2044,6 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
2044 size_t newsize) 2044 size_t newsize)
2045{ 2045{
2046 struct vm_area_struct *vma; 2046 struct vm_area_struct *vma;
2047 struct prio_tree_iter iter;
2048 struct vm_region *region; 2047 struct vm_region *region;
2049 pgoff_t low, high; 2048 pgoff_t low, high;
2050 size_t r_size, r_top; 2049 size_t r_size, r_top;
@@ -2056,8 +2055,7 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
2056 mutex_lock(&inode->i_mapping->i_mmap_mutex); 2055 mutex_lock(&inode->i_mapping->i_mmap_mutex);
2057 2056
2058 /* search for VMAs that fall within the dead zone */ 2057 /* search for VMAs that fall within the dead zone */
2059 vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, 2058 vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, low, high) {
2060 low, high) {
2061 /* found one - only interested if it's shared out of the page 2059 /* found one - only interested if it's shared out of the page
2062 * cache */ 2060 * cache */
2063 if (vma->vm_flags & VM_SHARED) { 2061 if (vma->vm_flags & VM_SHARED) {
@@ -2073,8 +2071,8 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
2073 * we don't check for any regions that start beyond the EOF as there 2071 * we don't check for any regions that start beyond the EOF as there
2074 * shouldn't be any 2072 * shouldn't be any
2075 */ 2073 */
2076 vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, 2074 vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap,
2077 0, ULONG_MAX) { 2075 0, ULONG_MAX) {
2078 if (!(vma->vm_flags & VM_SHARED)) 2076 if (!(vma->vm_flags & VM_SHARED))
2079 continue; 2077 continue;
2080 2078
diff --git a/mm/prio_tree.c b/mm/prio_tree.c
deleted file mode 100644
index 799dcfd7cd8c..000000000000
--- a/mm/prio_tree.c
+++ /dev/null
@@ -1,208 +0,0 @@
1/*
2 * mm/prio_tree.c - priority search tree for mapping->i_mmap
3 *
4 * Copyright (C) 2004, Rajesh Venkatasubramanian <vrajesh@umich.edu>
5 *
6 * This file is released under the GPL v2.
7 *
8 * Based on the radix priority search tree proposed by Edward M. McCreight
9 * SIAM Journal of Computing, vol. 14, no.2, pages 257-276, May 1985
10 *
11 * 02Feb2004 Initial version
12 */
13
14#include <linux/mm.h>
15#include <linux/prio_tree.h>
16#include <linux/prefetch.h>
17
18/*
19 * See lib/prio_tree.c for details on the general radix priority search tree
20 * code.
21 */
22
23/*
24 * The following #defines are mirrored from lib/prio_tree.c. They're only used
25 * for debugging, and should be removed (along with the debugging code using
26 * them) when switching also VMAs to the regular prio_tree code.
27 */
28
29#define RADIX_INDEX(vma) ((vma)->vm_pgoff)
30#define VMA_SIZE(vma) (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT)
31/* avoid overflow */
32#define HEAP_INDEX(vma) ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1))
33
34/*
35 * Radix priority search tree for address_space->i_mmap
36 *
37 * For each vma that map a unique set of file pages i.e., unique [radix_index,
38 * heap_index] value, we have a corresponding priority search tree node. If
39 * multiple vmas have identical [radix_index, heap_index] value, then one of
40 * them is used as a tree node and others are stored in a vm_set list. The tree
41 * node points to the first vma (head) of the list using vm_set.head.
42 *
43 * prio_tree_root
44 * |
45 * A vm_set.head
46 * / \ /
47 * L R -> H-I-J-K-M-N-O-P-Q-S
48 * ^ ^ <-- vm_set.list -->
49 * tree nodes
50 *
51 * We need some way to identify whether a vma is a tree node, head of a vm_set
52 * list, or just a member of a vm_set list. We cannot use vm_flags to store
53 * such information. The reason is, in the above figure, it is possible that
54 * vm_flags' of R and H are covered by the different mmap_sems. When R is
55 * removed under R->mmap_sem, H replaces R as a tree node. Since we do not hold
56 * H->mmap_sem, we cannot use H->vm_flags for marking that H is a tree node now.
57 * That's why some trick involving shared.vm_set.parent is used for identifying
58 * tree nodes and list head nodes.
59 *
60 * vma radix priority search tree node rules:
61 *
62 * vma->shared.vm_set.parent != NULL ==> a tree node
63 * vma->shared.vm_set.head != NULL ==> list of others mapping same range
64 * vma->shared.vm_set.head == NULL ==> no others map the same range
65 *
66 * vma->shared.vm_set.parent == NULL
67 * vma->shared.vm_set.head != NULL ==> list head of vmas mapping same range
68 * vma->shared.vm_set.head == NULL ==> a list node
69 */
70
71/*
72 * Add a new vma known to map the same set of pages as the old vma:
73 * useful for fork's dup_mmap as well as vma_prio_tree_insert below.
74 * Note that it just happens to work correctly on i_mmap_nonlinear too.
75 */
76void vma_prio_tree_add(struct vm_area_struct *vma, struct vm_area_struct *old)
77{
78 /* Leave these BUG_ONs till prio_tree patch stabilizes */
79 BUG_ON(RADIX_INDEX(vma) != RADIX_INDEX(old));
80 BUG_ON(HEAP_INDEX(vma) != HEAP_INDEX(old));
81
82 vma->shared.vm_set.head = NULL;
83 vma->shared.vm_set.parent = NULL;
84
85 if (!old->shared.vm_set.parent)
86 list_add(&vma->shared.vm_set.list,
87 &old->shared.vm_set.list);
88 else if (old->shared.vm_set.head)
89 list_add_tail(&vma->shared.vm_set.list,
90 &old->shared.vm_set.head->shared.vm_set.list);
91 else {
92 INIT_LIST_HEAD(&vma->shared.vm_set.list);
93 vma->shared.vm_set.head = old;
94 old->shared.vm_set.head = vma;
95 }
96}
97
98void vma_prio_tree_insert(struct vm_area_struct *vma,
99 struct prio_tree_root *root)
100{
101 struct prio_tree_node *ptr;
102 struct vm_area_struct *old;
103
104 vma->shared.vm_set.head = NULL;
105
106 ptr = raw_prio_tree_insert(root, &vma->shared.prio_tree_node);
107 if (ptr != (struct prio_tree_node *) &vma->shared.prio_tree_node) {
108 old = prio_tree_entry(ptr, struct vm_area_struct,
109 shared.prio_tree_node);
110 vma_prio_tree_add(vma, old);
111 }
112}
113
114void vma_prio_tree_remove(struct vm_area_struct *vma,
115 struct prio_tree_root *root)
116{
117 struct vm_area_struct *node, *head, *new_head;
118
119 if (!vma->shared.vm_set.head) {
120 if (!vma->shared.vm_set.parent)
121 list_del_init(&vma->shared.vm_set.list);
122 else
123 raw_prio_tree_remove(root, &vma->shared.prio_tree_node);
124 } else {
125 /* Leave this BUG_ON till prio_tree patch stabilizes */
126 BUG_ON(vma->shared.vm_set.head->shared.vm_set.head != vma);
127 if (vma->shared.vm_set.parent) {
128 head = vma->shared.vm_set.head;
129 if (!list_empty(&head->shared.vm_set.list)) {
130 new_head = list_entry(
131 head->shared.vm_set.list.next,
132 struct vm_area_struct,
133 shared.vm_set.list);
134 list_del_init(&head->shared.vm_set.list);
135 } else
136 new_head = NULL;
137
138 raw_prio_tree_replace(root, &vma->shared.prio_tree_node,
139 &head->shared.prio_tree_node);
140 head->shared.vm_set.head = new_head;
141 if (new_head)
142 new_head->shared.vm_set.head = head;
143
144 } else {
145 node = vma->shared.vm_set.head;
146 if (!list_empty(&vma->shared.vm_set.list)) {
147 new_head = list_entry(
148 vma->shared.vm_set.list.next,
149 struct vm_area_struct,
150 shared.vm_set.list);
151 list_del_init(&vma->shared.vm_set.list);
152 node->shared.vm_set.head = new_head;
153 new_head->shared.vm_set.head = node;
154 } else
155 node->shared.vm_set.head = NULL;
156 }
157 }
158}
159
160/*
161 * Helper function to enumerate vmas that map a given file page or a set of
162 * contiguous file pages. The function returns vmas that at least map a single
163 * page in the given range of contiguous file pages.
164 */
165struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma,
166 struct prio_tree_iter *iter)
167{
168 struct prio_tree_node *ptr;
169 struct vm_area_struct *next;
170
171 if (!vma) {
172 /*
173 * First call is with NULL vma
174 */
175 ptr = prio_tree_next(iter);
176 if (ptr) {
177 next = prio_tree_entry(ptr, struct vm_area_struct,
178 shared.prio_tree_node);
179 prefetch(next->shared.vm_set.head);
180 return next;
181 } else
182 return NULL;
183 }
184
185 if (vma->shared.vm_set.parent) {
186 if (vma->shared.vm_set.head) {
187 next = vma->shared.vm_set.head;
188 prefetch(next->shared.vm_set.list.next);
189 return next;
190 }
191 } else {
192 next = list_entry(vma->shared.vm_set.list.next,
193 struct vm_area_struct, shared.vm_set.list);
194 if (!next->shared.vm_set.head) {
195 prefetch(next->shared.vm_set.list.next);
196 return next;
197 }
198 }
199
200 ptr = prio_tree_next(iter);
201 if (ptr) {
202 next = prio_tree_entry(ptr, struct vm_area_struct,
203 shared.prio_tree_node);
204 prefetch(next->shared.vm_set.head);
205 return next;
206 } else
207 return NULL;
208}
diff --git a/mm/rmap.c b/mm/rmap.c
index 0f3b7cda2a24..7b5b51d25fc5 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -820,7 +820,6 @@ static int page_referenced_file(struct page *page,
820 struct address_space *mapping = page->mapping; 820 struct address_space *mapping = page->mapping;
821 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); 821 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
822 struct vm_area_struct *vma; 822 struct vm_area_struct *vma;
823 struct prio_tree_iter iter;
824 int referenced = 0; 823 int referenced = 0;
825 824
826 /* 825 /*
@@ -846,7 +845,7 @@ static int page_referenced_file(struct page *page,
846 */ 845 */
847 mapcount = page_mapcount(page); 846 mapcount = page_mapcount(page);
848 847
849 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 848 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
850 unsigned long address = vma_address(page, vma); 849 unsigned long address = vma_address(page, vma);
851 if (address == -EFAULT) 850 if (address == -EFAULT)
852 continue; 851 continue;
@@ -945,13 +944,12 @@ static int page_mkclean_file(struct address_space *mapping, struct page *page)
945{ 944{
946 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); 945 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
947 struct vm_area_struct *vma; 946 struct vm_area_struct *vma;
948 struct prio_tree_iter iter;
949 int ret = 0; 947 int ret = 0;
950 948
951 BUG_ON(PageAnon(page)); 949 BUG_ON(PageAnon(page));
952 950
953 mutex_lock(&mapping->i_mmap_mutex); 951 mutex_lock(&mapping->i_mmap_mutex);
954 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 952 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
955 if (vma->vm_flags & VM_SHARED) { 953 if (vma->vm_flags & VM_SHARED) {
956 unsigned long address = vma_address(page, vma); 954 unsigned long address = vma_address(page, vma);
957 if (address == -EFAULT) 955 if (address == -EFAULT)
@@ -1547,7 +1545,6 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1547 struct address_space *mapping = page->mapping; 1545 struct address_space *mapping = page->mapping;
1548 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); 1546 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1549 struct vm_area_struct *vma; 1547 struct vm_area_struct *vma;
1550 struct prio_tree_iter iter;
1551 int ret = SWAP_AGAIN; 1548 int ret = SWAP_AGAIN;
1552 unsigned long cursor; 1549 unsigned long cursor;
1553 unsigned long max_nl_cursor = 0; 1550 unsigned long max_nl_cursor = 0;
@@ -1555,7 +1552,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1555 unsigned int mapcount; 1552 unsigned int mapcount;
1556 1553
1557 mutex_lock(&mapping->i_mmap_mutex); 1554 mutex_lock(&mapping->i_mmap_mutex);
1558 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 1555 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
1559 unsigned long address = vma_address(page, vma); 1556 unsigned long address = vma_address(page, vma);
1560 if (address == -EFAULT) 1557 if (address == -EFAULT)
1561 continue; 1558 continue;
@@ -1576,7 +1573,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1576 goto out; 1573 goto out;
1577 1574
1578 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, 1575 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
1579 shared.vm_set.list) { 1576 shared.nonlinear) {
1580 cursor = (unsigned long) vma->vm_private_data; 1577 cursor = (unsigned long) vma->vm_private_data;
1581 if (cursor > max_nl_cursor) 1578 if (cursor > max_nl_cursor)
1582 max_nl_cursor = cursor; 1579 max_nl_cursor = cursor;
@@ -1608,7 +1605,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1608 1605
1609 do { 1606 do {
1610 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, 1607 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
1611 shared.vm_set.list) { 1608 shared.nonlinear) {
1612 cursor = (unsigned long) vma->vm_private_data; 1609 cursor = (unsigned long) vma->vm_private_data;
1613 while ( cursor < max_nl_cursor && 1610 while ( cursor < max_nl_cursor &&
1614 cursor < vma->vm_end - vma->vm_start) { 1611 cursor < vma->vm_end - vma->vm_start) {
@@ -1631,7 +1628,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1631 * in locked vmas). Reset cursor on all unreserved nonlinear 1628 * in locked vmas). Reset cursor on all unreserved nonlinear
1632 * vmas, now forgetting on which ones it had fallen behind. 1629 * vmas, now forgetting on which ones it had fallen behind.
1633 */ 1630 */
1634 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) 1631 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
1635 vma->vm_private_data = NULL; 1632 vma->vm_private_data = NULL;
1636out: 1633out:
1637 mutex_unlock(&mapping->i_mmap_mutex); 1634 mutex_unlock(&mapping->i_mmap_mutex);
@@ -1748,13 +1745,12 @@ static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
1748 struct address_space *mapping = page->mapping; 1745 struct address_space *mapping = page->mapping;
1749 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); 1746 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1750 struct vm_area_struct *vma; 1747 struct vm_area_struct *vma;
1751 struct prio_tree_iter iter;
1752 int ret = SWAP_AGAIN; 1748 int ret = SWAP_AGAIN;
1753 1749
1754 if (!mapping) 1750 if (!mapping)
1755 return ret; 1751 return ret;
1756 mutex_lock(&mapping->i_mmap_mutex); 1752 mutex_lock(&mapping->i_mmap_mutex);
1757 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 1753 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
1758 unsigned long address = vma_address(page, vma); 1754 unsigned long address = vma_address(page, vma);
1759 if (address == -EFAULT) 1755 if (address == -EFAULT)
1760 continue; 1756 continue;