author	Michel Lespinasse <walken@google.com>	2012-10-08 19:31:25 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-10-09 03:22:39 -0400
commit	6b2dbba8b6ac4df26f72eda1e5ea7bab9f950e08 (patch)
tree	422ed8d7ac2fe45069f20cfba84a9a097bf444af /mm
parent	fff3fd8a1210a165252cd7cd01206da7a90d3a06 (diff)
mm: replace vma prio_tree with an interval tree
Implement an interval tree as a replacement for the VMA prio_tree.  The
algorithms are similar to lib/interval_tree.c; however, that code can't
be directly reused as the interval endpoints are not explicitly stored
in the VMA.  So instead, the common algorithm is moved into a template
and the details (node type, how to get interval endpoints from the
node, etc.) are filled in using the C preprocessor.

Once the interval tree functions are available, using them as a
replacement for the VMA prio tree is a relatively simple, mechanical
job.

Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
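The template technique described above has the including file supply, through
preprocessor macros, the node type and the expressions that read the interval
endpoints out of a node; the shared header then expands into a full set of
type-specific insert/remove/iterate functions.  A minimal sketch of the
pattern, assuming a hypothetical node type that stores its endpoints directly
(the names sample_node and sample_tree are illustrative only; the real VMA
instantiation appears in mm/interval_tree.c below):

	/*
	 * Sketch of the C-preprocessor template instantiation.  The ITxxx
	 * macro names mirror the ones this patch uses; sample_node stores
	 * the closed interval [start, last] directly in the node.
	 */
	struct sample_node {
		struct rb_node rb;		/* embedded rbtree linkage */
		unsigned long start, last;	/* interval endpoints */
		unsigned long subtree_last;	/* augmented: max 'last' in subtree */
	};

	#define ITSTRUCT	struct sample_node
	#define ITRB		rb
	#define ITTYPE		unsigned long
	#define ITSUBTREE	subtree_last
	#define ITSTART(n)	((n)->start)
	#define ITLAST(n)	((n)->last)
	#define ITSTATIC	static
	#define ITPREFIX	sample_tree

	#include <linux/interval_tree_tmpl.h>

	/* The include expands into sample_tree_insert(), sample_tree_remove()
	 * and the iteration helpers, specialized to struct sample_node. */

Because the endpoints are derived through macros rather than read from a fixed
layout, the same template serves both this hypothetical node and the VMA case,
where ITSTART/ITLAST compute the page range from vm_pgoff and the mapping size.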
Diffstat (limited to 'mm')
-rw-r--r--	mm/Makefile	4
-rw-r--r--	mm/filemap_xip.c	3
-rw-r--r--	mm/fremap.c	2
-rw-r--r--	mm/hugetlb.c	3
-rw-r--r--	mm/interval_tree.c	61
-rw-r--r--	mm/memory-failure.c	3
-rw-r--r--	mm/memory.c	9
-rw-r--r--	mm/mmap.c	22
-rw-r--r--	mm/nommu.c	12
-rw-r--r--	mm/prio_tree.c	208
-rw-r--r--	mm/rmap.c	18
11 files changed, 94 insertions, 251 deletions
diff --git a/mm/Makefile b/mm/Makefile
index 92753e2d82da..6b025f80af34 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -14,9 +14,9 @@ endif
 obj-y			:= filemap.o mempool.o oom_kill.o fadvise.o \
 			   maccess.o page_alloc.o page-writeback.o \
 			   readahead.o swap.o truncate.o vmscan.o shmem.o \
-			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
+			   util.o mmzone.o vmstat.o backing-dev.o \
 			   mm_init.o mmu_context.o percpu.o slab_common.o \
-			   compaction.o $(mmu-y)
+			   compaction.o interval_tree.o $(mmu-y)
 
 obj-y += init-mm.o
 
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 91750227a191..a52daee11d3f 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -167,7 +167,6 @@ __xip_unmap (struct address_space * mapping,
 {
 	struct vm_area_struct *vma;
 	struct mm_struct *mm;
-	struct prio_tree_iter iter;
 	unsigned long address;
 	pte_t *pte;
 	pte_t pteval;
@@ -184,7 +183,7 @@ __xip_unmap (struct address_space * mapping,
 
 retry:
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 		mm = vma->vm_mm;
 		address = vma->vm_start +
 			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
diff --git a/mm/fremap.c b/mm/fremap.c
index 3d731a498788..3899a86851ce 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -214,7 +214,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 	mutex_lock(&mapping->i_mmap_mutex);
 	flush_dcache_mmap_lock(mapping);
 	vma->vm_flags |= VM_NONLINEAR;
-	vma_prio_tree_remove(vma, &mapping->i_mmap);
+	vma_interval_tree_remove(vma, &mapping->i_mmap);
 	vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
 	flush_dcache_mmap_unlock(mapping);
 	mutex_unlock(&mapping->i_mmap_mutex);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f1bb534254f6..c9b40e3a9936 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2474,7 +2474,6 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct hstate *h = hstate_vma(vma);
 	struct vm_area_struct *iter_vma;
 	struct address_space *mapping;
-	struct prio_tree_iter iter;
 	pgoff_t pgoff;
 
 	/*
@@ -2491,7 +2490,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * __unmap_hugepage_range() is called as the lock is already held
 	 */
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(iter_vma, &mapping->i_mmap, pgoff, pgoff) {
 		/* Do not unmap the current VMA */
 		if (iter_vma == vma)
 			continue;
diff --git a/mm/interval_tree.c b/mm/interval_tree.c
new file mode 100644
index 000000000000..7dc565660e56
--- /dev/null
+++ b/mm/interval_tree.c
@@ -0,0 +1,61 @@
+/*
+ * mm/interval_tree.c - interval tree for mapping->i_mmap
+ *
+ * Copyright (C) 2012, Michel Lespinasse <walken@google.com>
+ *
+ * This file is released under the GPL v2.
+ */
+
+#include <linux/mm.h>
+#include <linux/fs.h>
+
+#define ITSTRUCT   struct vm_area_struct
+#define ITRB       shared.linear.rb
+#define ITTYPE     unsigned long
+#define ITSUBTREE  shared.linear.rb_subtree_last
+#define ITSTART(n) ((n)->vm_pgoff)
+#define ITLAST(n)  ((n)->vm_pgoff + \
+		    (((n)->vm_end - (n)->vm_start) >> PAGE_SHIFT) - 1)
+#define ITSTATIC
+#define ITPREFIX   vma_interval_tree
+
+#include <linux/interval_tree_tmpl.h>
+
+/* Insert vma immediately after old in the interval tree */
+void vma_interval_tree_add(struct vm_area_struct *vma,
+			   struct vm_area_struct *old,
+			   struct address_space *mapping)
+{
+	struct rb_node **link;
+	struct vm_area_struct *parent;
+	unsigned long last;
+
+	if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
+		list_add(&vma->shared.nonlinear, &old->shared.nonlinear);
+		return;
+	}
+
+	last = ITLAST(vma);
+
+	if (!old->shared.linear.rb.rb_right) {
+		parent = old;
+		link = &old->shared.linear.rb.rb_right;
+	} else {
+		parent = rb_entry(old->shared.linear.rb.rb_right,
+				  struct vm_area_struct, shared.linear.rb);
+		if (parent->shared.linear.rb_subtree_last < last)
+			parent->shared.linear.rb_subtree_last = last;
+		while (parent->shared.linear.rb.rb_left) {
+			parent = rb_entry(parent->shared.linear.rb.rb_left,
+				struct vm_area_struct, shared.linear.rb);
+			if (parent->shared.linear.rb_subtree_last < last)
+				parent->shared.linear.rb_subtree_last = last;
+		}
+		link = &parent->shared.linear.rb.rb_left;
+	}
+
+	vma->shared.linear.rb_subtree_last = last;
+	rb_link_node(&vma->shared.linear.rb, &parent->shared.linear.rb, link);
+	rb_insert_augmented(&vma->shared.linear.rb, &mapping->i_mmap,
+			    &vma_interval_tree_augment_callbacks);
+}
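Note that ITSTART()/ITLAST() above express a VMA's mapped file range in page
units as a closed interval: for example, a VMA with vm_pgoff == 4 covering
three pages (vm_end - vm_start == 3 << PAGE_SHIFT) occupies the interval
[4, 6], since ITLAST() subtracts one to name the last page actually mapped.
This is what lets the tree answer stabbing queries such as the (pgoff, pgoff)
lookups used by the rmap callers below.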
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a6e2141a6610..c38a6257d082 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -431,7 +431,6 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
 {
 	struct vm_area_struct *vma;
 	struct task_struct *tsk;
-	struct prio_tree_iter iter;
 	struct address_space *mapping = page->mapping;
 
 	mutex_lock(&mapping->i_mmap_mutex);
@@ -442,7 +441,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
 		if (!task_early_kill(tsk))
 			continue;
 
-		vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff,
+		vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff,
 				      pgoff) {
 			/*
 			 * Send early kill signal to tasks where a vma covers
diff --git a/mm/memory.c b/mm/memory.c
index e09c04813186..d205e4381a34 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2801,14 +2801,13 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma,
 	zap_page_range_single(vma, start_addr, end_addr - start_addr, details);
 }
 
-static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
+static inline void unmap_mapping_range_tree(struct rb_root *root,
 					    struct zap_details *details)
 {
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 	pgoff_t vba, vea, zba, zea;
 
-	vma_prio_tree_foreach(vma, &iter, root,
+	vma_interval_tree_foreach(vma, root,
 			details->first_index, details->last_index) {
 
 		vba = vma->vm_pgoff;
@@ -2839,7 +2838,7 @@ static inline void unmap_mapping_range_list(struct list_head *head,
 	 * across *all* the pages in each nonlinear VMA, not just the pages
 	 * whose virtual address lies outside the file truncation point.
 	 */
-	list_for_each_entry(vma, head, shared.vm_set.list) {
+	list_for_each_entry(vma, head, shared.nonlinear) {
 		details->nonlinear_vma = vma;
 		unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details);
 	}
@@ -2883,7 +2882,7 @@ void unmap_mapping_range(struct address_space *mapping,
 
 
 	mutex_lock(&mapping->i_mmap_mutex);
-	if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
+	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
 		unmap_mapping_range_tree(&mapping->i_mmap, &details);
 	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
 		unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
diff --git a/mm/mmap.c b/mm/mmap.c
index e3c365ff1b6a..5ac533f88e99 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -199,14 +199,14 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
 
 	flush_dcache_mmap_lock(mapping);
 	if (unlikely(vma->vm_flags & VM_NONLINEAR))
-		list_del_init(&vma->shared.vm_set.list);
+		list_del_init(&vma->shared.nonlinear);
 	else
-		vma_prio_tree_remove(vma, &mapping->i_mmap);
+		vma_interval_tree_remove(vma, &mapping->i_mmap);
 	flush_dcache_mmap_unlock(mapping);
 }
 
 /*
- * Unlink a file-based vm structure from its prio_tree, to hide
+ * Unlink a file-based vm structure from its interval tree, to hide
  * vma from rmap and vmtruncate before freeing its page tables.
  */
 void unlink_file_vma(struct vm_area_struct *vma)
@@ -411,7 +411,7 @@ static void __vma_link_file(struct vm_area_struct *vma)
 		if (unlikely(vma->vm_flags & VM_NONLINEAR))
 			vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
 		else
-			vma_prio_tree_insert(vma, &mapping->i_mmap);
+			vma_interval_tree_insert(vma, &mapping->i_mmap);
 		flush_dcache_mmap_unlock(mapping);
 	}
 }
@@ -449,7 +449,7 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 
 /*
  * Helper for vma_adjust() in the split_vma insert case: insert a vma into the
- * mm's list and rbtree. It has already been inserted into the prio_tree.
+ * mm's list and rbtree. It has already been inserted into the interval tree.
  */
 static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
@@ -491,7 +491,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	struct vm_area_struct *next = vma->vm_next;
 	struct vm_area_struct *importer = NULL;
 	struct address_space *mapping = NULL;
-	struct prio_tree_root *root = NULL;
+	struct rb_root *root = NULL;
 	struct anon_vma *anon_vma = NULL;
 	struct file *file = vma->vm_file;
 	long adjust_next = 0;
@@ -554,7 +554,7 @@ again: remove_next = 1 + (end > next->vm_end);
 		mutex_lock(&mapping->i_mmap_mutex);
 		if (insert) {
 			/*
-			 * Put into prio_tree now, so instantiated pages
+			 * Put into interval tree now, so instantiated pages
 			 * are visible to arm/parisc __flush_dcache_page
 			 * throughout; but we cannot insert into address
 			 * space until vma start or end is updated.
@@ -582,9 +582,9 @@ again: remove_next = 1 + (end > next->vm_end);
 
 	if (root) {
 		flush_dcache_mmap_lock(mapping);
-		vma_prio_tree_remove(vma, root);
+		vma_interval_tree_remove(vma, root);
 		if (adjust_next)
-			vma_prio_tree_remove(next, root);
+			vma_interval_tree_remove(next, root);
 	}
 
 	vma->vm_start = start;
@@ -597,8 +597,8 @@ again: remove_next = 1 + (end > next->vm_end);
 
 	if (root) {
 		if (adjust_next)
-			vma_prio_tree_insert(next, root);
-		vma_prio_tree_insert(vma, root);
+			vma_interval_tree_insert(next, root);
+		vma_interval_tree_insert(vma, root);
 		flush_dcache_mmap_unlock(mapping);
 	}
 
diff --git a/mm/nommu.c b/mm/nommu.c
index 12e84e69dd06..45131b41bcdb 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -698,7 +698,7 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
 
 		mutex_lock(&mapping->i_mmap_mutex);
 		flush_dcache_mmap_lock(mapping);
-		vma_prio_tree_insert(vma, &mapping->i_mmap);
+		vma_interval_tree_insert(vma, &mapping->i_mmap);
 		flush_dcache_mmap_unlock(mapping);
 		mutex_unlock(&mapping->i_mmap_mutex);
 	}
@@ -764,7 +764,7 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
 
 		mutex_lock(&mapping->i_mmap_mutex);
 		flush_dcache_mmap_lock(mapping);
-		vma_prio_tree_remove(vma, &mapping->i_mmap);
+		vma_interval_tree_remove(vma, &mapping->i_mmap);
 		flush_dcache_mmap_unlock(mapping);
 		mutex_unlock(&mapping->i_mmap_mutex);
 	}
@@ -2044,7 +2044,6 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
 				  size_t newsize)
 {
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 	struct vm_region *region;
 	pgoff_t low, high;
 	size_t r_size, r_top;
@@ -2056,8 +2055,7 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
 	mutex_lock(&inode->i_mapping->i_mmap_mutex);
 
 	/* search for VMAs that fall within the dead zone */
-	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
-			      low, high) {
+	vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, low, high) {
 		/* found one - only interested if it's shared out of the page
 		 * cache */
 		if (vma->vm_flags & VM_SHARED) {
@@ -2073,8 +2071,8 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
 	 * we don't check for any regions that start beyond the EOF as there
 	 * shouldn't be any
 	 */
-	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
+	vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap,
 			      0, ULONG_MAX) {
 		if (!(vma->vm_flags & VM_SHARED))
 			continue;
 
diff --git a/mm/prio_tree.c b/mm/prio_tree.c
deleted file mode 100644
index 799dcfd7cd8c..000000000000
--- a/mm/prio_tree.c
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * mm/prio_tree.c - priority search tree for mapping->i_mmap
- *
- * Copyright (C) 2004, Rajesh Venkatasubramanian <vrajesh@umich.edu>
- *
- * This file is released under the GPL v2.
- *
- * Based on the radix priority search tree proposed by Edward M. McCreight
- * SIAM Journal of Computing, vol. 14, no.2, pages 257-276, May 1985
- *
- * 02Feb2004 Initial version
- */
-
-#include <linux/mm.h>
-#include <linux/prio_tree.h>
-#include <linux/prefetch.h>
-
-/*
- * See lib/prio_tree.c for details on the general radix priority search tree
- * code.
- */
-
-/*
- * The following #defines are mirrored from lib/prio_tree.c. They're only used
- * for debugging, and should be removed (along with the debugging code using
- * them) when switching also VMAs to the regular prio_tree code.
- */
-
-#define RADIX_INDEX(vma)  ((vma)->vm_pgoff)
-#define VMA_SIZE(vma)	  (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT)
-/* avoid overflow */
-#define HEAP_INDEX(vma)	  ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1))
-
-/*
- * Radix priority search tree for address_space->i_mmap
- *
- * For each vma that map a unique set of file pages i.e., unique [radix_index,
- * heap_index] value, we have a corresponding priority search tree node. If
- * multiple vmas have identical [radix_index, heap_index] value, then one of
- * them is used as a tree node and others are stored in a vm_set list. The tree
- * node points to the first vma (head) of the list using vm_set.head.
- *
- * prio_tree_root
- *      |
- *      A       vm_set.head
- *     / \      /
- *    L   R -> H-I-J-K-M-N-O-P-Q-S
- *    ^   ^    <-- vm_set.list -->
- *  tree nodes
- *
- * We need some way to identify whether a vma is a tree node, head of a vm_set
- * list, or just a member of a vm_set list. We cannot use vm_flags to store
- * such information. The reason is, in the above figure, it is possible that
- * vm_flags' of R and H are covered by the different mmap_sems. When R is
- * removed under R->mmap_sem, H replaces R as a tree node. Since we do not hold
- * H->mmap_sem, we cannot use H->vm_flags for marking that H is a tree node now.
- * That's why some trick involving shared.vm_set.parent is used for identifying
- * tree nodes and list head nodes.
- *
- * vma radix priority search tree node rules:
- *
- * vma->shared.vm_set.parent != NULL    ==> a tree node
- *	vma->shared.vm_set.head != NULL ==> list of others mapping same range
- *	vma->shared.vm_set.head == NULL ==> no others map the same range
- *
- * vma->shared.vm_set.parent == NULL
- *	vma->shared.vm_set.head != NULL ==> list head of vmas mapping same range
- *	vma->shared.vm_set.head == NULL ==> a list node
- */
-
-/*
- * Add a new vma known to map the same set of pages as the old vma:
- * useful for fork's dup_mmap as well as vma_prio_tree_insert below.
- * Note that it just happens to work correctly on i_mmap_nonlinear too.
- */
-void vma_prio_tree_add(struct vm_area_struct *vma, struct vm_area_struct *old)
-{
-	/* Leave these BUG_ONs till prio_tree patch stabilizes */
-	BUG_ON(RADIX_INDEX(vma) != RADIX_INDEX(old));
-	BUG_ON(HEAP_INDEX(vma) != HEAP_INDEX(old));
-
-	vma->shared.vm_set.head = NULL;
-	vma->shared.vm_set.parent = NULL;
-
-	if (!old->shared.vm_set.parent)
-		list_add(&vma->shared.vm_set.list,
-				&old->shared.vm_set.list);
-	else if (old->shared.vm_set.head)
-		list_add_tail(&vma->shared.vm_set.list,
-				&old->shared.vm_set.head->shared.vm_set.list);
-	else {
-		INIT_LIST_HEAD(&vma->shared.vm_set.list);
-		vma->shared.vm_set.head = old;
-		old->shared.vm_set.head = vma;
-	}
-}
-
-void vma_prio_tree_insert(struct vm_area_struct *vma,
-			  struct prio_tree_root *root)
-{
-	struct prio_tree_node *ptr;
-	struct vm_area_struct *old;
-
-	vma->shared.vm_set.head = NULL;
-
-	ptr = raw_prio_tree_insert(root, &vma->shared.prio_tree_node);
-	if (ptr != (struct prio_tree_node *) &vma->shared.prio_tree_node) {
-		old = prio_tree_entry(ptr, struct vm_area_struct,
-					shared.prio_tree_node);
-		vma_prio_tree_add(vma, old);
-	}
-}
-
-void vma_prio_tree_remove(struct vm_area_struct *vma,
-			  struct prio_tree_root *root)
-{
-	struct vm_area_struct *node, *head, *new_head;
-
-	if (!vma->shared.vm_set.head) {
-		if (!vma->shared.vm_set.parent)
-			list_del_init(&vma->shared.vm_set.list);
-		else
-			raw_prio_tree_remove(root, &vma->shared.prio_tree_node);
-	} else {
-		/* Leave this BUG_ON till prio_tree patch stabilizes */
-		BUG_ON(vma->shared.vm_set.head->shared.vm_set.head != vma);
-		if (vma->shared.vm_set.parent) {
-			head = vma->shared.vm_set.head;
-			if (!list_empty(&head->shared.vm_set.list)) {
-				new_head = list_entry(
-					head->shared.vm_set.list.next,
-					struct vm_area_struct,
-					shared.vm_set.list);
-				list_del_init(&head->shared.vm_set.list);
-			} else
-				new_head = NULL;
-
-			raw_prio_tree_replace(root, &vma->shared.prio_tree_node,
-					&head->shared.prio_tree_node);
-			head->shared.vm_set.head = new_head;
-			if (new_head)
-				new_head->shared.vm_set.head = head;
-
-		} else {
-			node = vma->shared.vm_set.head;
-			if (!list_empty(&vma->shared.vm_set.list)) {
-				new_head = list_entry(
-					vma->shared.vm_set.list.next,
-					struct vm_area_struct,
-					shared.vm_set.list);
-				list_del_init(&vma->shared.vm_set.list);
-				node->shared.vm_set.head = new_head;
-				new_head->shared.vm_set.head = node;
-			} else
-				node->shared.vm_set.head = NULL;
-		}
-	}
-}
-
-/*
- * Helper function to enumerate vmas that map a given file page or a set of
- * contiguous file pages. The function returns vmas that at least map a single
- * page in the given range of contiguous file pages.
- */
-struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma,
-					  struct prio_tree_iter *iter)
-{
-	struct prio_tree_node *ptr;
-	struct vm_area_struct *next;
-
-	if (!vma) {
-		/*
-		 * First call is with NULL vma
-		 */
-		ptr = prio_tree_next(iter);
-		if (ptr) {
-			next = prio_tree_entry(ptr, struct vm_area_struct,
-						shared.prio_tree_node);
-			prefetch(next->shared.vm_set.head);
-			return next;
-		} else
-			return NULL;
-	}
-
-	if (vma->shared.vm_set.parent) {
-		if (vma->shared.vm_set.head) {
-			next = vma->shared.vm_set.head;
-			prefetch(next->shared.vm_set.list.next);
-			return next;
-		}
-	} else {
-		next = list_entry(vma->shared.vm_set.list.next,
-				struct vm_area_struct, shared.vm_set.list);
-		if (!next->shared.vm_set.head) {
-			prefetch(next->shared.vm_set.list.next);
-			return next;
-		}
-	}
-
-	ptr = prio_tree_next(iter);
-	if (ptr) {
-		next = prio_tree_entry(ptr, struct vm_area_struct,
-					shared.prio_tree_node);
-		prefetch(next->shared.vm_set.head);
-		return next;
-	} else
-		return NULL;
-}
diff --git a/mm/rmap.c b/mm/rmap.c
index 0f3b7cda2a24..7b5b51d25fc5 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -820,7 +820,6 @@ static int page_referenced_file(struct page *page,
 	struct address_space *mapping = page->mapping;
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 	int referenced = 0;
 
 	/*
@@ -846,7 +845,7 @@ static int page_referenced_file(struct page *page,
 	 */
 	mapcount = page_mapcount(page);
 
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
@@ -945,13 +944,12 @@ static int page_mkclean_file(struct address_space *mapping, struct page *page)
 {
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 	int ret = 0;
 
 	BUG_ON(PageAnon(page));
 
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 		if (vma->vm_flags & VM_SHARED) {
 			unsigned long address = vma_address(page, vma);
 			if (address == -EFAULT)
@@ -1547,7 +1545,6 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 	struct address_space *mapping = page->mapping;
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 	int ret = SWAP_AGAIN;
 	unsigned long cursor;
 	unsigned long max_nl_cursor = 0;
@@ -1555,7 +1552,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 	unsigned int mapcount;
 
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
@@ -1576,7 +1573,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 		goto out;
 
 	list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
-							shared.vm_set.list) {
+							shared.nonlinear) {
 		cursor = (unsigned long) vma->vm_private_data;
 		if (cursor > max_nl_cursor)
 			max_nl_cursor = cursor;
@@ -1608,7 +1605,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 
 	do {
 		list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
-							shared.vm_set.list) {
+							shared.nonlinear) {
 			cursor = (unsigned long) vma->vm_private_data;
 			while ( cursor < max_nl_cursor &&
 				cursor < vma->vm_end - vma->vm_start) {
@@ -1631,7 +1628,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 	 * in locked vmas). Reset cursor on all unreserved nonlinear
 	 * vmas, now forgetting on which ones it had fallen behind.
 	 */
-	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
+	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
 		vma->vm_private_data = NULL;
 out:
 	mutex_unlock(&mapping->i_mmap_mutex);
@@ -1748,13 +1745,12 @@ static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
 	struct address_space *mapping = page->mapping;
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 	int ret = SWAP_AGAIN;
 
 	if (!mapping)
 		return ret;
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;