author		Michel Lespinasse <walken@google.com>	2012-12-11 19:01:38 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-11 20:22:25 -0500
commit		d37371870ceb1d2165397dc36114725b6dca946c (patch)
tree		907bed40a8b2b3a66da1531fe8ff7183639a5cc3
parent		fcc1f2d5dd3480214ab52e06d081d123019814ed (diff)
mm: augment vma rbtree with rb_subtree_gap
Define vma->rb_subtree_gap as the largest gap between any vma in the
subtree rooted at that vma, and their predecessor.  Or, for a recursive
definition, vma->rb_subtree_gap is the max of:

- vma->vm_start - vma->vm_prev->vm_end
- rb_subtree_gap fields of the vmas pointed by vma->rb.rb_left and
  vma->rb.rb_right

This will allow get_unmapped_area_* to find a free area of the right
size in O(log(N)) time, instead of potentially having to do a linear
walk across all the VMAs.

Also define mm->highest_vm_end as the vm_end field of the highest vma,
so that we can easily check if the following gap is suitable.

This does have the potential to make unmapping VMAs more expensive,
especially for processes with very large numbers of VMAs, where the
VMA rbtree can grow quite deep.

Signed-off-by: Michel Lespinasse <walken@google.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
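For illustration only, not part of this patch: rb_subtree_gap is merely maintained here, and the O(log N) search it enables lands in a follow-up change. The minimal sketch below shows how a top-down walk over the gap-augmented rbtree could locate the lowest free gap of at least "length" bytes, using only the field and definitions above. find_gap_sketch is a hypothetical name; the real search would additionally honour low/high address limits and the gap above the last vma via mm->highest_vm_end.

static unsigned long find_gap_sketch(struct mm_struct *mm, unsigned long length)
{
	struct vm_area_struct *vma;

	if (!mm->mm_rb.rb_node)
		return -ENOMEM;

	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
	if (vma->rb_subtree_gap < length)
		return -ENOMEM;	/* no gap anywhere in the tree is big enough */

	/* Invariant: the subtree rooted at vma contains a gap >= length. */
	for (;;) {
		unsigned long gap_start, gap;
		struct vm_area_struct *child;

		/* Prefer the lowest suitable gap: try the left subtree first. */
		if (vma->vm_rb.rb_left) {
			child = rb_entry(vma->vm_rb.rb_left,
					 struct vm_area_struct, vm_rb);
			if (child->rb_subtree_gap >= length) {
				vma = child;
				continue;
			}
		}

		/* Then try the gap immediately below this vma itself. */
		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
		gap = vma->vm_start - gap_start;
		if (gap >= length)
			return gap_start;

		/*
		 * Otherwise, by the recursive definition of rb_subtree_gap,
		 * the suitable gap must be in the right subtree, which
		 * therefore exists.
		 */
		vma = rb_entry(vma->vm_rb.rb_right,
			       struct vm_area_struct, vm_rb);
	}
}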
 include/linux/mm_types.h |   9
 mm/mmap.c                | 143
 2 files changed, 141 insertions(+), 11 deletions(-)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 31f8a3af7d94..94fa52b28ee8 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -238,6 +238,14 @@ struct vm_area_struct {
 	struct rb_node vm_rb;
 
 	/*
+	 * Largest free memory gap in bytes to the left of this VMA.
+	 * Either between this VMA and vma->vm_prev, or between one of the
+	 * VMAs below us in the VMA rbtree and its ->vm_prev. This helps
+	 * get_unmapped_area find a free area of the right size.
+	 */
+	unsigned long rb_subtree_gap;
+
+	/*
 	 * For areas with an address space and backing store,
 	 * linkage into the address_space->i_mmap interval tree, or
 	 * linkage of vma in the address_space->i_mmap_nonlinear list.
@@ -322,6 +330,7 @@ struct mm_struct {
 	unsigned long task_size;		/* size of task vm space */
 	unsigned long cached_hole_size; 	/* if non-zero, the largest hole below free_area_cache */
 	unsigned long free_area_cache;		/* first hole of size cached_hole_size or larger */
+	unsigned long highest_vm_end;		/* highest vma end address */
 	pgd_t * pgd;
 	atomic_t mm_users;			/* How many users with user space? */
 	atomic_t mm_count;			/* How many references to "struct mm_struct" (users count as 1) */
diff --git a/mm/mmap.c b/mm/mmap.c
index ebf19031c5e4..bdcea6310fff 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -31,6 +31,7 @@
 #include <linux/audit.h>
 #include <linux/khugepaged.h>
 #include <linux/uprobes.h>
+#include <linux/rbtree_augmented.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -297,6 +298,27 @@ out:
 	return retval;
 }
 
+static long vma_compute_subtree_gap(struct vm_area_struct *vma)
+{
+	unsigned long max, subtree_gap;
+	max = vma->vm_start;
+	if (vma->vm_prev)
+		max -= vma->vm_prev->vm_end;
+	if (vma->vm_rb.rb_left) {
+		subtree_gap = rb_entry(vma->vm_rb.rb_left,
+				struct vm_area_struct, vm_rb)->rb_subtree_gap;
+		if (subtree_gap > max)
+			max = subtree_gap;
+	}
+	if (vma->vm_rb.rb_right) {
+		subtree_gap = rb_entry(vma->vm_rb.rb_right,
+				struct vm_area_struct, vm_rb)->rb_subtree_gap;
+		if (subtree_gap > max)
+			max = subtree_gap;
+	}
+	return max;
+}
+
 #ifdef CONFIG_DEBUG_VM_RB
 static int browse_rb(struct rb_root *root)
 {
@@ -327,6 +349,18 @@ static int browse_rb(struct rb_root *root)
 	return i;
 }
 
+static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
+{
+	struct rb_node *nd;
+
+	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+		struct vm_area_struct *vma;
+		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
+		BUG_ON(vma != ignore &&
+		       vma->rb_subtree_gap != vma_compute_subtree_gap(vma));
+	}
+}
+
 void validate_mm(struct mm_struct *mm)
 {
 	int bug = 0;
@@ -349,9 +383,52 @@ void validate_mm(struct mm_struct *mm)
 	BUG_ON(bug);
 }
 #else
+#define validate_mm_rb(root, ignore) do { } while (0)
 #define validate_mm(mm) do { } while (0)
 #endif
 
+RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
+		     unsigned long, rb_subtree_gap, vma_compute_subtree_gap)
+
+/*
+ * Update augmented rbtree rb_subtree_gap values after vma->vm_start or
+ * vma->vm_prev->vm_end values changed, without modifying the vma's position
+ * in the rbtree.
+ */
+static void vma_gap_update(struct vm_area_struct *vma)
+{
+	/*
+	 * As it turns out, RB_DECLARE_CALLBACKS() already created a callback
+	 * function that does exactly what we want.
+	 */
+	vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
+}
+
+static inline void vma_rb_insert(struct vm_area_struct *vma,
+				 struct rb_root *root)
+{
+	/* All rb_subtree_gap values must be consistent prior to insertion */
+	validate_mm_rb(root, NULL);
+
+	rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
+}
+
+static void vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
+{
+	/*
+	 * All rb_subtree_gap values must be consistent prior to erase,
+	 * with the possible exception of the vma being erased.
+	 */
+	validate_mm_rb(root, vma);
+
+	/*
+	 * Note rb_erase_augmented is a fairly large inline function,
+	 * so make sure we instantiate it only once with our desired
+	 * augmented rbtree callbacks.
+	 */
+	rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
+}
+
 /*
  * vma has some anon_vma assigned, and is already inserted on that
  * anon_vma's interval trees.
@@ -421,8 +498,25 @@ static int find_vma_links(struct mm_struct *mm, unsigned long addr,
 void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct rb_node **rb_link, struct rb_node *rb_parent)
 {
+	/* Update tracking information for the gap following the new vma. */
+	if (vma->vm_next)
+		vma_gap_update(vma->vm_next);
+	else
+		mm->highest_vm_end = vma->vm_end;
+
+	/*
+	 * vma->vm_prev wasn't known when we followed the rbtree to find the
+	 * correct insertion point for that vma. As a result, we could not
+	 * update the vma vm_rb parents rb_subtree_gap values on the way down.
+	 * So, we first insert the vma with a zero rb_subtree_gap value
+	 * (to be consistent with what we did on the way down), and then
+	 * immediately update the gap to the correct value. Finally we
+	 * rebalance the rbtree after all augmented values have been set.
+	 */
 	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
-	rb_insert_color(&vma->vm_rb, &mm->mm_rb);
+	vma->rb_subtree_gap = 0;
+	vma_gap_update(vma);
+	vma_rb_insert(vma, &mm->mm_rb);
 }
 
 static void __vma_link_file(struct vm_area_struct *vma)
@@ -498,12 +592,12 @@ static inline void
 __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct vm_area_struct *prev)
 {
-	struct vm_area_struct *next = vma->vm_next;
+	struct vm_area_struct *next;
 
-	prev->vm_next = next;
+	vma_rb_erase(vma, &mm->mm_rb);
+	prev->vm_next = next = vma->vm_next;
 	if (next)
 		next->vm_prev = prev;
-	rb_erase(&vma->vm_rb, &mm->mm_rb);
 	if (mm->mmap_cache == vma)
 		mm->mmap_cache = prev;
 }
@@ -525,6 +619,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	struct rb_root *root = NULL;
 	struct anon_vma *anon_vma = NULL;
 	struct file *file = vma->vm_file;
+	bool start_changed = false, end_changed = false;
 	long adjust_next = 0;
 	int remove_next = 0;
 
@@ -615,8 +710,14 @@ again: remove_next = 1 + (end > next->vm_end);
 		vma_interval_tree_remove(next, root);
 	}
 
-	vma->vm_start = start;
-	vma->vm_end = end;
+	if (start != vma->vm_start) {
+		vma->vm_start = start;
+		start_changed = true;
+	}
+	if (end != vma->vm_end) {
+		vma->vm_end = end;
+		end_changed = true;
+	}
 	vma->vm_pgoff = pgoff;
 	if (adjust_next) {
 		next->vm_start += adjust_next << PAGE_SHIFT;
@@ -645,6 +746,15 @@ again: remove_next = 1 + (end > next->vm_end);
 		 * (it may either follow vma or precede it).
 		 */
 		__insert_vm_struct(mm, insert);
+	} else {
+		if (start_changed)
+			vma_gap_update(vma);
+		if (end_changed) {
+			if (!next)
+				mm->highest_vm_end = end;
+			else if (!adjust_next)
+				vma_gap_update(next);
+		}
 	}
 
 	if (anon_vma) {
@@ -678,10 +788,13 @@ again: remove_next = 1 + (end > next->vm_end);
 		 * we must remove another next too. It would clutter
 		 * up the code too much to do both in one go.
 		 */
-		if (remove_next == 2) {
-			next = vma->vm_next;
+		next = vma->vm_next;
+		if (remove_next == 2)
 			goto again;
-		}
+		else if (next)
+			vma_gap_update(next);
+		else
+			mm->highest_vm_end = end;
 	}
 	if (insert && file)
 		uprobe_mmap(insert);
@@ -1784,6 +1897,10 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 			anon_vma_interval_tree_pre_update_vma(vma);
 			vma->vm_end = address;
 			anon_vma_interval_tree_post_update_vma(vma);
+			if (vma->vm_next)
+				vma_gap_update(vma->vm_next);
+			else
+				vma->vm_mm->highest_vm_end = address;
 			perf_event_mmap(vma);
 		}
 	}
@@ -1838,6 +1955,7 @@ int expand_downwards(struct vm_area_struct *vma,
 			vma->vm_start = address;
 			vma->vm_pgoff -= grow;
 			anon_vma_interval_tree_post_update_vma(vma);
+			vma_gap_update(vma);
 			perf_event_mmap(vma);
 		}
 	}
@@ -1960,14 +2078,17 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
 	vma->vm_prev = NULL;
 	do {
-		rb_erase(&vma->vm_rb, &mm->mm_rb);
+		vma_rb_erase(vma, &mm->mm_rb);
 		mm->map_count--;
 		tail_vma = vma;
 		vma = vma->vm_next;
 	} while (vma && vma->vm_start < end);
 	*insertion_point = vma;
-	if (vma)
+	if (vma) {
 		vma->vm_prev = prev;
+		vma_gap_update(vma);
+	} else
+		mm->highest_vm_end = prev ? prev->vm_end : 0;
 	tail_vma->vm_next = NULL;
 	if (mm->unmap_area == arch_unmap_area)
 		addr = prev ? prev->vm_end : mm->mmap_base;