author     Paul Mundt <lethal@linux-sh.org>      2009-01-08 07:04:47 -0500
committer  David Howells <dhowells@redhat.com>   2009-01-08 07:04:47 -0500
commit     dd8632a12e500a684478fea0951f380478d56fed
tree       1a12f441f9de14fd233faa92cf13a5fbb0319f41 /mm/nommu.c
parent     8feae13110d60cc6287afabc2887366b0eb226c2
NOMMU: Make mmap allocation page trimming behaviour configurable.
NOMMU mmap allocates a piece of memory for an mmap that's rounded up in size to
the nearest power-of-2 number of pages.  Currently it then discards the excess
pages back to the page allocator, making that memory available for use by other
things.  This can, however, cause a greater amount of fragmentation.

To counter this, a sysctl is added in order to fine-tune the trimming
behaviour.  The default behaviour remains to trim pages aggressively, while
this can either be disabled completely or set to a higher page-granular
watermark in order to have finer-grained control.

vm region vm_top bits taken from an earlier patch by David Howells.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Mike Frysinger <vapier.adi@gmail.com>
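[Editor's illustration, not part of the patch: a minimal user-space sketch of the trim
policy the new sysctl controls.  The helper names (round_up_pow2, pages_kept) and the
plain nr_trim_pages variable are made up for this example; in the kernel the logic is
open-coded in do_mmap_private() against sysctl_nr_trim_pages, as the hunks below show.]

#include <stdio.h>

/* stand-in for sysctl_nr_trim_pages: 0 = never trim, 1 = trim any excess
 * (the default), N = only trim when the excess is at least N pages */
static unsigned long nr_trim_pages = 1;

/* round a request of 'len' pages up to the next power of two, as the
 * NOMMU allocator does when it grabs the backing pages */
static unsigned long round_up_pow2(unsigned long len)
{
	unsigned long total = 1;

	while (total < len)
		total <<= 1;
	return total;
}

/* how many pages the mapping ends up keeping after the optional trim */
static unsigned long pages_kept(unsigned long requested)
{
	unsigned long total = round_up_pow2(requested);

	if (nr_trim_pages && total - requested >= nr_trim_pages)
		return requested;	/* excess handed back to the allocator */
	return total;			/* excess kept, reducing fragmentation */
}

int main(void)
{
	/* with the default watermark of 1, a 5-page request keeps 5 of the 8
	 * pages allocated; with nr_trim_pages = 0 it would keep all 8 */
	printf("5-page mmap keeps %lu page(s)\n", pages_kept(5));
	return 0;
}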
Diffstat (limited to 'mm/nommu.c')
-rw-r--r--	mm/nommu.c	65
1 file changed, 42 insertions(+), 23 deletions(-)
diff --git a/mm/nommu.c b/mm/nommu.c
index 0d363dfcf10e..a6e8ccfbd400 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -10,7 +10,7 @@
  * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
  * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
  * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com>
- * Copyright (c) 2007 Paul Mundt <lethal@linux-sh.org>
+ * Copyright (c) 2007-2008 Paul Mundt <lethal@linux-sh.org>
  */
 
 #include <linux/module.h>
@@ -66,6 +66,7 @@ atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50; /* default is 50% */
 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
+int sysctl_nr_trim_pages = 1; /* page trimming behaviour */
 int heap_stack_gap = 0;
 
 atomic_t mmap_pages_allocated;
@@ -455,6 +456,8 @@ static noinline void validate_nommu_regions(void)
 	last = rb_entry(lastp, struct vm_region, vm_rb);
 	if (unlikely(last->vm_end <= last->vm_start))
 		BUG();
+	if (unlikely(last->vm_top < last->vm_end))
+		BUG();
 
 	while ((p = rb_next(lastp))) {
 		region = rb_entry(p, struct vm_region, vm_rb);
@@ -462,7 +465,9 @@ static noinline void validate_nommu_regions(void)
 
 		if (unlikely(region->vm_end <= region->vm_start))
 			BUG();
-		if (unlikely(region->vm_start < last->vm_end))
+		if (unlikely(region->vm_top < region->vm_end))
+			BUG();
+		if (unlikely(region->vm_start < last->vm_top))
 			BUG();
 
 		lastp = p;
@@ -536,7 +541,7 @@ static void free_page_series(unsigned long from, unsigned long to)
 /*
  * release a reference to a region
  * - the caller must hold the region semaphore, which this releases
- * - the region may not have been added to the tree yet, in which case vm_end
+ * - the region may not have been added to the tree yet, in which case vm_top
  *   will equal vm_start
  */
 static void __put_nommu_region(struct vm_region *region)
@@ -547,7 +552,7 @@ static void __put_nommu_region(struct vm_region *region)
 	BUG_ON(!nommu_region_tree.rb_node);
 
 	if (atomic_dec_and_test(&region->vm_usage)) {
-		if (region->vm_end > region->vm_start)
+		if (region->vm_top > region->vm_start)
 			delete_nommu_region(region);
 		up_write(&nommu_region_sem);
 
@@ -558,7 +563,7 @@ static void __put_nommu_region(struct vm_region *region)
 		 * from ramfs/tmpfs mustn't be released here */
 		if (region->vm_flags & VM_MAPPED_COPY) {
 			kdebug("free series");
-			free_page_series(region->vm_start, region->vm_end);
+			free_page_series(region->vm_start, region->vm_top);
 		}
 		kmem_cache_free(vm_region_jar, region);
 	} else {
@@ -999,6 +1004,10 @@ static int do_mmap_shared_file(struct vm_area_struct *vma)
 	int ret;
 
 	ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
+	if (ret == 0) {
+		vma->vm_region->vm_top = vma->vm_region->vm_end;
+		return ret;
+	}
 	if (ret != -ENOSYS)
 		return ret;
 
@@ -1027,11 +1036,14 @@ static int do_mmap_private(struct vm_area_struct *vma,
 	 */
 	if (vma->vm_file) {
 		ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
-		if (ret != -ENOSYS) {
+		if (ret == 0) {
 			/* shouldn't return success if we're not sharing */
-			BUG_ON(ret == 0 && !(vma->vm_flags & VM_MAYSHARE));
-			return ret; /* success or a real error */
+			BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
+			vma->vm_region->vm_top = vma->vm_region->vm_end;
+			return ret;
 		}
+		if (ret != -ENOSYS)
+			return ret;
 
 		/* getting an ENOSYS error indicates that direct mmap isn't
 		 * possible (as opposed to tried but failed) so we'll try to
@@ -1051,23 +1063,25 @@ static int do_mmap_private(struct vm_area_struct *vma,
 	if (!pages)
 		goto enomem;
 
-	/* we allocated a power-of-2 sized page set, so we need to trim off the
-	 * excess */
 	total = 1 << order;
 	atomic_add(total, &mmap_pages_allocated);
 
 	point = rlen >> PAGE_SHIFT;
-	while (total > point) {
-		order = ilog2(total - point);
-		n = 1 << order;
-		kdebug("shave %lu/%lu @%lu", n, total - point, total);
-		atomic_sub(n, &mmap_pages_allocated);
-		total -= n;
-		set_page_refcounted(pages + total);
-		__free_pages(pages + total, order);
+
+	/* we allocated a power-of-2 sized page set, so we may want to trim off
+	 * the excess */
+	if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages) {
+		while (total > point) {
+			order = ilog2(total - point);
+			n = 1 << order;
+			kdebug("shave %lu/%lu @%lu", n, total - point, total);
+			atomic_sub(n, &mmap_pages_allocated);
+			total -= n;
+			set_page_refcounted(pages + total);
+			__free_pages(pages + total, order);
+		}
 	}
 
-	total = rlen >> PAGE_SHIFT;
 	for (point = 1; point < total; point++)
 		set_page_refcounted(&pages[point]);
 
@@ -1075,6 +1089,7 @@ static int do_mmap_private(struct vm_area_struct *vma,
 	region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY;
 	region->vm_start = (unsigned long) base;
 	region->vm_end = region->vm_start + rlen;
+	region->vm_top = region->vm_start + (total << PAGE_SHIFT);
 
 	vma->vm_start = region->vm_start;
 	vma->vm_end = region->vm_start + len;
@@ -1110,6 +1125,7 @@ error_free:
 	free_page_series(region->vm_start, region->vm_end);
 	region->vm_start = vma->vm_start = 0;
 	region->vm_end = vma->vm_end = 0;
+	region->vm_top = 0;
 	return ret;
 
 enomem:
@@ -1401,7 +1417,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	npages = (addr - vma->vm_start) >> PAGE_SHIFT;
 
 	if (new_below) {
-		region->vm_end = new->vm_end = addr;
+		region->vm_top = region->vm_end = new->vm_end = addr;
 	} else {
 		region->vm_start = new->vm_start = addr;
 		region->vm_pgoff = new->vm_pgoff += npages;
@@ -1418,6 +1434,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 		vma->vm_region->vm_pgoff = vma->vm_pgoff += npages;
 	} else {
 		vma->vm_region->vm_end = vma->vm_end = addr;
+		vma->vm_region->vm_top = addr;
 	}
 	add_nommu_region(vma->vm_region);
 	add_nommu_region(new->vm_region);
@@ -1454,10 +1471,12 @@ static int shrink_vma(struct mm_struct *mm,
 
 	down_write(&nommu_region_sem);
 	delete_nommu_region(region);
-	if (from > region->vm_start)
-		region->vm_end = from;
-	else
+	if (from > region->vm_start) {
+		to = region->vm_top;
+		region->vm_top = region->vm_end = from;
+	} else {
 		region->vm_start = to;
+	}
 	add_nommu_region(region);
 	up_write(&nommu_region_sem);
 