 Documentation/nommu-mmap.txt |   15
 Documentation/sysctl/vm.txt  |   18
 include/linux/mm_types.h     |    1
 kernel/sysctl.c              |   14
 mm/nommu.c                   |   65
 5 files changed, 90 insertions(+), 23 deletions(-)
diff --git a/Documentation/nommu-mmap.txt b/Documentation/nommu-mmap.txt
index 02b89dcf38ac..b565e8279d13 100644
--- a/Documentation/nommu-mmap.txt
+++ b/Documentation/nommu-mmap.txt
@@ -248,3 +248,18 @@ PROVIDING SHAREABLE BLOCK DEVICE SUPPORT
 Provision of shared mappings on block device files is exactly the same as for
 character devices. If there isn't a real device underneath, then the driver
 should allocate sufficient contiguous memory to honour any supported mapping.
+
+
+=================================
+ADJUSTING PAGE TRIMMING BEHAVIOUR
+=================================
+
+NOMMU mmap automatically rounds up to the nearest power-of-2 number of pages
+when performing an allocation. This can have adverse effects on memory
+fragmentation, and as such, is left configurable. The default behaviour is to
+aggressively trim allocations and discard any excess pages back in to the page
+allocator. In order to retain finer-grained control over fragmentation, this
+behaviour can either be disabled completely, or bumped up to a higher page
+watermark where trimming begins.
+
+Page trimming behaviour is configurable via the sysctl `vm.nr_trim_pages'.
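As a side note on the behaviour documented above: because the allocation is rounded up to a power-of-2 number of pages, a request only slightly larger than a power of 2 can nearly double its footprint until the excess is trimmed. The following user-space sketch is illustrative only (it is not part of the patch, and it assumes a fixed 4KiB page size rather than querying it); it computes how many excess pages a given request leaves for the trimming pass to reclaim:

#include <stdio.h>

/* Illustrative only (not part of the patch): how many pages a NOMMU private
 * mapping occupies before any trimming, assuming a 4KiB page size.  The
 * kernel does the equivalent rounding with get_order(). */
#define EXAMPLE_PAGE_SIZE 4096UL

static unsigned long pages_needed(unsigned long len)
{
	return (len + EXAMPLE_PAGE_SIZE - 1) / EXAMPLE_PAGE_SIZE;
}

static unsigned long pages_allocated(unsigned long len)
{
	unsigned long pages = pages_needed(len);
	unsigned long total = 1;

	while (total < pages)		/* round up to a power of 2 */
		total <<= 1;
	return total;
}

int main(void)
{
	unsigned long len = 33 * EXAMPLE_PAGE_SIZE;	/* a 132KiB request */

	printf("requested %lu pages, allocated %lu, excess %lu\n",
	       pages_needed(len), pages_allocated(len),
	       pages_allocated(len) - pages_needed(len));
	return 0;
}

Run as-is, the 33-page request reports 31 excess pages, which is the worst case just past a power-of-2 boundary and is exactly what the trimming pass gives back.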
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index cd05994a49e6..a3415070bcac 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -38,6 +38,7 @@ Currently, these files are in /proc/sys/vm:
 - numa_zonelist_order
 - nr_hugepages
 - nr_overcommit_hugepages
+- nr_trim_pages (only if CONFIG_MMU=n)
 
 ==============================================================
 
@@ -348,3 +349,20 @@ Change the maximum size of the hugepage pool. The maximum is
 nr_hugepages + nr_overcommit_hugepages.
 
 See Documentation/vm/hugetlbpage.txt
+
+==============================================================
+
+nr_trim_pages
+
+This is available only on NOMMU kernels.
+
+This value adjusts the excess page trimming behaviour of power-of-2 aligned
+NOMMU mmap allocations.
+
+A value of 0 disables trimming of allocations entirely, while a value of 1
+trims excess pages aggressively. Any value >= 1 acts as the watermark where
+trimming of allocations is initiated.
+
+The default value is 1.
+
+See Documentation/nommu-mmap.txt for more information.
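To make the knob concrete, here is a minimal user-space sketch (illustrative only, not part of the patch) that disables trimming at runtime by writing to the procfs file this sysctl exposes under /proc/sys/vm; the same value can also be set with sysctl(8) or an /etc/sysctl.conf entry.

#include <stdio.h>

/* Illustrative only: tune the trimming watermark from user space by writing
 * to /proc/sys/vm/nr_trim_pages, the file registered by this patch.
 * Writing "0" disables trimming; larger values defer it. */
int main(void)
{
	FILE *f = fopen("/proc/sys/vm/nr_trim_pages", "w");

	if (!f) {
		perror("nr_trim_pages");	/* MMU kernel, or no permission */
		return 1;
	}
	fputs("0\n", f);
	fclose(f);
	return 0;
}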
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 1c1e0d3a1714..92915e81443f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -106,6 +106,7 @@ struct vm_region {
 	unsigned long vm_flags;		/* VMA vm_flags */
 	unsigned long vm_start;		/* start address of region */
 	unsigned long vm_end;		/* region initialised to here */
+	unsigned long vm_top;		/* region allocated to here */
 	unsigned long vm_pgoff;		/* the offset in vm_file corresponding to vm_start */
 	struct file *vm_file;		/* the backing file or NULL */
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 92f6e5bc3c24..89d74436318c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -82,6 +82,9 @@ extern int percpu_pagelist_fraction;
 extern int compat_log;
 extern int latencytop_enabled;
 extern int sysctl_nr_open_min, sysctl_nr_open_max;
+#ifndef CONFIG_MMU
+extern int sysctl_nr_trim_pages;
+#endif
 #ifdef CONFIG_RCU_TORTURE_TEST
 extern int rcutorture_runnable;
 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
@@ -1102,6 +1105,17 @@ static struct ctl_table vm_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
+#else
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "nr_trim_pages",
+		.data		= &sysctl_nr_trim_pages,
+		.maxlen		= sizeof(sysctl_nr_trim_pages),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
 #endif
 	{
 		.ctl_name	= VM_LAPTOP_MODE,
diff --git a/mm/nommu.c b/mm/nommu.c
index 0d363dfcf10e..a6e8ccfbd400 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -10,7 +10,7 @@
  * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
  * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
  * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com>
- * Copyright (c) 2007 Paul Mundt <lethal@linux-sh.org>
+ * Copyright (c) 2007-2008 Paul Mundt <lethal@linux-sh.org>
  */
 
 #include <linux/module.h>
@@ -66,6 +66,7 @@ atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50; /* default is 50% */
 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
+int sysctl_nr_trim_pages = 1; /* page trimming behaviour */
 int heap_stack_gap = 0;
 
 atomic_t mmap_pages_allocated;
@@ -455,6 +456,8 @@ static noinline void validate_nommu_regions(void) | |||
455 | last = rb_entry(lastp, struct vm_region, vm_rb); | 456 | last = rb_entry(lastp, struct vm_region, vm_rb); |
456 | if (unlikely(last->vm_end <= last->vm_start)) | 457 | if (unlikely(last->vm_end <= last->vm_start)) |
457 | BUG(); | 458 | BUG(); |
459 | if (unlikely(last->vm_top < last->vm_end)) | ||
460 | BUG(); | ||
458 | 461 | ||
459 | while ((p = rb_next(lastp))) { | 462 | while ((p = rb_next(lastp))) { |
460 | region = rb_entry(p, struct vm_region, vm_rb); | 463 | region = rb_entry(p, struct vm_region, vm_rb); |
@@ -462,7 +465,9 @@ static noinline void validate_nommu_regions(void) | |||
462 | 465 | ||
463 | if (unlikely(region->vm_end <= region->vm_start)) | 466 | if (unlikely(region->vm_end <= region->vm_start)) |
464 | BUG(); | 467 | BUG(); |
465 | if (unlikely(region->vm_start < last->vm_end)) | 468 | if (unlikely(region->vm_top < region->vm_end)) |
469 | BUG(); | ||
470 | if (unlikely(region->vm_start < last->vm_top)) | ||
466 | BUG(); | 471 | BUG(); |
467 | 472 | ||
468 | lastp = p; | 473 | lastp = p; |
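Together with the existing checks, the new assertions pin down the invariants every region must now satisfy: vm_start < vm_end <= vm_top, and neighbouring regions may not overlap even in their untrimmed tails. A user-space restatement of those invariants, as a sketch only (the kernel walks an rbtree rather than a sorted array):

#include <assert.h>

/* Illustrative only: the invariants validate_nommu_regions() enforces,
 * restated for a plain array of regions sorted by start address. */
struct region {
	unsigned long vm_start;	/* start of the mapping */
	unsigned long vm_end;	/* end of the part handed to the mapper */
	unsigned long vm_top;	/* end of the pages actually allocated */
};

static void validate(const struct region *r, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		assert(r[i].vm_start < r[i].vm_end);	/* non-empty */
		assert(r[i].vm_end <= r[i].vm_top);	/* trimmed, never grown */
		if (i > 0)				/* no overlap, tails included */
			assert(r[i].vm_start >= r[i - 1].vm_top);
	}
}

int main(void)
{
	const struct region regions[] = {
		{ 0x1000, 0x3000, 0x4000 },
		{ 0x4000, 0x5000, 0x5000 },
	};

	validate(regions, 2);
	return 0;
}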
@@ -536,7 +541,7 @@ static void free_page_series(unsigned long from, unsigned long to) | |||
536 | /* | 541 | /* |
537 | * release a reference to a region | 542 | * release a reference to a region |
538 | * - the caller must hold the region semaphore, which this releases | 543 | * - the caller must hold the region semaphore, which this releases |
539 | * - the region may not have been added to the tree yet, in which case vm_end | 544 | * - the region may not have been added to the tree yet, in which case vm_top |
540 | * will equal vm_start | 545 | * will equal vm_start |
541 | */ | 546 | */ |
542 | static void __put_nommu_region(struct vm_region *region) | 547 | static void __put_nommu_region(struct vm_region *region) |
@@ -547,7 +552,7 @@ static void __put_nommu_region(struct vm_region *region) | |||
547 | BUG_ON(!nommu_region_tree.rb_node); | 552 | BUG_ON(!nommu_region_tree.rb_node); |
548 | 553 | ||
549 | if (atomic_dec_and_test(®ion->vm_usage)) { | 554 | if (atomic_dec_and_test(®ion->vm_usage)) { |
550 | if (region->vm_end > region->vm_start) | 555 | if (region->vm_top > region->vm_start) |
551 | delete_nommu_region(region); | 556 | delete_nommu_region(region); |
552 | up_write(&nommu_region_sem); | 557 | up_write(&nommu_region_sem); |
553 | 558 | ||
@@ -558,7 +563,7 @@ static void __put_nommu_region(struct vm_region *region) | |||
558 | * from ramfs/tmpfs mustn't be released here */ | 563 | * from ramfs/tmpfs mustn't be released here */ |
559 | if (region->vm_flags & VM_MAPPED_COPY) { | 564 | if (region->vm_flags & VM_MAPPED_COPY) { |
560 | kdebug("free series"); | 565 | kdebug("free series"); |
561 | free_page_series(region->vm_start, region->vm_end); | 566 | free_page_series(region->vm_start, region->vm_top); |
562 | } | 567 | } |
563 | kmem_cache_free(vm_region_jar, region); | 568 | kmem_cache_free(vm_region_jar, region); |
564 | } else { | 569 | } else { |
@@ -999,6 +1004,10 @@ static int do_mmap_shared_file(struct vm_area_struct *vma) | |||
999 | int ret; | 1004 | int ret; |
1000 | 1005 | ||
1001 | ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); | 1006 | ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); |
1007 | if (ret == 0) { | ||
1008 | vma->vm_region->vm_top = vma->vm_region->vm_end; | ||
1009 | return ret; | ||
1010 | } | ||
1002 | if (ret != -ENOSYS) | 1011 | if (ret != -ENOSYS) |
1003 | return ret; | 1012 | return ret; |
1004 | 1013 | ||
@@ -1027,11 +1036,14 @@ static int do_mmap_private(struct vm_area_struct *vma, | |||
1027 | */ | 1036 | */ |
1028 | if (vma->vm_file) { | 1037 | if (vma->vm_file) { |
1029 | ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); | 1038 | ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); |
1030 | if (ret != -ENOSYS) { | 1039 | if (ret == 0) { |
1031 | /* shouldn't return success if we're not sharing */ | 1040 | /* shouldn't return success if we're not sharing */ |
1032 | BUG_ON(ret == 0 && !(vma->vm_flags & VM_MAYSHARE)); | 1041 | BUG_ON(!(vma->vm_flags & VM_MAYSHARE)); |
1033 | return ret; /* success or a real error */ | 1042 | vma->vm_region->vm_top = vma->vm_region->vm_end; |
1043 | return ret; | ||
1034 | } | 1044 | } |
1045 | if (ret != -ENOSYS) | ||
1046 | return ret; | ||
1035 | 1047 | ||
1036 | /* getting an ENOSYS error indicates that direct mmap isn't | 1048 | /* getting an ENOSYS error indicates that direct mmap isn't |
1037 | * possible (as opposed to tried but failed) so we'll try to | 1049 | * possible (as opposed to tried but failed) so we'll try to |
@@ -1051,23 +1063,25 @@ static int do_mmap_private(struct vm_area_struct *vma,
 	if (!pages)
 		goto enomem;
 
-	/* we allocated a power-of-2 sized page set, so we need to trim off the
-	 * excess */
 	total = 1 << order;
 	atomic_add(total, &mmap_pages_allocated);
 
 	point = rlen >> PAGE_SHIFT;
-	while (total > point) {
-		order = ilog2(total - point);
-		n = 1 << order;
-		kdebug("shave %lu/%lu @%lu", n, total - point, total);
-		atomic_sub(n, &mmap_pages_allocated);
-		total -= n;
-		set_page_refcounted(pages + total);
-		__free_pages(pages + total, order);
+
+	/* we allocated a power-of-2 sized page set, so we may want to trim off
+	 * the excess */
+	if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages) {
+		while (total > point) {
+			order = ilog2(total - point);
+			n = 1 << order;
+			kdebug("shave %lu/%lu @%lu", n, total - point, total);
+			atomic_sub(n, &mmap_pages_allocated);
+			total -= n;
+			set_page_refcounted(pages + total);
+			__free_pages(pages + total, order);
+		}
 	}
 
-	total = rlen >> PAGE_SHIFT;
 	for (point = 1; point < total; point++)
 		set_page_refcounted(&pages[point]);
 
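The reworked loop still gives the excess back in power-of-2 sized chunks, largest first, working down from the top of the allocation; it is merely gated by sysctl_nr_trim_pages now. A user-space analogue of how the shave proceeds (illustrative only; ilog2() is a kernel helper, so it is open-coded here):

#include <stdio.h>

/* Illustrative analogue of the shaving loop: starting from a power-of-2
 * page count 'total', give back the pages beyond 'point' in the largest
 * power-of-2 chunks that still fit, mirroring the order-sized blocks that
 * __free_pages() operates on. */
static unsigned int ilog2_ul(unsigned long v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	unsigned long total = 64, point = 33;	/* 64 pages allocated, 33 needed */

	while (total > point) {
		unsigned long n = 1UL << ilog2_ul(total - point);

		total -= n;
		printf("free %lu pages at page offset %lu\n", n, total);
	}
	return 0;
}

For a 64-page allocation of which 33 pages are needed, the chunks come off as 16, 8, 4, 2 and 1 pages.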
@@ -1075,6 +1089,7 @@ static int do_mmap_private(struct vm_area_struct *vma,
 	region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY;
 	region->vm_start = (unsigned long) base;
 	region->vm_end = region->vm_start + rlen;
+	region->vm_top = region->vm_start + (total << PAGE_SHIFT);
 
 	vma->vm_start = region->vm_start;
 	vma->vm_end = region->vm_start + len;
@@ -1110,6 +1125,7 @@ error_free:
 	free_page_series(region->vm_start, region->vm_end);
 	region->vm_start = vma->vm_start = 0;
 	region->vm_end = vma->vm_end = 0;
+	region->vm_top = 0;
 	return ret;
 
 enomem:
@@ -1401,7 +1417,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	npages = (addr - vma->vm_start) >> PAGE_SHIFT;
 
 	if (new_below) {
-		region->vm_end = new->vm_end = addr;
+		region->vm_top = region->vm_end = new->vm_end = addr;
 	} else {
 		region->vm_start = new->vm_start = addr;
 		region->vm_pgoff = new->vm_pgoff += npages;
@@ -1418,6 +1434,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 		vma->vm_region->vm_pgoff = vma->vm_pgoff += npages;
 	} else {
 		vma->vm_region->vm_end = vma->vm_end = addr;
+		vma->vm_region->vm_top = addr;
 	}
 	add_nommu_region(vma->vm_region);
 	add_nommu_region(new->vm_region);
@@ -1454,10 +1471,12 @@ static int shrink_vma(struct mm_struct *mm,
 
 	down_write(&nommu_region_sem);
 	delete_nommu_region(region);
-	if (from > region->vm_start)
-		region->vm_end = from;
-	else
+	if (from > region->vm_start) {
+		to = region->vm_top;
+		region->vm_top = region->vm_end = from;
+	} else {
 		region->vm_start = to;
+	}
 	add_nommu_region(region);
 	up_write(&nommu_region_sem);
 