aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMel Gorman <mel@csn.ul.ie>2010-05-24 17:32:31 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-05-25 11:06:59 -0400
commit5e7719058079a1423ccce56148b0aaa56b2df821 (patch)
tree3666cb3d5540dcaa3d8e7df8c293a0ad603a181c
parent56de7263fcf3eb10c8dcdf8d59a9cec831795f3f (diff)
mm: compaction: add a tunable that decides when memory should be compacted and when it should be reclaimed
The kernel applies some heuristics when deciding if memory should be compacted or reclaimed to satisfy a high-order allocation. One of these is based on the fragmentation. If the index is below 500, memory will not be compacted. This choice is arbitrary and not based on data. To help optimise the system and set a sensible default for this value, this patch adds a sysctl extfrag_threshold. The kernel will only compact memory if the fragmentation index is above the extfrag_threshold. [randy.dunlap@oracle.com: Fix build errors when proc fs is not configured] Signed-off-by: Mel Gorman <mel@csn.ul.ie> Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com> Cc: Rik van Riel <riel@redhat.com> Cc: Minchan Kim <minchan.kim@gmail.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Christoph Lameter <cl@linux-foundation.org> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/sysctl/vm.txt15
-rw-r--r--include/linux/compaction.h3
-rw-r--r--kernel/sysctl.c15
-rw-r--r--mm/compaction.c12
4 files changed, 44 insertions, 1 deletions
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 56dd29b97a91..5fdbb612aeb8 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -27,6 +27,7 @@ Currently, these files are in /proc/sys/vm:
27- dirty_ratio 27- dirty_ratio
28- dirty_writeback_centisecs 28- dirty_writeback_centisecs
29- drop_caches 29- drop_caches
30- extfrag_threshold
30- hugepages_treat_as_movable 31- hugepages_treat_as_movable
31- hugetlb_shm_group 32- hugetlb_shm_group
32- laptop_mode 33- laptop_mode
@@ -149,6 +150,20 @@ user should run `sync' first.
149 150
150============================================================== 151==============================================================
151 152
153extfrag_threshold
154
155This parameter affects whether the kernel will compact memory or direct
156reclaim to satisfy a high-order allocation. The extfrag/extfrag_index file in
157debugfs shows what the fragmentation index for each order is in each zone in
158the system. Values tending towards 0 imply allocations would fail due to lack
159of memory, values towards 1000 imply failures are due to fragmentation and -1
160implies that the allocation will succeed as long as watermarks are met.
161
162The kernel will not compact memory in a zone if the
163fragmentation index is <= extfrag_threshold. The default value is 500.
164
165==============================================================
166
152hugepages_treat_as_movable 167hugepages_treat_as_movable
153 168
154This parameter is only useful when kernelcore= is specified at boot time to 169This parameter is only useful when kernelcore= is specified at boot time to
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index eed40ec4280b..3719325c6091 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -15,6 +15,9 @@
15extern int sysctl_compact_memory; 15extern int sysctl_compact_memory;
16extern int sysctl_compaction_handler(struct ctl_table *table, int write, 16extern int sysctl_compaction_handler(struct ctl_table *table, int write,
17 void __user *buffer, size_t *length, loff_t *ppos); 17 void __user *buffer, size_t *length, loff_t *ppos);
18extern int sysctl_extfrag_threshold;
19extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
20 void __user *buffer, size_t *length, loff_t *ppos);
18 21
19extern int fragmentation_index(struct zone *zone, unsigned int order); 22extern int fragmentation_index(struct zone *zone, unsigned int order);
20extern unsigned long try_to_compact_pages(struct zonelist *zonelist, 23extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 284f330d6a01..84ff5e75c084 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -263,6 +263,11 @@ static int min_sched_shares_ratelimit = 100000; /* 100 usec */
263static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */ 263static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */
264#endif 264#endif
265 265
266#ifdef CONFIG_COMPACTION
267static int min_extfrag_threshold;
268static int max_extfrag_threshold = 1000;
269#endif
270
266static struct ctl_table kern_table[] = { 271static struct ctl_table kern_table[] = {
267 { 272 {
268 .procname = "sched_child_runs_first", 273 .procname = "sched_child_runs_first",
@@ -1130,6 +1135,16 @@ static struct ctl_table vm_table[] = {
1130 .mode = 0200, 1135 .mode = 0200,
1131 .proc_handler = sysctl_compaction_handler, 1136 .proc_handler = sysctl_compaction_handler,
1132 }, 1137 },
1138 {
1139 .procname = "extfrag_threshold",
1140 .data = &sysctl_extfrag_threshold,
1141 .maxlen = sizeof(int),
1142 .mode = 0644,
1143 .proc_handler = sysctl_extfrag_handler,
1144 .extra1 = &min_extfrag_threshold,
1145 .extra2 = &max_extfrag_threshold,
1146 },
1147
1133#endif /* CONFIG_COMPACTION */ 1148#endif /* CONFIG_COMPACTION */
1134 { 1149 {
1135 .procname = "min_free_kbytes", 1150 .procname = "min_free_kbytes",
diff --git a/mm/compaction.c b/mm/compaction.c
index 9583e193dc47..94cce51b0b35 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -433,6 +433,8 @@ static unsigned long compact_zone_order(struct zone *zone,
433 return compact_zone(zone, &cc); 433 return compact_zone(zone, &cc);
434} 434}
435 435
436int sysctl_extfrag_threshold = 500;
437
436/** 438/**
437 * try_to_compact_pages - Direct compact to satisfy a high-order allocation 439 * try_to_compact_pages - Direct compact to satisfy a high-order allocation
438 * @zonelist: The zonelist used for the current allocation 440 * @zonelist: The zonelist used for the current allocation
@@ -491,7 +493,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
491 * Only compact if a failure would be due to fragmentation. 493 * Only compact if a failure would be due to fragmentation.
492 */ 494 */
493 fragindex = fragmentation_index(zone, order); 495 fragindex = fragmentation_index(zone, order);
494 if (fragindex >= 0 && fragindex <= 500) 496 if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
495 continue; 497 continue;
496 498
497 if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) { 499 if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) {
@@ -572,6 +574,14 @@ int sysctl_compaction_handler(struct ctl_table *table, int write,
572 return 0; 574 return 0;
573} 575}
574 576
577int sysctl_extfrag_handler(struct ctl_table *table, int write,
578 void __user *buffer, size_t *length, loff_t *ppos)
579{
580 proc_dointvec_minmax(table, write, buffer, length, ppos);
581
582 return 0;
583}
584
575#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA) 585#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
576ssize_t sysfs_compact_node(struct sys_device *dev, 586ssize_t sysfs_compact_node(struct sys_device *dev,
577 struct sysdev_attribute *attr, 587 struct sysdev_attribute *attr,