aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorChristoph Lameter <clameter@sgi.com>2006-07-03 03:24:13 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-07-03 18:26:59 -0400
commit9614634fe6a138fd8ae044950700d2af8d203f97 (patch)
tree9b020c1d36d8625f4048c057058efb2e17c81973 /include
parentcb6358eb69d9854f65f2979c0ce9280eee041828 (diff)
[PATCH] ZVC/zone_reclaim: Leave 1% of unmapped pagecache pages for file I/O
It turns out that it is advantageous to leave a small portion of unmapped file backed pages if all of a zone's pages (or almost all pages) are allocated and so the page allocator has to go off-node. This allows recently used file I/O buffers to stay on the node and reduces the times that zone reclaim is invoked if file I/O occurs when we run out of memory in a zone. The problem is that zone reclaim runs too frequently when the page cache is used for file I/O (read write and therefore unmapped pages!) alone and we have almost all pages of the zone allocated. Zone reclaim may remove 32 unmapped pages. File I/O will use these pages for the next read/write requests and the unmapped pages increase. After the zone has filled up again zone reclaim will remove it again after only 32 pages. This cycle is too inefficient and there are potentially too many zone reclaim cycles. With the 1% boundary we may still remove all unmapped pages for file I/O in zone reclaim pass. However. it will take a large number of read and writes to get back to 1% again where we trigger zone reclaim again. The zone reclaim 2.6.16/17 does not show this behavior because we have a 30 second timeout. [akpm@osdl.org: rename the /proc file and the variable] Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/mmzone.h6
-rw-r--r--include/linux/swap.h1
-rw-r--r--include/linux/sysctl.h2
3 files changed, 8 insertions, 1 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 27e748eb72b0..656b588a9f96 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -150,6 +150,10 @@ struct zone {
150 unsigned long lowmem_reserve[MAX_NR_ZONES]; 150 unsigned long lowmem_reserve[MAX_NR_ZONES];
151 151
152#ifdef CONFIG_NUMA 152#ifdef CONFIG_NUMA
153 /*
154 * zone reclaim becomes active if more unmapped pages exist.
155 */
156 unsigned long min_unmapped_ratio;
153 struct per_cpu_pageset *pageset[NR_CPUS]; 157 struct per_cpu_pageset *pageset[NR_CPUS];
154#else 158#else
155 struct per_cpu_pageset pageset[NR_CPUS]; 159 struct per_cpu_pageset pageset[NR_CPUS];
@@ -414,6 +418,8 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
414 void __user *, size_t *, loff_t *); 418 void __user *, size_t *, loff_t *);
415int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *, 419int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *,
416 void __user *, size_t *, loff_t *); 420 void __user *, size_t *, loff_t *);
421int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
422 struct file *, void __user *, size_t *, loff_t *);
417 423
418#include <linux/topology.h> 424#include <linux/topology.h>
419/* Returns the number of the current Node. */ 425/* Returns the number of the current Node. */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index cf6ca6e377bd..5e59184c9096 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -189,6 +189,7 @@ extern long vm_total_pages;
189 189
190#ifdef CONFIG_NUMA 190#ifdef CONFIG_NUMA
191extern int zone_reclaim_mode; 191extern int zone_reclaim_mode;
192extern int sysctl_min_unmapped_ratio;
192extern int zone_reclaim(struct zone *, gfp_t, unsigned int); 193extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
193#else 194#else
194#define zone_reclaim_mode 0 195#define zone_reclaim_mode 0
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 46e4d8f2771f..e4b1a4d4dcf3 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -188,7 +188,7 @@ enum
188 VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */ 188 VM_DROP_PAGECACHE=29, /* int: nuke lots of pagecache */
189 VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */ 189 VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
190 VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */ 190 VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
191 VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */ 191 VM_MIN_UNMAPPED=32, /* Set min percent of unmapped pages */
192 VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ 192 VM_PANIC_ON_OOM=33, /* panic at out-of-memory */
193 VM_VDSO_ENABLED=34, /* map VDSO into new processes? */ 193 VM_VDSO_ENABLED=34, /* map VDSO into new processes? */
194}; 194};