aboutsummaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
authorChristoph Lameter <clameter@sgi.com>2006-07-03 03:24:13 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-07-03 18:26:59 -0400
commit9614634fe6a138fd8ae044950700d2af8d203f97 (patch)
tree9b020c1d36d8625f4048c057058efb2e17c81973 /mm/page_alloc.c
parentcb6358eb69d9854f65f2979c0ce9280eee041828 (diff)
[PATCH] ZVC/zone_reclaim: Leave 1% of unmapped pagecache pages for file I/O
It turns out that it is advantageous to leave a small portion of unmapped file backed pages if all of a zone's pages (or almost all pages) are allocated and so the page allocator has to go off-node. This allows recently used file I/O buffers to stay on the node and reduces the times that zone reclaim is invoked if file I/O occurs when we run out of memory in a zone. The problem is that zone reclaim runs too frequently when the page cache is used for file I/O (read write and therefore unmapped pages!) alone and we have almost all pages of the zone allocated. Zone reclaim may remove 32 unmapped pages. File I/O will use these pages for the next read/write requests and the unmapped pages increase. After the zone has filled up again zone reclaim will remove it again after only 32 pages. This cycle is too inefficient and there are potentially too many zone reclaim cycles. With the 1% boundary we may still remove all unmapped pages for file I/O in zone reclaim pass. However. it will take a large number of read and writes to get back to 1% again where we trigger zone reclaim again. The zone reclaim 2.6.16/17 does not show this behavior because we have a 30 second timeout. [akpm@osdl.org: rename the /proc file and the variable] Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c22
1 files changed, 22 insertions, 0 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3e792a583f3b..54a4f5375bba 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2005,6 +2005,10 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
2005 2005
2006 zone->spanned_pages = size; 2006 zone->spanned_pages = size;
2007 zone->present_pages = realsize; 2007 zone->present_pages = realsize;
2008#ifdef CONFIG_NUMA
2009 zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio)
2010 / 100;
2011#endif
2008 zone->name = zone_names[j]; 2012 zone->name = zone_names[j];
2009 spin_lock_init(&zone->lock); 2013 spin_lock_init(&zone->lock);
2010 spin_lock_init(&zone->lru_lock); 2014 spin_lock_init(&zone->lru_lock);
@@ -2298,6 +2302,24 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
2298 return 0; 2302 return 0;
2299} 2303}
2300 2304
2305#ifdef CONFIG_NUMA
2306int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
2307 struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
2308{
2309 struct zone *zone;
2310 int rc;
2311
2312 rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
2313 if (rc)
2314 return rc;
2315
2316 for_each_zone(zone)
2317 zone->min_unmapped_ratio = (zone->present_pages *
2318 sysctl_min_unmapped_ratio) / 100;
2319 return 0;
2320}
2321#endif
2322
2301/* 2323/*
2302 * lowmem_reserve_ratio_sysctl_handler - just a wrapper around 2324 * lowmem_reserve_ratio_sysctl_handler - just a wrapper around
2303 * proc_dointvec() so that we can call setup_per_zone_lowmem_reserve() 2325 * proc_dointvec() so that we can call setup_per_zone_lowmem_reserve()