path: root/mm/page_alloc.c
author		Rik van Riel <riel@redhat.com>	2008-10-18 23:26:34 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-10-20 11:50:25 -0400
commit		556adecba110bf5f1db6c6b56416cfab5bcab698 (patch)
tree		a721d84d28c4d99a54632b472b452ea3d4b2b137 /mm/page_alloc.c
parent		4f98a2fee8acdb4ac84545df98cccecfd130f8db (diff)
vmscan: second chance replacement for anonymous pages
We avoid evicting and scanning anonymous pages for the most part, but under
some workloads we can end up with most of memory filled with anonymous pages.
At that point, we suddenly need to clear the referenced bits on all of memory,
which can take ages on very large memory systems.

We can reduce the maximum number of pages that need to be scanned by not
taking the referenced state into account when deactivating an anonymous page.
After all, every anonymous page starts out referenced, so why check?

If an anonymous page gets referenced again before it reaches the end of the
inactive list, we move it back to the active list.

To keep the maximum amount of necessary work reasonable, we scale the active
to inactive ratio with the size of memory, using the formula
active:inactive ratio = sqrt(memory in GB * 10).

Kswapd CPU use now seems to scale by the amount of pageout bandwidth, instead
of by the amount of memory present in the system.

[kamezawa.hiroyu@jp.fujitsu.com: fix OOM with memcg]
[kamezawa.hiroyu@jp.fujitsu.com: memcg: lru scan fix]
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
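To make the sizing formula above concrete, here is a standalone userspace
sketch (not part of this patch; isqrt() merely stands in for the kernel's
int_sqrt()) that prints the target ratio and the resulting maximum inactive
anon size for a few memory sizes:

/* Standalone sketch of the sizing formula: ratio = sqrt(memory in GB * 10). */
#include <stdio.h>

/* Integer square root, standing in for the kernel's int_sqrt(). */
static unsigned long isqrt(unsigned long x)
{
	unsigned long r = 0;

	while ((r + 1) * (r + 1) <= x)
		r++;
	return r;
}

int main(void)
{
	unsigned long sizes_gb[] = { 1, 10, 100, 1024, 10240 };
	unsigned long i;

	for (i = 0; i < sizeof(sizes_gb) / sizeof(sizes_gb[0]); i++) {
		unsigned long gb = sizes_gb[i];
		unsigned long ratio = isqrt(10 * gb);

		if (!ratio)
			ratio = 1;
		/* A ratio of N means N:1 active:inactive, i.e. ~1/(N+1) inactive. */
		printf("%6lu GB -> inactive_ratio %3lu, inactive anon ~%lu MB\n",
		       gb, ratio, gb * 1024 / (ratio + 1));
	}
	return 0;
}

Its output approximately reproduces the 1GB through 10TB rows of the table in
the code comment added by the patch below.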
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	41
1 file changed, 41 insertions(+), 0 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 740a16a32c22..79c0981b1d32 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4263,6 +4263,46 @@ void setup_per_zone_pages_min(void)
 	calculate_totalreserve_pages();
 }
 
+/**
+ * setup_per_zone_inactive_ratio - called when min_free_kbytes changes.
+ *
+ * The inactive anon list should be small enough that the VM never has to
+ * do too much work, but large enough that each inactive page has a chance
+ * to be referenced again before it is swapped out.
+ *
+ * The inactive_anon ratio is the target ratio of ACTIVE_ANON to
+ * INACTIVE_ANON pages on this zone's LRU, maintained by the
+ * pageout code. A zone->inactive_ratio of 3 means 3:1 or 25% of
+ * the anonymous pages are kept on the inactive list.
+ *
+ *  total     target    max
+ *  memory    ratio     inactive anon
+ * -------------------------------------
+ *    10MB       1         5MB
+ *   100MB       1        50MB
+ *     1GB       3       250MB
+ *    10GB      10       0.9GB
+ *   100GB      31         3GB
+ *     1TB     101        10GB
+ *    10TB     320        32GB
+ */
+void setup_per_zone_inactive_ratio(void)
+{
+	struct zone *zone;
+
+	for_each_zone(zone) {
+		unsigned int gb, ratio;
+
+		/* Zone size in gigabytes */
+		gb = zone->present_pages >> (30 - PAGE_SHIFT);
+		ratio = int_sqrt(10 * gb);
+		if (!ratio)
+			ratio = 1;
+
+		zone->inactive_ratio = ratio;
+	}
+}
+
 /*
  * Initialise min_free_kbytes.
  *
@@ -4300,6 +4340,7 @@ static int __init init_per_zone_pages_min(void)
 		min_free_kbytes = 65536;
 	setup_per_zone_pages_min();
 	setup_per_zone_lowmem_reserve();
+	setup_per_zone_inactive_ratio();
 	return 0;
 }
 module_init(init_per_zone_pages_min)
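For context on how zone->inactive_ratio is meant to be consumed (an
assumption: the companion vmscan changes are not part of this diff, and the
names below are hypothetical), a minimal userspace sketch of the balance
check the pageout code is expected to perform:

/*
 * Illustrative sketch only (assumed usage, not part of this diff): treat the
 * inactive anon list as too small whenever active > inactive * inactive_ratio,
 * i.e. when the target ratio from the table above is no longer met.
 */
#include <stdio.h>

static int inactive_anon_is_low(unsigned long active, unsigned long inactive,
				unsigned int inactive_ratio)
{
	return inactive * inactive_ratio < active;
}

int main(void)
{
	/* 1GB of anon pages (in MB) split per the table: ratio 3 -> ~250MB inactive. */
	printf("%d\n", inactive_anon_is_low(768, 256, 3));	/* 0: balanced */
	printf("%d\n", inactive_anon_is_low(900, 124, 3));	/* 1: refill inactive */
	return 0;
}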