aboutsummaryrefslogtreecommitdiffstats
path: root/mm/vmscan.c
diff options
context:
space:
mode:
authorMel Gorman <mel@csn.ul.ie>2011-01-13 18:46:21 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2011-01-13 20:32:37 -0500
commit1741c87757448cedd03224f01586504f9256415d (patch)
treee8f3bace5f0cd1652a3a2a682189b19f7b3af875 /mm/vmscan.c
parent9950474883e027e6e728cbcff25f7f2bf0c96530 (diff)
mm: kswapd: keep kswapd awake for high-order allocations until a percentage of the node is balanced
When reclaiming for high-order allocations, kswapd is responsible for balancing a node, but it should not reclaim excessively. It avoids excessive reclaim by treating the node as balanced as soon as any one zone in the node is balanced. In cases where zone sizes are imbalanced (e.g. ZONE_DMA alongside both ZONE_DMA32 and ZONE_NORMAL), kswapd can go to sleep prematurely because just one small zone was balanced. This patch alters the sleep logic of kswapd slightly. It counts the number of pages that make up the balanced zones. If the total number of balanced pages is more than a quarter of the node, kswapd will go back to sleep. This should keep a node balanced without reclaiming an excessive number of pages. Signed-off-by: Mel Gorman <mel@csn.ul.ie> Reviewed-by: Minchan Kim <minchan.kim@gmail.com> Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Reviewed-by: Eric B Munson <emunson@mgebm.net> Cc: Simon Kirby <sim@hostway.ca> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Shaohua Li <shaohua.li@intel.com> Cc: Dave Hansen <dave@linux.vnet.ibm.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Rik van Riel <riel@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c58
1 files changed, 49 insertions, 9 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3584067800e1..d3488828331a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2198,10 +2198,40 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
2198} 2198}
2199#endif 2199#endif
2200 2200
2201/*
2202 * pgdat_balanced is used when checking if a node is balanced for high-order
2203 * allocations. Only zones that meet watermarks and are in a zone allowed
2204 * by the callers classzone_idx are added to balanced_pages. The total of
2205 * balanced pages must be at least 25% of the zones allowed by classzone_idx
2206 * for the node to be considered balanced. Forcing all zones to be balanced
2207 * for high orders can cause excessive reclaim when there are imbalanced zones.
2208 * The choice of 25% is due to
2209 * o a 16M DMA zone that is balanced will not balance a zone on any
2210 * reasonable sized machine
2211 * o On all other machines, the top zone must be at least a reasonable
2212 * precentage of the middle zones. For example, on 32-bit x86, highmem
2213 * would need to be at least 256M for it to be balance a whole node.
2214 * Similarly, on x86-64 the Normal zone would need to be at least 1G
2215 * to balance a node on its own. These seemed like reasonable ratios.
2216 */
2217static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages,
2218 int classzone_idx)
2219{
2220 unsigned long present_pages = 0;
2221 int i;
2222
2223 for (i = 0; i <= classzone_idx; i++)
2224 present_pages += pgdat->node_zones[i].present_pages;
2225
2226 return balanced_pages > (present_pages >> 2);
2227}
2228
2201/* is kswapd sleeping prematurely? */ 2229/* is kswapd sleeping prematurely? */
2202static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining) 2230static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
2203{ 2231{
2204 int i; 2232 int i;
2233 unsigned long balanced = 0;
2234 bool all_zones_ok = true;
2205 2235
2206 /* If a direct reclaimer woke kswapd within HZ/10, it's premature */ 2236 /* If a direct reclaimer woke kswapd within HZ/10, it's premature */
2207 if (remaining) 2237 if (remaining)
@@ -2219,10 +2249,20 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
2219 2249
2220 if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone), 2250 if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
2221 0, 0)) 2251 0, 0))
2222 return 1; 2252 all_zones_ok = false;
2253 else
2254 balanced += zone->present_pages;
2223 } 2255 }
2224 2256
2225 return 0; 2257 /*
2258 * For high-order requests, the balanced zones must contain at least
2259 * 25% of the nodes pages for kswapd to sleep. For order-0, all zones
2260 * must be balanced
2261 */
2262 if (order)
2263 return pgdat_balanced(pgdat, balanced, 0);
2264 else
2265 return !all_zones_ok;
2226} 2266}
2227 2267
2228/* 2268/*
@@ -2250,7 +2290,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
2250 int classzone_idx) 2290 int classzone_idx)
2251{ 2291{
2252 int all_zones_ok; 2292 int all_zones_ok;
2253 int any_zone_ok; 2293 unsigned long balanced;
2254 int priority; 2294 int priority;
2255 int i; 2295 int i;
2256 int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ 2296 int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
@@ -2284,7 +2324,7 @@ loop_again:
2284 disable_swap_token(); 2324 disable_swap_token();
2285 2325
2286 all_zones_ok = 1; 2326 all_zones_ok = 1;
2287 any_zone_ok = 0; 2327 balanced = 0;
2288 2328
2289 /* 2329 /*
2290 * Scan in the highmem->dma direction for the highest 2330 * Scan in the highmem->dma direction for the highest
@@ -2404,11 +2444,11 @@ loop_again:
2404 */ 2444 */
2405 zone_clear_flag(zone, ZONE_CONGESTED); 2445 zone_clear_flag(zone, ZONE_CONGESTED);
2406 if (i <= classzone_idx) 2446 if (i <= classzone_idx)
2407 any_zone_ok = 1; 2447 balanced += zone->present_pages;
2408 } 2448 }
2409 2449
2410 } 2450 }
2411 if (all_zones_ok || (order && any_zone_ok)) 2451 if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx)))
2412 break; /* kswapd: all done */ 2452 break; /* kswapd: all done */
2413 /* 2453 /*
2414 * OK, kswapd is getting into trouble. Take a nap, then take 2454 * OK, kswapd is getting into trouble. Take a nap, then take
@@ -2434,10 +2474,10 @@ out:
2434 2474
2435 /* 2475 /*
2436 * order-0: All zones must meet high watermark for a balanced node 2476 * order-0: All zones must meet high watermark for a balanced node
2437 * high-order: Any zone below pgdats classzone_idx must meet the high 2477 * high-order: Balanced zones must make up at least 25% of the node
2438 * watermark for a balanced node 2478 * for the node to be balanced
2439 */ 2479 */
2440 if (!(all_zones_ok || (order && any_zone_ok))) { 2480 if (!(all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx)))) {
2441 cond_resched(); 2481 cond_resched();
2442 2482
2443 try_to_freeze(); 2483 try_to_freeze();