aboutsummaryrefslogtreecommitdiffstats
path: root/mm/vmscan.c
diff options
context:
space:
mode:
author Mel Gorman <mel@csn.ul.ie> 2011-01-13 18:46:26 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-01-13 20:32:37 -0500
commit dc83edd941f412e938841b4989be24aa288a1aa6 (patch)
tree 07dbc04d544f3200b3b13be1af6c57f44ffa63c8 /mm/vmscan.c
parent 355b09c47a0cbb73b3e65a57c03f157f2e7ddb0b (diff)
mm: kswapd: use the classzone idx that kswapd was using for sleeping_prematurely()
When kswapd is woken up for a high-order allocation, it takes account of the highest zone usable by the caller (the classzone idx). During allocation, this index is used to select the lowmem_reserve[] that should be applied to the watermark calculation in zone_watermark_ok(). When balancing a node, kswapd considers the highest unbalanced zone to be the classzone index. This will always be at least the caller's classzone_idx and can be higher. However, sleeping_prematurely() always considers the lowest zone (e.g. ZONE_DMA) to be the classzone index. This means that sleeping_prematurely() can consider a zone to be balanced that is unusable by the allocation request that originally woke kswapd. This patch changes sleeping_prematurely() to use a classzone_idx matching the value it used in balance_pgdat(). Signed-off-by: Mel Gorman <mel@csn.ul.ie> Reviewed-by: Minchan Kim <minchan.kim@gmail.com> Reviewed-by: Eric B Munson <emunson@mgebm.net> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Simon Kirby <sim@hostway.ca> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Shaohua Li <shaohua.li@intel.com> Cc: Dave Hansen <dave@linux.vnet.ibm.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Rik van Riel <riel@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- mm/vmscan.c 29
1 files changed, 16 insertions, 13 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 388a0447b8e8..cfdef0bcc7ab 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2227,7 +2227,8 @@ static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages,
2227} 2227}
2228 2228
2229/* is kswapd sleeping prematurely? */ 2229/* is kswapd sleeping prematurely? */
2230static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining) 2230static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining,
2231 int classzone_idx)
2231{ 2232{
2232 int i; 2233 int i;
2233 unsigned long balanced = 0; 2234 unsigned long balanced = 0;
@@ -2235,7 +2236,7 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
2235 2236
2236 /* If a direct reclaimer woke kswapd within HZ/10, it's premature */ 2237 /* If a direct reclaimer woke kswapd within HZ/10, it's premature */
2237 if (remaining) 2238 if (remaining)
2238 return 1; 2239 return true;
2239 2240
2240 /* Check the watermark levels */ 2241 /* Check the watermark levels */
2241 for (i = 0; i < pgdat->nr_zones; i++) { 2242 for (i = 0; i < pgdat->nr_zones; i++) {
@@ -2256,7 +2257,7 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
2256 } 2257 }
2257 2258
2258 if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone), 2259 if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
2259 0, 0)) 2260 classzone_idx, 0))
2260 all_zones_ok = false; 2261 all_zones_ok = false;
2261 else 2262 else
2262 balanced += zone->present_pages; 2263 balanced += zone->present_pages;
@@ -2268,7 +2269,7 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
2268 * must be balanced 2269 * must be balanced
2269 */ 2270 */
2270 if (order) 2271 if (order)
2271 return pgdat_balanced(pgdat, balanced, 0); 2272 return pgdat_balanced(pgdat, balanced, classzone_idx);
2272 else 2273 else
2273 return !all_zones_ok; 2274 return !all_zones_ok;
2274} 2275}
@@ -2295,7 +2296,7 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
2295 * of pages is balanced across the zones. 2296 * of pages is balanced across the zones.
2296 */ 2297 */
2297static unsigned long balance_pgdat(pg_data_t *pgdat, int order, 2298static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
2298 int classzone_idx) 2299 int *classzone_idx)
2299{ 2300{
2300 int all_zones_ok; 2301 int all_zones_ok;
2301 unsigned long balanced; 2302 unsigned long balanced;
@@ -2358,6 +2359,7 @@ loop_again:
2358 if (!zone_watermark_ok_safe(zone, order, 2359 if (!zone_watermark_ok_safe(zone, order,
2359 high_wmark_pages(zone), 0, 0)) { 2360 high_wmark_pages(zone), 0, 0)) {
2360 end_zone = i; 2361 end_zone = i;
2362 *classzone_idx = i;
2361 break; 2363 break;
2362 } 2364 }
2363 } 2365 }
@@ -2451,12 +2453,12 @@ loop_again:
2451 * spectulatively avoid congestion waits 2453 * spectulatively avoid congestion waits
2452 */ 2454 */
2453 zone_clear_flag(zone, ZONE_CONGESTED); 2455 zone_clear_flag(zone, ZONE_CONGESTED);
2454 if (i <= classzone_idx) 2456 if (i <= *classzone_idx)
2455 balanced += zone->present_pages; 2457 balanced += zone->present_pages;
2456 } 2458 }
2457 2459
2458 } 2460 }
2459 if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx))) 2461 if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))
2460 break; /* kswapd: all done */ 2462 break; /* kswapd: all done */
2461 /* 2463 /*
2462 * OK, kswapd is getting into trouble. Take a nap, then take 2464 * OK, kswapd is getting into trouble. Take a nap, then take
@@ -2485,7 +2487,7 @@ out:
2485 * high-order: Balanced zones must make up at least 25% of the node 2487 * high-order: Balanced zones must make up at least 25% of the node
2486 * for the node to be balanced 2488 * for the node to be balanced
2487 */ 2489 */
2488 if (!(all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx)))) { 2490 if (!(all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))) {
2489 cond_resched(); 2491 cond_resched();
2490 2492
2491 try_to_freeze(); 2493 try_to_freeze();
@@ -2546,10 +2548,11 @@ out:
2546 * if another caller entered the allocator slow path while kswapd 2548 * if another caller entered the allocator slow path while kswapd
2547 * was awake, order will remain at the higher level 2549 * was awake, order will remain at the higher level
2548 */ 2550 */
2551 *classzone_idx = end_zone;
2549 return order; 2552 return order;
2550} 2553}
2551 2554
2552static void kswapd_try_to_sleep(pg_data_t *pgdat, int order) 2555static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
2553{ 2556{
2554 long remaining = 0; 2557 long remaining = 0;
2555 DEFINE_WAIT(wait); 2558 DEFINE_WAIT(wait);
@@ -2560,7 +2563,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order)
2560 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); 2563 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
2561 2564
2562 /* Try to sleep for a short interval */ 2565 /* Try to sleep for a short interval */
2563 if (!sleeping_prematurely(pgdat, order, remaining)) { 2566 if (!sleeping_prematurely(pgdat, order, remaining, classzone_idx)) {
2564 remaining = schedule_timeout(HZ/10); 2567 remaining = schedule_timeout(HZ/10);
2565 finish_wait(&pgdat->kswapd_wait, &wait); 2568 finish_wait(&pgdat->kswapd_wait, &wait);
2566 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); 2569 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
@@ -2570,7 +2573,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order)
2570 * After a short sleep, check if it was a premature sleep. If not, then 2573 * After a short sleep, check if it was a premature sleep. If not, then
2571 * go fully to sleep until explicitly woken up. 2574 * go fully to sleep until explicitly woken up.
2572 */ 2575 */
2573 if (!sleeping_prematurely(pgdat, order, remaining)) { 2576 if (!sleeping_prematurely(pgdat, order, remaining, classzone_idx)) {
2574 trace_mm_vmscan_kswapd_sleep(pgdat->node_id); 2577 trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
2575 2578
2576 /* 2579 /*
@@ -2658,7 +2661,7 @@ static int kswapd(void *p)
2658 order = new_order; 2661 order = new_order;
2659 classzone_idx = new_classzone_idx; 2662 classzone_idx = new_classzone_idx;
2660 } else { 2663 } else {
2661 kswapd_try_to_sleep(pgdat, order); 2664 kswapd_try_to_sleep(pgdat, order, classzone_idx);
2662 order = pgdat->kswapd_max_order; 2665 order = pgdat->kswapd_max_order;
2663 classzone_idx = pgdat->classzone_idx; 2666 classzone_idx = pgdat->classzone_idx;
2664 pgdat->kswapd_max_order = 0; 2667 pgdat->kswapd_max_order = 0;
@@ -2675,7 +2678,7 @@ static int kswapd(void *p)
2675 */ 2678 */
2676 if (!ret) { 2679 if (!ret) {
2677 trace_mm_vmscan_kswapd_wake(pgdat->node_id, order); 2680 trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
2678 order = balance_pgdat(pgdat, order, classzone_idx); 2681 order = balance_pgdat(pgdat, order, &classzone_idx);
2679 } 2682 }
2680 } 2683 }
2681 return 0; 2684 return 0;