commit    dc83edd941f412e938841b4989be24aa288a1aa6
author    Mel Gorman <mel@csn.ul.ie>  2011-01-13 18:46:26 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2011-01-13 20:32:37 -0500
tree      07dbc04d544f3200b3b13be1af6c57f44ffa63c8 /mm
parent    355b09c47a0cbb73b3e65a57c03f157f2e7ddb0b
mm: kswapd: use the classzone idx that kswapd was using for sleeping_prematurely()
When kswapd is woken up for a high-order allocation, it records the
highest zone usable by the caller (the classzone index). During
allocation, this index selects the lowmem_reserve[] that is applied to
the watermark calculation in zone_watermark_ok().
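For illustration, here is a pared-down model of that check (a sketch only:
struct zone_model and zone_balanced() are invented names, and the real
zone_watermark_ok() in mm/page_alloc.c additionally discounts free pages held
in lower-order free areas; this is not the kernel code):

    #include <stdbool.h>

    /* Hypothetical stand-in for the struct zone fields used here. */
    struct zone_model {
            unsigned long free_pages;
            unsigned long lowmem_reserve[3];   /* e.g. DMA, NORMAL, HIGHMEM */
    };

    /*
     * A zone passes the watermark for a given classzone_idx only if its
     * free pages clear the watermark *plus* the pages it holds back from
     * allocations whose highest usable zone is classzone_idx.  A higher
     * classzone_idx therefore means a larger reserve and a stricter check.
     */
    static bool zone_balanced(const struct zone_model *z,
                              unsigned long watermark, int classzone_idx)
    {
            return z->free_pages > watermark + z->lowmem_reserve[classzone_idx];
    }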
When balancing a node, kswapd considers the highest unbalanced zone to be
the classzone index. This will always be at least the caller's
classzone_idx and can be higher. However, sleeping_prematurely() always
considers the lowest zone (e.g. ZONE_DMA) to be the classzone index.
This means that sleeping_prematurely() can consider a zone to be balanced
that is unusable by the allocation request that originally woke kswapd.
This patch changes sleeping_prematurely() to use the same classzone_idx
that kswapd used in balance_pgdat().
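To make the mismatch concrete, consider hypothetical numbers (invented for
illustration, not taken from the patch):

    /*
     * Hypothetical ZONE_NORMAL on a node where kswapd was woken for a
     * ZONE_HIGHMEM allocation (classzone_idx == 2):
     *
     *   high watermark    = 1000 pages
     *   free pages        = 1100 pages
     *   lowmem_reserve[]  = { 0, 0, 784 }
     *
     * Old check -- classzone_idx hard-coded to 0:
     *   1100 > 1000 + lowmem_reserve[0] (0)    -> zone counted as balanced
     * New check -- the classzone_idx balance_pgdat() used:
     *   1100 > 1000 + lowmem_reserve[2] (784)  -> fails, keep reclaiming
     *
     * Under the old check kswapd could go back to sleep even though the
     * allocation that woke it still could not succeed.
     */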
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: Eric B Munson <emunson@mgebm.net>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Simon Kirby <sim@hostway.ca>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')

 mm/vmscan.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 388a0447b8e8..cfdef0bcc7ab 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2227,7 +2227,8 @@ static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages,
 }
 
 /* is kswapd sleeping prematurely? */
-static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
+static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining,
+                                        int classzone_idx)
 {
     int i;
     unsigned long balanced = 0;
@@ -2235,7 +2236,7 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
 
     /* If a direct reclaimer woke kswapd within HZ/10, it's premature */
     if (remaining)
-        return 1;
+        return true;
 
     /* Check the watermark levels */
     for (i = 0; i < pgdat->nr_zones; i++) {
@@ -2256,7 +2257,7 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
         }
 
         if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
-                                        0, 0))
+                                        classzone_idx, 0))
             all_zones_ok = false;
         else
             balanced += zone->present_pages;
@@ -2268,7 +2269,7 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
      * must be balanced
      */
     if (order)
-        return pgdat_balanced(pgdat, balanced, 0);
+        return pgdat_balanced(pgdat, balanced, classzone_idx);
     else
         return !all_zones_ok;
 }
@@ -2295,7 +2296,7 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
  * of pages is balanced across the zones.
  */
 static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
-                                        int classzone_idx)
+                                        int *classzone_idx)
 {
     int all_zones_ok;
     unsigned long balanced;
@@ -2358,6 +2359,7 @@ loop_again:
             if (!zone_watermark_ok_safe(zone, order,
                     high_wmark_pages(zone), 0, 0)) {
                 end_zone = i;
+                *classzone_idx = i;
                 break;
             }
         }
@@ -2451,12 +2453,12 @@ loop_again:
                  * spectulatively avoid congestion waits
                  */
                 zone_clear_flag(zone, ZONE_CONGESTED);
-                if (i <= classzone_idx)
+                if (i <= *classzone_idx)
                     balanced += zone->present_pages;
             }
 
         }
-        if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx)))
+        if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))
             break;        /* kswapd: all done */
         /*
          * OK, kswapd is getting into trouble. Take a nap, then take
@@ -2485,7 +2487,7 @@ out:
      * high-order: Balanced zones must make up at least 25% of the node
      * for the node to be balanced
      */
-    if (!(all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx)))) {
+    if (!(all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))) {
         cond_resched();
 
         try_to_freeze();
@@ -2546,10 +2548,11 @@ out:
      * if another caller entered the allocator slow path while kswapd
      * was awake, order will remain at the higher level
      */
+    *classzone_idx = end_zone;
     return order;
 }
 
-static void kswapd_try_to_sleep(pg_data_t *pgdat, int order)
+static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
 {
     long remaining = 0;
     DEFINE_WAIT(wait);
@@ -2560,7 +2563,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order)
     prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
 
     /* Try to sleep for a short interval */
-    if (!sleeping_prematurely(pgdat, order, remaining)) {
+    if (!sleeping_prematurely(pgdat, order, remaining, classzone_idx)) {
         remaining = schedule_timeout(HZ/10);
         finish_wait(&pgdat->kswapd_wait, &wait);
         prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
@@ -2570,7 +2573,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order)
      * After a short sleep, check if it was a premature sleep. If not, then
      * go fully to sleep until explicitly woken up.
      */
-    if (!sleeping_prematurely(pgdat, order, remaining)) {
+    if (!sleeping_prematurely(pgdat, order, remaining, classzone_idx)) {
         trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
 
         /*
@@ -2658,7 +2661,7 @@ static int kswapd(void *p)
             order = new_order;
             classzone_idx = new_classzone_idx;
         } else {
-            kswapd_try_to_sleep(pgdat, order);
+            kswapd_try_to_sleep(pgdat, order, classzone_idx);
             order = pgdat->kswapd_max_order;
             classzone_idx = pgdat->classzone_idx;
             pgdat->kswapd_max_order = 0;
@@ -2675,7 +2678,7 @@ static int kswapd(void *p)
          */
         if (!ret) {
             trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
-            order = balance_pgdat(pgdat, order, classzone_idx);
+            order = balance_pgdat(pgdat, order, &classzone_idx);
         }
     }
     return 0;
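Putting the pieces together, the resulting control flow is roughly the
following (a paraphrase of the kswapd() loop in the diff above, with freezing,
the new_order handling, and error paths omitted):

    for ( ; ; ) {
            /* Sleep, judging balance against the classzone_idx last used. */
            kswapd_try_to_sleep(pgdat, order, classzone_idx);

            /* Pick up whatever the most recent waker asked for. */
            order = pgdat->kswapd_max_order;
            classzone_idx = pgdat->classzone_idx;
            pgdat->kswapd_max_order = 0;

            /*
             * balance_pgdat() may raise classzone_idx to the highest
             * unbalanced zone it worked on, so the next call to
             * kswapd_try_to_sleep() judges the node by that same index.
             */
            order = balance_pgdat(pgdat, order, &classzone_idx);
    }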