diff options
-rw-r--r-- | mm/vmscan.c | 58 |
1 files changed, 49 insertions, 9 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c index 3584067800e1..d3488828331a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -2198,10 +2198,40 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | |||
2198 | } | 2198 | } |
2199 | #endif | 2199 | #endif |
2200 | 2200 | ||
2201 | /* | ||
2202 | * pgdat_balanced is used when checking if a node is balanced for high-order | ||
2203 | * allocations. Only zones that meet watermarks and are in a zone allowed | ||
2204 | * by the caller's classzone_idx are added to balanced_pages. The total of | ||
2205 | * balanced pages must be at least 25% of the zones allowed by classzone_idx | ||
2206 | * for the node to be considered balanced. Forcing all zones to be balanced | ||
2207 | * for high orders can cause excessive reclaim when there are imbalanced zones. | ||
2208 | * The choice of 25% is due to | ||
2209 | * o a 16M DMA zone that is balanced will not balance a zone on any | ||
2210 | * reasonably sized machine | ||
2211 | * o On all other machines, the top zone must be at least a reasonable | ||
2212 | * percentage of the middle zones. For example, on 32-bit x86, highmem | ||
2213 | * would need to be at least 256M for it to balance a whole node. | ||
2214 | * Similarly, on x86-64 the Normal zone would need to be at least 1G | ||
2215 | * to balance a node on its own. These seemed like reasonable ratios. | ||
2216 | */ | ||
2217 | static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages, | ||
2218 | int classzone_idx) | ||
2219 | { | ||
2220 | unsigned long present_pages = 0; | ||
2221 | int i; | ||
2222 | |||
2223 | for (i = 0; i <= classzone_idx; i++) | ||
2224 | present_pages += pgdat->node_zones[i].present_pages; | ||
2225 | |||
2226 | return balanced_pages >= (present_pages >> 2); /* inclusive: "at least 25%"; also true when no pages are present */ | ||
2227 | } | ||
2228 | |||
2201 | /* is kswapd sleeping prematurely? */ | 2229 | /* is kswapd sleeping prematurely? */ |
2202 | static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining) | 2230 | static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining) |
2203 | { | 2231 | { |
2204 | int i; | 2232 | int i; |
2233 | unsigned long balanced = 0; | ||
2234 | bool all_zones_ok = true; | ||
2205 | 2235 | ||
2206 | /* If a direct reclaimer woke kswapd within HZ/10, it's premature */ | 2236 | /* If a direct reclaimer woke kswapd within HZ/10, it's premature */ |
2207 | if (remaining) | 2237 | if (remaining) |
@@ -2219,10 +2249,20 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining) | |||
2219 | 2249 | ||
2220 | if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone), | 2250 | if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone), |
2221 | 0, 0)) | 2251 | 0, 0)) |
2222 | return 1; | 2252 | all_zones_ok = false; |
2253 | else | ||
2254 | balanced += zone->present_pages; | ||
2223 | } | 2255 | } |
2224 | 2256 | ||
2225 | return 0; | 2257 | /* |
2258 | * For high-order requests, the balanced zones must contain at least | ||
2259 | * 25% of the node's pages for kswapd to sleep. For order-0, all zones | ||
2260 | * must be balanced. | ||
2261 | */ | ||
2262 | if (order) | ||
2263 | return pgdat_balanced(pgdat, balanced, 0); | ||
2264 | else | ||
2265 | return !all_zones_ok; | ||
2226 | } | 2266 | } |
2227 | 2267 | ||
2228 | /* | 2268 | /* |
@@ -2250,7 +2290,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, | |||
2250 | int classzone_idx) | 2290 | int classzone_idx) |
2251 | { | 2291 | { |
2252 | int all_zones_ok; | 2292 | int all_zones_ok; |
2253 | int any_zone_ok; | 2293 | unsigned long balanced; |
2254 | int priority; | 2294 | int priority; |
2255 | int i; | 2295 | int i; |
2256 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ | 2296 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ |
@@ -2284,7 +2324,7 @@ loop_again: | |||
2284 | disable_swap_token(); | 2324 | disable_swap_token(); |
2285 | 2325 | ||
2286 | all_zones_ok = 1; | 2326 | all_zones_ok = 1; |
2287 | any_zone_ok = 0; | 2327 | balanced = 0; |
2288 | 2328 | ||
2289 | /* | 2329 | /* |
2290 | * Scan in the highmem->dma direction for the highest | 2330 | * Scan in the highmem->dma direction for the highest |
@@ -2404,11 +2444,11 @@ loop_again: | |||
2404 | */ | 2444 | */ |
2405 | zone_clear_flag(zone, ZONE_CONGESTED); | 2445 | zone_clear_flag(zone, ZONE_CONGESTED); |
2406 | if (i <= classzone_idx) | 2446 | if (i <= classzone_idx) |
2407 | any_zone_ok = 1; | 2447 | balanced += zone->present_pages; |
2408 | } | 2448 | } |
2409 | 2449 | ||
2410 | } | 2450 | } |
2411 | if (all_zones_ok || (order && any_zone_ok)) | 2451 | if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx))) |
2412 | break; /* kswapd: all done */ | 2452 | break; /* kswapd: all done */ |
2413 | /* | 2453 | /* |
2414 | * OK, kswapd is getting into trouble. Take a nap, then take | 2454 | * OK, kswapd is getting into trouble. Take a nap, then take |
@@ -2434,10 +2474,10 @@ out: | |||
2434 | 2474 | ||
2435 | /* | 2475 | /* |
2436 | * order-0: All zones must meet high watermark for a balanced node | 2476 | * order-0: All zones must meet high watermark for a balanced node |
2437 | * high-order: Any zone below pgdats classzone_idx must meet the high | 2477 | * high-order: Balanced zones must make up at least 25% of the node |
2438 | * watermark for a balanced node | 2478 | * for the node to be balanced |
2439 | */ | 2479 | */ |
2440 | if (!(all_zones_ok || (order && any_zone_ok))) { | 2480 | if (!(all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx)))) { |
2441 | cond_resched(); | 2481 | cond_resched(); |
2442 | 2482 | ||
2443 | try_to_freeze(); | 2483 | try_to_freeze(); |