Diffstat (limited to 'mm')
-rw-r--r--   mm/mempolicy.c | 130
-rw-r--r--   mm/shmem.c     |   4
-rw-r--r--   mm/vmscan.c    | 107
3 files changed, 120 insertions(+), 121 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d1b315e98627..e2df1c1fb41f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2132,7 +2132,7 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
  */
 
 /* lookup first element intersecting start-end */
-/* Caller holds sp->mutex */
+/* Caller holds sp->lock */
 static struct sp_node *
 sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end)
 {
@@ -2196,13 +2196,13 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
 
         if (!sp->root.rb_node)
                 return NULL;
-        mutex_lock(&sp->mutex);
+        spin_lock(&sp->lock);
         sn = sp_lookup(sp, idx, idx+1);
         if (sn) {
                 mpol_get(sn->policy);
                 pol = sn->policy;
         }
-        mutex_unlock(&sp->mutex);
+        spin_unlock(&sp->lock);
         return pol;
 }
 
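Note: mpol_shared_policy_lookup() above shows why the mutex-to-spinlock conversion is safe for readers: the reference is taken via mpol_get() while sp->lock is held, so the policy cannot be freed between lookup and return. A minimal sketch of that pattern, with illustrative names (struct obj, obj_lookup_locked() are not from this patch):

        struct obj *obj_lookup_get(struct tree *t, unsigned long key)
        {
                struct obj *o;

                spin_lock(&t->lock);
                o = obj_lookup_locked(t, key);  /* caller holds t->lock */
                if (o)
                        atomic_inc(&o->refcnt); /* pin before unlocking */
                spin_unlock(&t->lock);
                return o;                       /* caller drops the reference */
        }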
@@ -2328,6 +2328,14 @@ static void sp_delete(struct shared_policy *sp, struct sp_node *n)
         sp_free(n);
 }
 
+static void sp_node_init(struct sp_node *node, unsigned long start,
+                        unsigned long end, struct mempolicy *pol)
+{
+        node->start = start;
+        node->end = end;
+        node->policy = pol;
+}
+
 static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
                                 struct mempolicy *pol)
 {
@@ -2344,10 +2352,7 @@ static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
                 return NULL;
         }
         newpol->flags |= MPOL_F_SHARED;
-
-        n->start = start;
-        n->end = end;
-        n->policy = newpol;
+        sp_node_init(n, start, end, newpol);
 
         return n;
 }
@@ -2357,9 +2362,12 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
                                  unsigned long end, struct sp_node *new)
 {
         struct sp_node *n;
+        struct sp_node *n_new = NULL;
+        struct mempolicy *mpol_new = NULL;
         int ret = 0;
 
-        mutex_lock(&sp->mutex);
+restart:
+        spin_lock(&sp->lock);
         n = sp_lookup(sp, start, end);
         /* Take care of old policies in the same range. */
         while (n && n->start < end) {
@@ -2372,14 +2380,16 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
                 } else {
                         /* Old policy spanning whole new range. */
                         if (n->end > end) {
-                                struct sp_node *new2;
-                                new2 = sp_alloc(end, n->end, n->policy);
-                                if (!new2) {
-                                        ret = -ENOMEM;
-                                        goto out;
-                                }
+                                if (!n_new)
+                                        goto alloc_new;
+
+                                *mpol_new = *n->policy;
+                                atomic_set(&mpol_new->refcnt, 1);
+                                sp_node_init(n_new, n->end, end, mpol_new);
+                                sp_insert(sp, n_new);
                                 n->end = start;
-                                sp_insert(sp, new2);
+                                n_new = NULL;
+                                mpol_new = NULL;
                                 break;
                         } else
                                 n->end = start;
@@ -2390,9 +2400,27 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
         }
         if (new)
                 sp_insert(sp, new);
-out:
-        mutex_unlock(&sp->mutex);
+        spin_unlock(&sp->lock);
+        ret = 0;
+
+err_out:
+        if (mpol_new)
+                mpol_put(mpol_new);
+        if (n_new)
+                kmem_cache_free(sn_cache, n_new);
+
         return ret;
+
+alloc_new:
+        spin_unlock(&sp->lock);
+        ret = -ENOMEM;
+        n_new = kmem_cache_alloc(sn_cache, GFP_KERNEL);
+        if (!n_new)
+                goto err_out;
+        mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL);
+        if (!mpol_new)
+                goto err_out;
+        goto restart;
 }
 
 /**
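The restart/alloc_new dance above exists because kmem_cache_alloc(..., GFP_KERNEL) may sleep, which was legal under the old mutex but is forbidden under a spinlock. The patch therefore drops the lock, preallocates, and retries, since the tree may have changed while the lock was not held. Reduced to its essentials (illustrative names, not the patch's own):

        retry:
                spin_lock(&sp->lock);
                if (need_split && !prealloc) {
                        spin_unlock(&sp->lock);
                        prealloc = kmem_cache_alloc(cache, GFP_KERNEL); /* may sleep */
                        if (!prealloc)
                                return -ENOMEM;
                        goto retry;     /* tree may have changed meanwhile */
                }
                /* ... mutate the tree, consuming prealloc if needed ... */
                spin_unlock(&sp->lock);

One caveat: sp_node_init(n_new, n->end, end, mpol_new) passes the range bounds in the opposite order from the sp_alloc(end, n->end, n->policy) call it replaces; if the kernel history is recalled correctly, a later follow-up patch corrected this ordering.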
@@ -2410,7 +2438,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
         int ret;
 
         sp->root = RB_ROOT;             /* empty tree == default mempolicy */
-        mutex_init(&sp->mutex);
+        spin_lock_init(&sp->lock);
 
         if (mpol) {
                 struct vm_area_struct pvma;
@@ -2476,14 +2504,14 @@ void mpol_free_shared_policy(struct shared_policy *p)
 
         if (!p->root.rb_node)
                 return;
-        mutex_lock(&p->mutex);
+        spin_lock(&p->lock);
         next = rb_first(&p->root);
         while (next) {
                 n = rb_entry(next, struct sp_node, nd);
                 next = rb_next(&n->nd);
                 sp_delete(p, n);
         }
-        mutex_unlock(&p->mutex);
+        spin_unlock(&p->lock);
 }
 
 #ifdef CONFIG_NUMA_BALANCING
@@ -2595,8 +2623,7 @@ void numa_default_policy(void)
  */
 
 /*
- * "local" is pseudo-policy:  MPOL_PREFERRED with MPOL_F_LOCAL flag
- * Used only for mpol_parse_str() and mpol_to_str()
+ * "local" is implemented internally by MPOL_PREFERRED with MPOL_F_LOCAL flag.
  */
 static const char * const policy_modes[] =
 {
@@ -2610,28 +2637,20 @@ static const char * const policy_modes[] =
 
 #ifdef CONFIG_TMPFS
 /**
- * mpol_parse_str - parse string to mempolicy
+ * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option.
  * @str:  string containing mempolicy to parse
  * @mpol:  pointer to struct mempolicy pointer, returned on success.
- * @no_context:  flag whether to "contextualize" the mempolicy
  *
  * Format of input:
  *      <mode>[=<flags>][:<nodelist>]
  *
- * if @no_context is true, save the input nodemask in w.user_nodemask in
- * the returned mempolicy.  This will be used to "clone" the mempolicy in
- * a specific context [cpuset] at a later time.  Used to parse tmpfs mpol
- * mount option.  Note that if 'static' or 'relative' mode flags were
- * specified, the input nodemask will already have been saved.  Saving
- * it again is redundant, but safe.
- *
  * On success, returns 0, else 1
  */
-int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
+int mpol_parse_str(char *str, struct mempolicy **mpol)
 {
         struct mempolicy *new = NULL;
         unsigned short mode;
-        unsigned short uninitialized_var(mode_flags);
+        unsigned short mode_flags;
         nodemask_t nodes;
         char *nodelist = strchr(str, ':');
         char *flags = strchr(str, '=');
@@ -2719,24 +2738,23 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
         if (IS_ERR(new))
                 goto out;
 
-        if (no_context) {
-                /* save for contextualization */
-                new->w.user_nodemask = nodes;
-        } else {
-                int ret;
-                NODEMASK_SCRATCH(scratch);
-                if (scratch) {
-                        task_lock(current);
-                        ret = mpol_set_nodemask(new, &nodes, scratch);
-                        task_unlock(current);
-                } else
-                        ret = -ENOMEM;
-                NODEMASK_SCRATCH_FREE(scratch);
-                if (ret) {
-                        mpol_put(new);
-                        goto out;
-                }
-        }
+        /*
+         * Save nodes for mpol_to_str() to show the tmpfs mount options
+         * for /proc/mounts, /proc/pid/mounts and /proc/pid/mountinfo.
+         */
+        if (mode != MPOL_PREFERRED)
+                new->v.nodes = nodes;
+        else if (nodelist)
+                new->v.preferred_node = first_node(nodes);
+        else
+                new->flags |= MPOL_F_LOCAL;
+
+        /*
+         * Save nodes for contextualization: this will be used to "clone"
+         * the mempolicy in a specific context [cpuset] at a later time.
+         */
+        new->w.user_nodemask = nodes;
+
         err = 0;
 
 out:
@@ -2756,13 +2774,12 @@ out:
  * @buffer:  to contain formatted mempolicy string
  * @maxlen:  length of @buffer
  * @pol:  pointer to mempolicy to be formatted
- * @no_context:  "context free" mempolicy - use nodemask in w.user_nodemask
  *
  * Convert a mempolicy into a string.
  * Returns the number of characters in buffer (if positive)
  * or an error (negative)
  */
-int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
+int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 {
         char *p = buffer;
         int l;
@@ -2788,7 +2805,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
         case MPOL_PREFERRED:
                 nodes_clear(nodes);
                 if (flags & MPOL_F_LOCAL)
-                        mode = MPOL_LOCAL;      /* pseudo-policy */
+                        mode = MPOL_LOCAL;
                 else
                         node_set(pol->v.preferred_node, nodes);
                 break;
@@ -2796,10 +2813,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
         case MPOL_BIND:
                 /* Fall through */
         case MPOL_INTERLEAVE:
-                if (no_context)
-                        nodes = pol->w.user_nodemask;
-                else
-                        nodes = pol->v.nodes;
+                nodes = pol->v.nodes;
                 break;
 
         default:
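Taken together, mpol_parse_str() and mpol_to_str() now always save and print the user-supplied nodemask for the tmpfs case, so strings such as the following (illustrative examples matching the policy_modes[] table) survive a parse/format cycle:

        default
        prefer:1
        bind=static:0,2
        interleave:0-3
        local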
diff --git a/mm/shmem.c b/mm/shmem.c
index 5c90d84c2b02..5dd56f6efdbd 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -889,7 +889,7 @@ static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
         if (!mpol || mpol->mode == MPOL_DEFAULT)
                 return;         /* show nothing */
 
-        mpol_to_str(buffer, sizeof(buffer), mpol, 1);
+        mpol_to_str(buffer, sizeof(buffer), mpol);
 
         seq_printf(seq, ",mpol=%s", buffer);
 }
@@ -2463,7 +2463,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
                         if (!gid_valid(sbinfo->gid))
                                 goto bad_val;
                 } else if (!strcmp(this_char,"mpol")) {
-                        if (mpol_parse_str(value, &sbinfo->mpol, 1))
+                        if (mpol_parse_str(value, &sbinfo->mpol))
                                 goto bad_val;
                 } else {
                         printk(KERN_ERR "tmpfs: Bad mount option %s\n",
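In practice mpol_parse_str() is reached via the tmpfs mpol= mount option parsed above, e.g. (example invocations):

        mount -t tmpfs -o mpol=interleave:0-3 tmpfs /mnt
        mount -t tmpfs -o mpol=bind=static:0,2 tmpfs /mnt

and shmem_show_mpol() prints the same string back through mpol_to_str() for /proc/mounts.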
diff --git a/mm/vmscan.c b/mm/vmscan.c
index adc7e9058181..16b42af393ac 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2452,12 +2452,16 @@ static bool zone_balanced(struct zone *zone, int order,
 }
 
 /*
- * pgdat_balanced is used when checking if a node is balanced for high-order
- * allocations. Only zones that meet watermarks and are in a zone allowed
- * by the callers classzone_idx are added to balanced_pages. The total of
- * balanced pages must be at least 25% of the zones allowed by classzone_idx
- * for the node to be considered balanced. Forcing all zones to be balanced
- * for high orders can cause excessive reclaim when there are imbalanced zones.
+ * pgdat_balanced() is used when checking if a node is balanced.
+ *
+ * For order-0, all zones must be balanced!
+ *
+ * For high-order allocations only zones that meet watermarks and are in a
+ * zone allowed by the callers classzone_idx are added to balanced_pages. The
+ * total of balanced pages must be at least 25% of the zones allowed by
+ * classzone_idx for the node to be considered balanced. Forcing all zones to
+ * be balanced for high orders can cause excessive reclaim when there are
+ * imbalanced zones.
  * The choice of 25% is due to
  *   o a 16M DMA zone that is balanced will not balance a zone on any
  *     reasonable sized machine
@@ -2467,17 +2471,43 @@ static bool zone_balanced(struct zone *zone, int order,
  *   Similarly, on x86-64 the Normal zone would need to be at least 1G
  *   to balance a node on its own. These seemed like reasonable ratios.
  */
-static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages,
-                                                int classzone_idx)
+static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
 {
         unsigned long present_pages = 0;
+        unsigned long balanced_pages = 0;
         int i;
 
-        for (i = 0; i <= classzone_idx; i++)
-                present_pages += pgdat->node_zones[i].present_pages;
+        /* Check the watermark levels */
+        for (i = 0; i <= classzone_idx; i++) {
+                struct zone *zone = pgdat->node_zones + i;
 
-        /* A special case here: if zone has no page, we think it's balanced */
-        return balanced_pages >= (present_pages >> 2);
+                if (!populated_zone(zone))
+                        continue;
+
+                present_pages += zone->present_pages;
+
+                /*
+                 * A special case here:
+                 *
+                 * balance_pgdat() skips over all_unreclaimable after
+                 * DEF_PRIORITY. Effectively, it considers them balanced so
+                 * they must be considered balanced here as well!
+                 */
+                if (zone->all_unreclaimable) {
+                        balanced_pages += zone->present_pages;
+                        continue;
+                }
+
+                if (zone_balanced(zone, order, 0, i))
+                        balanced_pages += zone->present_pages;
+                else if (!order)
+                        return false;
+        }
+
+        if (order)
+                return balanced_pages >= (present_pages >> 2);
+        else
+                return true;
 }
 
 /*
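To make the 25% threshold concrete, a worked example with assumed zone sizes (4 KiB pages):

        ZONE_DMA      16 MiB =   4096 pages
        ZONE_NORMAL    2 GiB = 524288 pages
        present_pages        = 528384 pages
        present_pages >> 2   = 132096 pages

For a high-order request, the DMA zone alone (4096 pages) can never make the node look balanced, while the Normal zone on its own can. For order-0 the arithmetic is bypassed entirely: the loop returns false as soon as any populated, reclaimable zone misses its watermark.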
@@ -2489,10 +2519,6 @@ static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages,
 static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
                                         int classzone_idx)
 {
-        int i;
-        unsigned long balanced = 0;
-        bool all_zones_ok = true;
-
         /* If a direct reclaimer woke kswapd within HZ/10, it's premature */
         if (remaining)
                 return false;
@@ -2511,39 +2537,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
                 return false;
         }
 
-        /* Check the watermark levels */
-        for (i = 0; i <= classzone_idx; i++) {
-                struct zone *zone = pgdat->node_zones + i;
-
-                if (!populated_zone(zone))
-                        continue;
-
-                /*
-                 * balance_pgdat() skips over all_unreclaimable after
-                 * DEF_PRIORITY. Effectively, it considers them balanced so
-                 * they must be considered balanced here as well if kswapd
-                 * is to sleep
-                 */
-                if (zone->all_unreclaimable) {
-                        balanced += zone->present_pages;
-                        continue;
-                }
-
-                if (!zone_balanced(zone, order, 0, i))
-                        all_zones_ok = false;
-                else
-                        balanced += zone->present_pages;
-        }
-
-        /*
-         * For high-order requests, the balanced zones must contain at least
-         * 25% of the nodes pages for kswapd to sleep. For order-0, all zones
-         * must be balanced
-         */
-        if (order)
-                return pgdat_balanced(pgdat, balanced, classzone_idx);
-        else
-                return all_zones_ok;
+        return pgdat_balanced(pgdat, order, classzone_idx);
 }
 
 /*
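After this consolidation the sleep and balance decisions share a single predicate, so they can no longer drift apart:

        prepare_kswapd_sleep()   ->  pgdat_balanced(pgdat, order, classzone_idx)
        balance_pgdat() loop     ->  pgdat_balanced(pgdat, order, *classzone_idx)
        balance_pgdat() out path ->  !pgdat_balanced(pgdat, order, *classzone_idx)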
@@ -2571,7 +2565,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                                                         int *classzone_idx)
 {
         struct zone *unbalanced_zone;
-        unsigned long balanced;
         int i;
         int end_zone = 0;       /* Inclusive.  0 = ZONE_DMA */
         unsigned long total_scanned;
@@ -2605,7 +2598,6 @@ loop_again:
                 int has_under_min_watermark_zone = 0;
 
                 unbalanced_zone = NULL;
-                balanced = 0;
 
                 /*
                  * Scan in the highmem->dma direction for the highest
@@ -2761,8 +2753,6 @@ loop_again:
                                  * speculatively avoid congestion waits
                                  */
                                 zone_clear_flag(zone, ZONE_CONGESTED);
-                                if (i <= *classzone_idx)
-                                        balanced += zone->present_pages;
                         }
 
                 }
@@ -2776,7 +2766,7 @@ loop_again:
                     pfmemalloc_watermark_ok(pgdat))
                         wake_up(&pgdat->pfmemalloc_wait);
 
-                if (!unbalanced_zone || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))
+                if (pgdat_balanced(pgdat, order, *classzone_idx))
                         break;          /* kswapd: all done */
                 /*
                  * OK, kswapd is getting into trouble. Take a nap, then take
@@ -2785,7 +2775,7 @@ loop_again:
                 if (total_scanned && (sc.priority < DEF_PRIORITY - 2)) {
                         if (has_under_min_watermark_zone)
                                 count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
-                        else
+                        else if (unbalanced_zone)
                                 wait_iff_congested(unbalanced_zone, BLK_RW_ASYNC, HZ/10);
                 }
 
@@ -2800,12 +2790,7 @@ loop_again:
         } while (--sc.priority >= 0);
 out:
 
-        /*
-         * order-0: All zones must meet high watermark for a balanced node
-         * high-order: Balanced zones must make up at least 25% of the node
-         * for the node to be balanced
-         */
-        if (unbalanced_zone && (!order || !pgdat_balanced(pgdat, balanced, *classzone_idx))) {
+        if (!pgdat_balanced(pgdat, order, *classzone_idx)) {
                 cond_resched();
 
                 try_to_freeze();