Diffstat (limited to 'mm')
-rw-r--r--  mm/memory.c          13
-rw-r--r--  mm/mempolicy.c      130
-rw-r--r--  mm/page_alloc.c      27
-rw-r--r--  mm/page_isolation.c  26
-rw-r--r--  mm/shmem.c            4
-rw-r--r--  mm/vmscan.c         111
6 files changed, 137 insertions, 174 deletions
diff --git a/mm/memory.c b/mm/memory.c
index e0a9b0ce4f10..bb1369f7b9b4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -184,10 +184,14 @@ static int tlb_next_batch(struct mmu_gather *tlb)
 		return 1;
 	}
 
+	if (tlb->batch_count == MAX_GATHER_BATCH_COUNT)
+		return 0;
+
 	batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
 	if (!batch)
 		return 0;
 
+	tlb->batch_count++;
 	batch->next = NULL;
 	batch->nr = 0;
 	batch->max = MAX_GATHER_BATCH;
@@ -216,6 +220,7 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
 	tlb->local.nr = 0;
 	tlb->local.max = ARRAY_SIZE(tlb->__pages);
 	tlb->active = &tlb->local;
+	tlb->batch_count = 0;
 
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	tlb->batch = NULL;
@@ -3706,6 +3711,14 @@ retry:
 	if (pmd_trans_huge(orig_pmd)) {
 		unsigned int dirty = flags & FAULT_FLAG_WRITE;
 
+		/*
+		 * If the pmd is splitting, return and retry the
+		 * the fault.  Alternative: wait until the split
+		 * is done, and goto retry.
+		 */
+		if (pmd_trans_splitting(orig_pmd))
+			return 0;
+
 		if (pmd_numa(orig_pmd))
 			return do_huge_pmd_numa_page(mm, vma, address,
 						     orig_pmd, pmd);
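Note: the mm/memory.c hunks above make two independent fixes. tlb_next_batch() now refuses to chain more than MAX_GATHER_BATCH_COUNT extra page batches, so a huge munmap() cannot keep allocating GFP_NOWAIT batches without bound, and the fault handler returns early (letting the fault be retried) when it sees a huge pmd that is in the middle of splitting. The userspace sketch below illustrates only the batch-cap idea; the struct layout, constant values and main() are illustrative stand-ins, not the kernel code.

/* Minimal userspace sketch of the capped batch chain (illustrative only). */
#include <stdio.h>
#include <stdlib.h>

#define MAX_GATHER_BATCH	32	/* pages per batch, illustrative value */
#define MAX_GATHER_BATCH_COUNT	8	/* cap on extra batches, illustrative value */

struct batch {
	struct batch *next;
	int nr;
	void *pages[MAX_GATHER_BATCH];
};

struct gather {
	struct batch *active;
	struct batch local;
	int batch_count;
};

/* Mirrors the shape of tlb_next_batch(): stop growing once the cap is hit. */
static int next_batch(struct gather *g)
{
	struct batch *b;

	if (g->active->next) {
		g->active = g->active->next;
		return 1;
	}

	if (g->batch_count == MAX_GATHER_BATCH_COUNT)
		return 0;

	b = calloc(1, sizeof(*b));
	if (!b)
		return 0;

	g->batch_count++;
	g->active->next = b;
	g->active = b;
	return 1;
}

int main(void)
{
	struct gather g = { .active = &g.local, .batch_count = 0 };
	int grown = 0;

	while (next_batch(&g))
		grown++;	/* batches are leaked here for brevity */

	printf("chained %d extra batches before hitting the cap\n", grown);
	return 0;
}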
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d1b315e98627..e2df1c1fb41f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2132,7 +2132,7 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
  */
 
 /* lookup first element intersecting start-end */
-/* Caller holds sp->mutex */
+/* Caller holds sp->lock */
 static struct sp_node *
 sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end)
 {
@@ -2196,13 +2196,13 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
 
 	if (!sp->root.rb_node)
 		return NULL;
-	mutex_lock(&sp->mutex);
+	spin_lock(&sp->lock);
 	sn = sp_lookup(sp, idx, idx+1);
 	if (sn) {
 		mpol_get(sn->policy);
 		pol = sn->policy;
 	}
-	mutex_unlock(&sp->mutex);
+	spin_unlock(&sp->lock);
 	return pol;
 }
 
@@ -2328,6 +2328,14 @@ static void sp_delete(struct shared_policy *sp, struct sp_node *n)
 	sp_free(n);
 }
 
+static void sp_node_init(struct sp_node *node, unsigned long start,
+			unsigned long end, struct mempolicy *pol)
+{
+	node->start = start;
+	node->end = end;
+	node->policy = pol;
+}
+
 static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
 				struct mempolicy *pol)
 {
@@ -2344,10 +2352,7 @@ static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
 		return NULL;
 	}
 	newpol->flags |= MPOL_F_SHARED;
-
-	n->start = start;
-	n->end = end;
-	n->policy = newpol;
+	sp_node_init(n, start, end, newpol);
 
 	return n;
 }
@@ -2357,9 +2362,12 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
 				 unsigned long end, struct sp_node *new)
 {
 	struct sp_node *n;
+	struct sp_node *n_new = NULL;
+	struct mempolicy *mpol_new = NULL;
 	int ret = 0;
 
-	mutex_lock(&sp->mutex);
+restart:
+	spin_lock(&sp->lock);
 	n = sp_lookup(sp, start, end);
 	/* Take care of old policies in the same range. */
 	while (n && n->start < end) {
@@ -2372,14 +2380,16 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
 		} else {
 			/* Old policy spanning whole new range. */
 			if (n->end > end) {
-				struct sp_node *new2;
-				new2 = sp_alloc(end, n->end, n->policy);
-				if (!new2) {
-					ret = -ENOMEM;
-					goto out;
-				}
+				if (!n_new)
+					goto alloc_new;
+
+				*mpol_new = *n->policy;
+				atomic_set(&mpol_new->refcnt, 1);
+				sp_node_init(n_new, n->end, end, mpol_new);
+				sp_insert(sp, n_new);
 				n->end = start;
-				sp_insert(sp, new2);
+				n_new = NULL;
+				mpol_new = NULL;
 				break;
 			} else
 				n->end = start;
@@ -2390,9 +2400,27 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
 	}
 	if (new)
 		sp_insert(sp, new);
-out:
-	mutex_unlock(&sp->mutex);
+	spin_unlock(&sp->lock);
+	ret = 0;
+
+err_out:
+	if (mpol_new)
+		mpol_put(mpol_new);
+	if (n_new)
+		kmem_cache_free(sn_cache, n_new);
+
 	return ret;
+
+alloc_new:
+	spin_unlock(&sp->lock);
+	ret = -ENOMEM;
+	n_new = kmem_cache_alloc(sn_cache, GFP_KERNEL);
+	if (!n_new)
+		goto err_out;
+	mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL);
+	if (!mpol_new)
+		goto err_out;
+	goto restart;
 }
 
 /**
@@ -2410,7 +2438,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
 	int ret;
 
 	sp->root = RB_ROOT;		/* empty tree == default mempolicy */
-	mutex_init(&sp->mutex);
+	spin_lock_init(&sp->lock);
 
 	if (mpol) {
 		struct vm_area_struct pvma;
@@ -2476,14 +2504,14 @@ void mpol_free_shared_policy(struct shared_policy *p)
 
 	if (!p->root.rb_node)
 		return;
-	mutex_lock(&p->mutex);
+	spin_lock(&p->lock);
 	next = rb_first(&p->root);
 	while (next) {
 		n = rb_entry(next, struct sp_node, nd);
 		next = rb_next(&n->nd);
 		sp_delete(p, n);
 	}
-	mutex_unlock(&p->mutex);
+	spin_unlock(&p->lock);
 }
 
 #ifdef CONFIG_NUMA_BALANCING
@@ -2595,8 +2623,7 @@ void numa_default_policy(void)
  */
 
 /*
- * "local" is pseudo-policy: MPOL_PREFERRED with MPOL_F_LOCAL flag
- * Used only for mpol_parse_str() and mpol_to_str()
+ * "local" is implemented internally by MPOL_PREFERRED with MPOL_F_LOCAL flag.
  */
 static const char * const policy_modes[] =
 {
@@ -2610,28 +2637,20 @@ static const char * const policy_modes[] =
 
 #ifdef CONFIG_TMPFS
 /**
- * mpol_parse_str - parse string to mempolicy
+ * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option.
  * @str: string containing mempolicy to parse
  * @mpol: pointer to struct mempolicy pointer, returned on success.
- * @no_context: flag whether to "contextualize" the mempolicy
  *
  * Format of input:
  *	<mode>[=<flags>][:<nodelist>]
  *
- * if @no_context is true, save the input nodemask in w.user_nodemask in
- * the returned mempolicy.  This will be used to "clone" the mempolicy in
- * a specific context [cpuset] at a later time.  Used to parse tmpfs mpol
- * mount option.  Note that if 'static' or 'relative' mode flags were
- * specified, the input nodemask will already have been saved.  Saving
- * it again is redundant, but safe.
- *
  * On success, returns 0, else 1
  */
-int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
+int mpol_parse_str(char *str, struct mempolicy **mpol)
 {
 	struct mempolicy *new = NULL;
 	unsigned short mode;
-	unsigned short uninitialized_var(mode_flags);
+	unsigned short mode_flags;
 	nodemask_t nodes;
 	char *nodelist = strchr(str, ':');
 	char *flags = strchr(str, '=');
@@ -2719,24 +2738,23 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
 	if (IS_ERR(new))
 		goto out;
 
-	if (no_context) {
-		/* save for contextualization */
-		new->w.user_nodemask = nodes;
-	} else {
-		int ret;
-		NODEMASK_SCRATCH(scratch);
-		if (scratch) {
-			task_lock(current);
-			ret = mpol_set_nodemask(new, &nodes, scratch);
-			task_unlock(current);
-		} else
-			ret = -ENOMEM;
-		NODEMASK_SCRATCH_FREE(scratch);
-		if (ret) {
-			mpol_put(new);
-			goto out;
-		}
-	}
+	/*
+	 * Save nodes for mpol_to_str() to show the tmpfs mount options
+	 * for /proc/mounts, /proc/pid/mounts and /proc/pid/mountinfo.
+	 */
+	if (mode != MPOL_PREFERRED)
+		new->v.nodes = nodes;
+	else if (nodelist)
+		new->v.preferred_node = first_node(nodes);
+	else
+		new->flags |= MPOL_F_LOCAL;
+
+	/*
+	 * Save nodes for contextualization: this will be used to "clone"
+	 * the mempolicy in a specific context [cpuset] at a later time.
+	 */
+	new->w.user_nodemask = nodes;
+
 	err = 0;
 
 out:
@@ -2756,13 +2774,12 @@ out:
  * @buffer: to contain formatted mempolicy string
  * @maxlen: length of @buffer
  * @pol: pointer to mempolicy to be formatted
- * @no_context: "context free" mempolicy - use nodemask in w.user_nodemask
 *
 * Convert a mempolicy into a string.
 * Returns the number of characters in buffer (if positive)
 * or an error (negative)
 */
-int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
+int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 {
 	char *p = buffer;
 	int l;
@@ -2788,7 +2805,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
 	case MPOL_PREFERRED:
 		nodes_clear(nodes);
 		if (flags & MPOL_F_LOCAL)
-			mode = MPOL_LOCAL;	/* pseudo-policy */
+			mode = MPOL_LOCAL;
 		else
 			node_set(pol->v.preferred_node, nodes);
 		break;
@@ -2796,10 +2813,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
 	case MPOL_BIND:
 		/* Fall through */
 	case MPOL_INTERLEAVE:
-		if (no_context)
-			nodes = pol->w.user_nodemask;
-		else
-			nodes = pol->v.nodes;
+		nodes = pol->v.nodes;
 		break;
 
 	default:
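Note: the mm/mempolicy.c hunks above do two things: sp->mutex becomes a spinlock again (sp->lock), and the no_context argument disappears from mpol_parse_str()/mpol_to_str() now that they are only used for the tmpfs mpol mount option. The interesting part of the lock conversion is shared_policy_replace(): a spinlock holder must not sleep, so instead of calling sp_alloc(GFP_KERNEL) under the lock it jumps to alloc_new, drops the lock, preallocates a node and a mempolicy, and restarts the lookup from scratch. The userspace sketch below shows that drop-the-lock/allocate/restart pattern; the pthread spinlock, the one-node "tree" and all names are illustrative stand-ins (build with -pthread), not the kernel code.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	long start, end;
};

static pthread_spinlock_t lock;		/* stands in for sp->lock */
static struct node *the_node;		/* stands in for the rbtree contents */

/* Split the tail off the_node when it spans past `end` (cf. shared_policy_replace). */
static int replace_range(long start, long end)
{
	struct node *n_new = NULL;

restart:
	pthread_spin_lock(&lock);
	if (the_node && the_node->end > end) {
		if (!n_new) {
			/* Cannot sleep (allocate) while spinning: drop the lock first. */
			pthread_spin_unlock(&lock);
			n_new = malloc(sizeof(*n_new));
			if (!n_new)
				return -1;
			/* Things may have changed while unlocked, so look again. */
			goto restart;
		}
		n_new->start = end;
		n_new->end = the_node->end;
		the_node->end = start;
		the_node = n_new;	/* toy stand-in for sp_insert() of the tail */
		n_new = NULL;
	}
	pthread_spin_unlock(&lock);
	free(n_new);			/* preallocation that ended up unused, if any */
	return 0;
}

int main(void)
{
	struct node first = { 0, 100 };

	pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
	the_node = &first;
	replace_range(10, 20);
	printf("tail node now spans [%ld, %ld)\n", the_node->start, the_node->end);
	return 0;
}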
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4ba5e37127fc..bc6cc0e913bd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -221,11 +221,6 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 int page_group_by_mobility_disabled __read_mostly;
 
-/*
- * NOTE:
- * Don't use set_pageblock_migratetype(page, MIGRATE_ISOLATE) directly.
- * Instead, use {un}set_pageblock_isolate.
- */
 void set_pageblock_migratetype(struct page *page, int migratetype)
 {
 
@@ -1655,20 +1650,6 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 	return true;
 }
 
-#ifdef CONFIG_MEMORY_ISOLATION
-static inline unsigned long nr_zone_isolate_freepages(struct zone *zone)
-{
-	if (unlikely(zone->nr_pageblock_isolate))
-		return zone->nr_pageblock_isolate * pageblock_nr_pages;
-	return 0;
-}
-#else
-static inline unsigned long nr_zone_isolate_freepages(struct zone *zone)
-{
-	return 0;
-}
-#endif
-
 bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		      int classzone_idx, int alloc_flags)
 {
@@ -1684,14 +1665,6 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
 	if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
 		free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
 
-	/*
-	 * If the zone has MIGRATE_ISOLATE type free pages, we should consider
-	 * it. nr_zone_isolate_freepages is never accurate so kswapd might not
-	 * sleep although it could do so. But this is more desirable for memory
-	 * hotplug than sleeping which can cause a livelock in the direct
-	 * reclaim path.
-	 */
-	free_pages -= nr_zone_isolate_freepages(z);
 	return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
 								free_pages);
 }
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 9d2264ea4606..383bdbb98b04 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -8,28 +8,6 @@
 #include <linux/memory.h>
 #include "internal.h"
 
-/* called while holding zone->lock */
-static void set_pageblock_isolate(struct page *page)
-{
-	if (get_pageblock_migratetype(page) == MIGRATE_ISOLATE)
-		return;
-
-	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
-	page_zone(page)->nr_pageblock_isolate++;
-}
-
-/* called while holding zone->lock */
-static void restore_pageblock_isolate(struct page *page, int migratetype)
-{
-	struct zone *zone = page_zone(page);
-	if (WARN_ON(get_pageblock_migratetype(page) != MIGRATE_ISOLATE))
-		return;
-
-	BUG_ON(zone->nr_pageblock_isolate <= 0);
-	set_pageblock_migratetype(page, migratetype);
-	zone->nr_pageblock_isolate--;
-}
-
 int set_migratetype_isolate(struct page *page, bool skip_hwpoisoned_pages)
 {
 	struct zone *zone;
@@ -80,7 +58,7 @@ out:
 		unsigned long nr_pages;
 		int migratetype = get_pageblock_migratetype(page);
 
-		set_pageblock_isolate(page);
+		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
 		nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);
 
 		__mod_zone_freepage_state(zone, -nr_pages, migratetype);
@@ -103,7 +81,7 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype)
 		goto out;
 	nr_pages = move_freepages_block(zone, page, migratetype);
 	__mod_zone_freepage_state(zone, nr_pages, migratetype);
-	restore_pageblock_isolate(page, migratetype);
+	set_pageblock_migratetype(page, migratetype);
 out:
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
diff --git a/mm/shmem.c b/mm/shmem.c
index 5c90d84c2b02..5dd56f6efdbd 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -889,7 +889,7 @@ static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
 	if (!mpol || mpol->mode == MPOL_DEFAULT)
 		return;		/* show nothing */
 
-	mpol_to_str(buffer, sizeof(buffer), mpol, 1);
+	mpol_to_str(buffer, sizeof(buffer), mpol);
 
 	seq_printf(seq, ",mpol=%s", buffer);
 }
@@ -2463,7 +2463,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
 			if (!gid_valid(sbinfo->gid))
 				goto bad_val;
 		} else if (!strcmp(this_char,"mpol")) {
-			if (mpol_parse_str(value, &sbinfo->mpol, 1))
+			if (mpol_parse_str(value, &sbinfo->mpol))
 				goto bad_val;
 		} else {
 			printk(KERN_ERR "tmpfs: Bad mount option %s\n",
diff --git a/mm/vmscan.c b/mm/vmscan.c
index adc7e9058181..196709f5ee58 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2452,12 +2452,16 @@ static bool zone_balanced(struct zone *zone, int order,
 }
 
 /*
- * pgdat_balanced is used when checking if a node is balanced for high-order
- * allocations. Only zones that meet watermarks and are in a zone allowed
- * by the callers classzone_idx are added to balanced_pages. The total of
- * balanced pages must be at least 25% of the zones allowed by classzone_idx
- * for the node to be considered balanced. Forcing all zones to be balanced
- * for high orders can cause excessive reclaim when there are imbalanced zones.
+ * pgdat_balanced() is used when checking if a node is balanced.
+ *
+ * For order-0, all zones must be balanced!
+ *
+ * For high-order allocations only zones that meet watermarks and are in a
+ * zone allowed by the callers classzone_idx are added to balanced_pages. The
+ * total of balanced pages must be at least 25% of the zones allowed by
+ * classzone_idx for the node to be considered balanced. Forcing all zones to
+ * be balanced for high orders can cause excessive reclaim when there are
+ * imbalanced zones.
  * The choice of 25% is due to
  *   o a 16M DMA zone that is balanced will not balance a zone on any
  *     reasonable sized machine
@@ -2467,17 +2471,43 @@ static bool zone_balanced(struct zone *zone, int order,
  * Similarly, on x86-64 the Normal zone would need to be at least 1G
  * to balance a node on its own. These seemed like reasonable ratios.
  */
-static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages,
-						int classzone_idx)
+static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
 {
 	unsigned long present_pages = 0;
+	unsigned long balanced_pages = 0;
 	int i;
 
-	for (i = 0; i <= classzone_idx; i++)
-		present_pages += pgdat->node_zones[i].present_pages;
+	/* Check the watermark levels */
+	for (i = 0; i <= classzone_idx; i++) {
+		struct zone *zone = pgdat->node_zones + i;
 
-	/* A special case here: if zone has no page, we think it's balanced */
-	return balanced_pages >= (present_pages >> 2);
+		if (!populated_zone(zone))
+			continue;
+
+		present_pages += zone->present_pages;
+
+		/*
+		 * A special case here:
+		 *
+		 * balance_pgdat() skips over all_unreclaimable after
+		 * DEF_PRIORITY. Effectively, it considers them balanced so
+		 * they must be considered balanced here as well!
+		 */
+		if (zone->all_unreclaimable) {
+			balanced_pages += zone->present_pages;
+			continue;
+		}
+
+		if (zone_balanced(zone, order, 0, i))
+			balanced_pages += zone->present_pages;
+		else if (!order)
+			return false;
+	}
+
+	if (order)
+		return balanced_pages >= (present_pages >> 2);
+	else
+		return true;
 }
 
 /*
@@ -2489,10 +2519,6 @@ static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages,
 static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
 					int classzone_idx)
 {
-	int i;
-	unsigned long balanced = 0;
-	bool all_zones_ok = true;
-
 	/* If a direct reclaimer woke kswapd within HZ/10, it's premature */
 	if (remaining)
 		return false;
@@ -2511,39 +2537,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
 		return false;
 	}
 
-	/* Check the watermark levels */
-	for (i = 0; i <= classzone_idx; i++) {
-		struct zone *zone = pgdat->node_zones + i;
-
-		if (!populated_zone(zone))
-			continue;
-
-		/*
-		 * balance_pgdat() skips over all_unreclaimable after
-		 * DEF_PRIORITY. Effectively, it considers them balanced so
-		 * they must be considered balanced here as well if kswapd
-		 * is to sleep
-		 */
-		if (zone->all_unreclaimable) {
-			balanced += zone->present_pages;
-			continue;
-		}
-
-		if (!zone_balanced(zone, order, 0, i))
-			all_zones_ok = false;
-		else
-			balanced += zone->present_pages;
-	}
-
-	/*
-	 * For high-order requests, the balanced zones must contain at least
-	 * 25% of the nodes pages for kswapd to sleep. For order-0, all zones
-	 * must be balanced
-	 */
-	if (order)
-		return pgdat_balanced(pgdat, balanced, classzone_idx);
-	else
-		return all_zones_ok;
+	return pgdat_balanced(pgdat, order, classzone_idx);
 }
 
 /*
@@ -2571,7 +2565,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 							int *classzone_idx)
 {
 	struct zone *unbalanced_zone;
-	unsigned long balanced;
 	int i;
 	int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 	unsigned long total_scanned;
@@ -2605,7 +2598,6 @@ loop_again:
 		int has_under_min_watermark_zone = 0;
 
 		unbalanced_zone = NULL;
-		balanced = 0;
 
 		/*
 		 * Scan in the highmem->dma direction for the highest
@@ -2761,8 +2753,6 @@ loop_again:
 			 * speculatively avoid congestion waits
 			 */
 			zone_clear_flag(zone, ZONE_CONGESTED);
-			if (i <= *classzone_idx)
-				balanced += zone->present_pages;
 		}
 
 	}
@@ -2776,7 +2766,7 @@ loop_again:
 				pfmemalloc_watermark_ok(pgdat))
 			wake_up(&pgdat->pfmemalloc_wait);
 
-		if (!unbalanced_zone || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))
+		if (pgdat_balanced(pgdat, order, *classzone_idx))
 			break;		/* kswapd: all done */
 		/*
 		 * OK, kswapd is getting into trouble.  Take a nap, then take
@@ -2785,7 +2775,7 @@ loop_again:
 		if (total_scanned && (sc.priority < DEF_PRIORITY - 2)) {
 			if (has_under_min_watermark_zone)
 				count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
-			else
+			else if (unbalanced_zone)
 				wait_iff_congested(unbalanced_zone, BLK_RW_ASYNC, HZ/10);
 		}
 
@@ -2800,12 +2790,7 @@ loop_again:
 	} while (--sc.priority >= 0);
 out:
 
-	/*
-	 * order-0: All zones must meet high watermark for a balanced node
-	 * high-order: Balanced zones must make up at least 25% of the node
-	 * for the node to be balanced
-	 */
-	if (unbalanced_zone && (!order || !pgdat_balanced(pgdat, balanced, *classzone_idx))) {
+	if (!pgdat_balanced(pgdat, order, *classzone_idx)) {
 		cond_resched();
 
 		try_to_freeze();
@@ -3137,8 +3122,8 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
    not required for correctness.  So if the last cpu in a node goes
    away, we get changed to run anywhere: as the first one comes back,
    restore their cpu bindings. */
-static int __devinit cpu_callback(struct notifier_block *nfb,
-					unsigned long action, void *hcpu)
+static int cpu_callback(struct notifier_block *nfb, unsigned long action,
+			void *hcpu)
 {
 	int nid;
 
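Note: the mm/vmscan.c hunks fold three slightly different open-coded balance checks into a single pgdat_balanced(pgdat, order, classzone_idx), and guard the wait_iff_congested() call so it is only made when an unbalanced zone was actually recorded. The policy itself: for order-0 every populated zone up to classzone_idx must meet its watermark, for higher orders it is enough that balanced zones cover at least 25% of the node's present pages, and all_unreclaimable zones count as balanced. The userspace sketch below walks through that policy with made-up zone sizes; the struct and field names are illustrative, not the kernel types.

#include <stdbool.h>
#include <stdio.h>

struct zone {
	unsigned long present_pages;
	bool populated;
	bool all_unreclaimable;
	bool meets_watermark;	/* stand-in for zone_balanced() */
};

static bool pgdat_balanced(const struct zone *zones, int nr, int order)
{
	unsigned long present_pages = 0, balanced_pages = 0;
	int i;

	for (i = 0; i < nr; i++) {
		const struct zone *z = &zones[i];

		if (!z->populated)
			continue;

		present_pages += z->present_pages;

		/* Zones reclaim has given up on are treated as balanced. */
		if (z->all_unreclaimable || z->meets_watermark) {
			balanced_pages += z->present_pages;
			continue;
		}

		if (!order)
			return false;	/* order-0: any unbalanced zone fails the node */
	}

	if (order)
		return balanced_pages >= (present_pages >> 2);	/* the 25% rule */
	return true;
}

int main(void)
{
	/* A small unbalanced DMA zone next to a large balanced Normal zone. */
	struct zone zones[] = {
		{ .present_pages = 4000,   .populated = true },
		{ .present_pages = 250000, .populated = true, .meets_watermark = true },
	};

	printf("order-0 balanced: %d\n", pgdat_balanced(zones, 2, 0));	/* prints 0 */
	printf("order-9 balanced: %d\n", pgdat_balanced(zones, 2, 9));	/* prints 1 */
	return 0;
}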
