author          Ingo Molnar <mingo@elte.hu>     2011-05-26 07:48:30 -0400
committer       Ingo Molnar <mingo@elte.hu>     2011-05-26 07:48:39 -0400
commit          1102c660dd35725a11c7ca9365c237f2f42f6b30 (patch)
tree            cd32d3053b30050182218e0d36b4aed7459c48de /mm/page_alloc.c
parent          6e9101aeec39961308176e0f59e73ac5d37d243a (diff)
parent          4db70f73e56961b9bcdfd0c36c62847a18b7dbb5 (diff)
Merge branch 'linus' into perf/urgent
Merge reason: Linus applied an overlapping commit:
5f2e8e2b0bf0: kernel/watchdog.c: Use proper ANSI C prototypes
So merge it in to make sure we can iterate the file without conflicts.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'mm/page_alloc.c')
 -rw-r--r--   mm/page_alloc.c   124
 1 file changed, 79 insertions(+), 45 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3f8bce264df6..2a00f17c3bf4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -30,6 +30,7 @@
 #include <linux/pagevec.h>
 #include <linux/blkdev.h>
 #include <linux/slab.h>
+#include <linux/ratelimit.h>
 #include <linux/oom.h>
 #include <linux/notifier.h>
 #include <linux/topology.h>
@@ -39,6 +40,7 @@
 #include <linux/memory_hotplug.h>
 #include <linux/nodemask.h>
 #include <linux/vmalloc.h>
+#include <linux/vmstat.h>
 #include <linux/mempolicy.h>
 #include <linux/stop_machine.h>
 #include <linux/sort.h>
@@ -54,6 +56,7 @@
 #include <trace/events/kmem.h>
 #include <linux/ftrace_event.h>
 #include <linux/memcontrol.h>
+#include <linux/prefetch.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -1734,6 +1737,45 @@ static inline bool should_suppress_show_mem(void)
         return ret;
 }
 
+static DEFINE_RATELIMIT_STATE(nopage_rs,
+                DEFAULT_RATELIMIT_INTERVAL,
+                DEFAULT_RATELIMIT_BURST);
+
+void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
+{
+        va_list args;
+        unsigned int filter = SHOW_MEM_FILTER_NODES;
+
+        if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
+                return;
+
+        /*
+         * This documents exceptions given to allocations in certain
+         * contexts that are allowed to allocate outside current's set
+         * of allowed nodes.
+         */
+        if (!(gfp_mask & __GFP_NOMEMALLOC))
+                if (test_thread_flag(TIF_MEMDIE) ||
+                    (current->flags & (PF_MEMALLOC | PF_EXITING)))
+                        filter &= ~SHOW_MEM_FILTER_NODES;
+        if (in_interrupt() || !(gfp_mask & __GFP_WAIT))
+                filter &= ~SHOW_MEM_FILTER_NODES;
+
+        if (fmt) {
+                printk(KERN_WARNING);
+                va_start(args, fmt);
+                vprintk(fmt, args);
+                va_end(args);
+        }
+
+        pr_warning("%s: page allocation failure: order:%d, mode:0x%x\n",
+                current->comm, order, gfp_mask);
+
+        dump_stack();
+        if (!should_suppress_show_mem())
+                show_mem(filter);
+}
+
 static inline int
 should_alloc_retry(gfp_t gfp_mask, unsigned int order,
                                 unsigned long pages_reclaimed)
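A note on the ratelimit machinery the new helper relies on: DEFINE_RATELIMIT_STATE() declares a shared budget of DEFAULT_RATELIMIT_BURST messages per DEFAULT_RATELIMIT_INTERVAL, and __ratelimit() returns nonzero only while that budget lasts. A minimal sketch of the pattern outside this patch (demo_rs and demo_warn are hypothetical names, not from the diff):

#include <linux/kernel.h>
#include <linux/ratelimit.h>

/* Shared state: allow a burst of messages per interval (10 per 5*HZ
 * with the mainline defaults); callers over budget bail out early. */
static DEFINE_RATELIMIT_STATE(demo_rs,
                DEFAULT_RATELIMIT_INTERVAL,
                DEFAULT_RATELIMIT_BURST);

static void demo_warn(int order)
{
        if (!__ratelimit(&demo_rs))
                return;         /* over budget: warning suppressed */
        pr_warning("demo: allocation failure, order:%d\n", order);
}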
@@ -2064,6 +2106,7 @@ restart:
         first_zones_zonelist(zonelist, high_zoneidx, NULL,
                                                 &preferred_zone);
 
+rebalance:
         /* This is the last chance, in general, before the goto nopage. */
         page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
                         high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -2071,7 +2114,6 @@ restart:
         if (page)
                 goto got_pg;
 
-rebalance:
         /* Allocate without watermarks if the context allows */
         if (alloc_flags & ALLOC_NO_WATERMARKS) {
                 page = __alloc_pages_high_priority(gfp_mask, order,
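The two hunks above move the rebalance: label, so a retry now repeats the watermark-respecting get_page_from_freelist() attempt instead of jumping straight to the no-watermark path. A heavily simplified, self-contained toy of that control flow (every name and stub below is illustrative, not a kernel function):

#include <stdbool.h>
#include <stddef.h>

struct page;                            /* opaque stand-in */

static struct page *try_freelist(void)      { return NULL; }   /* stub */
static struct page *try_no_watermarks(void) { return NULL; }   /* stub */
static bool should_retry(int *budget)       { return (*budget)-- > 0; }

static struct page *alloc_slowpath_toy(void)
{
        struct page *page;
        int budget = 3;

rebalance:
        /* the last chance, in general, before nopage */
        page = try_freelist();
        if (page)
                return page;
        page = try_no_watermarks();
        if (page)
                return page;
        if (should_retry(&budget))
                goto rebalance;         /* retry repeats the freelist try */
        return NULL;                    /* nopage */
}

int main(void)
{
        return alloc_slowpath_toy() ? 1 : 0;
}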
@@ -2105,7 +2147,7 @@ rebalance:
                                         sync_migration);
         if (page)
                 goto got_pg;
-        sync_migration = !(gfp_mask & __GFP_NO_KSWAPD);
+        sync_migration = true;
 
         /* Try direct reclaim and then allocating */
         page = __alloc_pages_direct_reclaim(gfp_mask, order,
@@ -2176,27 +2218,7 @@ rebalance:
         }
 
 nopage:
-        if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
-                unsigned int filter = SHOW_MEM_FILTER_NODES;
-
-                /*
-                 * This documents exceptions given to allocations in certain
-                 * contexts that are allowed to allocate outside current's set
-                 * of allowed nodes.
-                 */
-                if (!(gfp_mask & __GFP_NOMEMALLOC))
-                        if (test_thread_flag(TIF_MEMDIE) ||
-                            (current->flags & (PF_MEMALLOC | PF_EXITING)))
-                                filter &= ~SHOW_MEM_FILTER_NODES;
-                if (in_interrupt() || !wait)
-                        filter &= ~SHOW_MEM_FILTER_NODES;
-
-                pr_warning("%s: page allocation failure. order:%d, mode:0x%x\n",
-                        current->comm, order, gfp_mask);
-                dump_stack();
-                if (!should_suppress_show_mem())
-                        show_mem(filter);
-        }
+        warn_alloc_failed(gfp_mask, order, NULL);
         return page;
 got_pg:
         if (kmemcheck_enabled)
@@ -2225,6 +2247,10 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 
         if (should_fail_alloc_page(gfp_mask, order))
                 return NULL;
+#ifndef CONFIG_ZONE_DMA
+        if (WARN_ON_ONCE(gfp_mask & __GFP_DMA))
+                return NULL;
+#endif
 
         /*
          * Check the zones suitable for the gfp_mask contain at least one
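WARN_ON_ONCE() evaluates to the truth value of its condition and prints a single backtrace the first time it fires, so on kernels built without CONFIG_ZONE_DMA a __GFP_DMA request now fails loudly once rather than silently falling through to another zone. The same idiom in a hypothetical wrapper (demo_alloc_dma is illustrative only):

#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/mm.h>

#ifndef CONFIG_ZONE_DMA
static struct page *demo_alloc_dma(gfp_t gfp_mask, unsigned int order)
{
        /* warns with a stack trace once, returns NULL every time */
        if (WARN_ON_ONCE(gfp_mask & __GFP_DMA))
                return NULL;
        return alloc_pages(gfp_mask, order);
}
#endif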
@@ -2472,10 +2498,10 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 #endif
 
 /*
- * Determine whether the zone's node should be displayed or not, depending on
- * whether SHOW_MEM_FILTER_NODES was passed to __show_free_areas().
+ * Determine whether the node should be displayed or not, depending on whether
+ * SHOW_MEM_FILTER_NODES was passed to show_free_areas().
  */
-static bool skip_free_areas_zone(unsigned int flags, const struct zone *zone)
+bool skip_free_areas_node(unsigned int flags, int nid)
 {
         bool ret = false;
 
@@ -2483,8 +2509,7 @@ static bool skip_free_areas_zone(unsigned int flags, const struct zone *zone)
                 goto out;
 
         get_mems_allowed();
-        ret = !node_isset(zone->zone_pgdat->node_id,
-                                cpuset_current_mems_allowed);
+        ret = !node_isset(nid, cpuset_current_mems_allowed);
         put_mems_allowed();
 out:
         return ret;
@@ -2499,13 +2524,13 @@ out:
  * Suppresses nodes that are not allowed by current's cpuset if
  * SHOW_MEM_FILTER_NODES is passed.
  */
-void __show_free_areas(unsigned int filter)
+void show_free_areas(unsigned int filter)
 {
         int cpu;
         struct zone *zone;
 
         for_each_populated_zone(zone) {
-                if (skip_free_areas_zone(filter, zone))
+                if (skip_free_areas_node(filter, zone_to_nid(zone)))
                         continue;
                 show_node(zone);
                 printk("%s per-cpu:\n", zone->name);
@@ -2548,7 +2573,7 @@ void __show_free_areas(unsigned int filter)
         for_each_populated_zone(zone) {
                 int i;
 
-                if (skip_free_areas_zone(filter, zone))
+                if (skip_free_areas_node(filter, zone_to_nid(zone)))
                         continue;
                 show_node(zone);
                 printk("%s"
@@ -2617,7 +2642,7 @@ void __show_free_areas(unsigned int filter)
         for_each_populated_zone(zone) {
                 unsigned long nr[MAX_ORDER], flags, order, total = 0;
 
-                if (skip_free_areas_zone(filter, zone))
+                if (skip_free_areas_node(filter, zone_to_nid(zone)))
                         continue;
                 show_node(zone);
                 printk("%s: ", zone->name);
@@ -2638,11 +2663,6 @@ void __show_free_areas(unsigned int filter)
         show_swap_cache_info();
 }
 
-void show_free_areas(void)
-{
-        __show_free_areas(0);
-}
-
 static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
 {
         zoneref->zone = zone;
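With the zero-argument wrapper deleted, the unfiltered dump is now spelled with an explicit zero filter. Hypothetical call sites (not part of this diff) showing the transition:

#include <linux/mm.h>

static void demo_dump_memory_state(void)
{
        show_free_areas(0);                     /* was: show_free_areas() */
        show_free_areas(SHOW_MEM_FILTER_NODES); /* was: __show_free_areas(SHOW_MEM_FILTER_NODES) */
}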
@@ -3313,6 +3333,20 @@ static inline unsigned long wait_table_bits(unsigned long size)
 #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
 
 /*
+ * Check if a pageblock contains reserved pages
+ */
+static int pageblock_is_reserved(unsigned long start_pfn, unsigned long end_pfn)
+{
+        unsigned long pfn;
+
+        for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+                if (!pfn_valid_within(pfn) || PageReserved(pfn_to_page(pfn)))
+                        return 1;
+        }
+        return 0;
+}
+
+/*
  * Mark a number of pageblocks as MIGRATE_RESERVE. The number
  * of blocks reserved is based on min_wmark_pages(zone). The memory within
  * the reserve will tend to store contiguous free pages. Setting min_free_kbytes
@@ -3321,7 +3355,7 @@ static inline unsigned long wait_table_bits(unsigned long size)
  */
 static void setup_zone_migrate_reserve(struct zone *zone)
 {
-        unsigned long start_pfn, pfn, end_pfn;
+        unsigned long start_pfn, pfn, end_pfn, block_end_pfn;
         struct page *page;
         unsigned long block_migratetype;
         int reserve;
@@ -3351,7 +3385,8 @@ static void setup_zone_migrate_reserve(struct zone *zone)
                         continue;
 
                 /* Blocks with reserved pages will never free, skip them. */
-                if (PageReserved(page))
+                block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn);
+                if (pageblock_is_reserved(pfn, block_end_pfn))
                         continue;
 
                 block_migratetype = get_pageblock_migratetype(page);
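The min() clamp matters because a zone's end need not be pageblock-aligned; without it, scanning the final block would walk pfns past end_pfn. A standalone userspace demo of the interval arithmetic (the pageblock size here is an arbitrary illustrative value):

#include <stdio.h>

#define DEMO_PAGEBLOCK_NR_PAGES 512UL   /* illustrative, not the kernel value */

static unsigned long min_ul(unsigned long a, unsigned long b)
{
        return a < b ? a : b;
}

int main(void)
{
        unsigned long pfn, block_end;
        unsigned long end_pfn = 1000;   /* zone end, not block-aligned */

        for (pfn = 0; pfn < end_pfn; pfn += DEMO_PAGEBLOCK_NR_PAGES) {
                block_end = min_ul(pfn + DEMO_PAGEBLOCK_NR_PAGES, end_pfn);
                /* prints: scan [0, 512) then scan [512, 1000) */
                printf("scan [%lu, %lu)\n", pfn, block_end);
        }
        return 0;
}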
@@ -3540,7 +3575,7 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p,
                 pcp->batch = PAGE_SHIFT * 8;
 }
 
-static __meminit void setup_zone_pageset(struct zone *zone)
+static void setup_zone_pageset(struct zone *zone)
 {
         int cpu;
 
@@ -5099,7 +5134,7 @@ void setup_per_zone_wmarks(void)
  *    1TB     101        10GB
  *   10TB     320        32GB
  */
-void calculate_zone_inactive_ratio(struct zone *zone)
+static void __meminit calculate_zone_inactive_ratio(struct zone *zone)
 {
         unsigned int gb, ratio;
 
@@ -5113,7 +5148,7 @@ void calculate_zone_inactive_ratio(struct zone *zone)
         zone->inactive_ratio = ratio;
 }
 
-static void __init setup_per_zone_inactive_ratio(void)
+static void __meminit setup_per_zone_inactive_ratio(void)
 {
         struct zone *zone;
 
@@ -5145,7 +5180,7 @@ static void __init setup_per_zone_inactive_ratio(void)
  *    8192MB:       11584k
  *   16384MB:       16384k
  */
-static int __init init_per_zone_wmark_min(void)
+int __meminit init_per_zone_wmark_min(void)
 {
         unsigned long lowmem_kbytes;
 
@@ -5157,6 +5192,7 @@ static int __init init_per_zone_wmark_min(void)
         if (min_free_kbytes > 65536)
                 min_free_kbytes = 65536;
         setup_per_zone_wmarks();
+        refresh_zone_stat_thresholds();
         setup_per_zone_lowmem_reserve();
         setup_per_zone_inactive_ratio();
         return 0;
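For context, this function seeds min_free_kbytes as int_sqrt(lowmem_kbytes * 16), clamped to [128, 65536], which is what produces the table in the comment above. A standalone userspace sketch of the arithmetic (floating-point sqrt() here, so results may differ from the kernel's int_sqrt() by one):

#include <math.h>
#include <stdio.h>

static unsigned long wmark_min_kbytes(unsigned long lowmem_kbytes)
{
        unsigned long min_free =
                (unsigned long)sqrt((double)lowmem_kbytes * 16.0);

        if (min_free < 128)
                min_free = 128;
        if (min_free > 65536)
                min_free = 65536;
        return min_free;
}

int main(void)
{
        /* reproduces the comment's table: 16MB -> 512k, 8192MB -> ~11585k */
        printf("%lu\n", wmark_min_kbytes(16UL * 1024));
        printf("%lu\n", wmark_min_kbytes(8192UL * 1024));
        return 0;
}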
@@ -5507,10 +5543,8 @@ int set_migratetype_isolate(struct page *page)
         struct memory_isolate_notify arg;
         int notifier_ret;
         int ret = -EBUSY;
-        int zone_idx;
 
         zone = page_zone(page);
-        zone_idx = zone_idx(zone);
 
         spin_lock_irqsave(&zone->lock, flags);
 