author	Ingo Molnar <mingo@elte.hu>	2011-05-26 07:48:30 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-05-26 07:48:39 -0400
commit	1102c660dd35725a11c7ca9365c237f2f42f6b30 (patch)
tree	cd32d3053b30050182218e0d36b4aed7459c48de /mm/page_alloc.c
parent	6e9101aeec39961308176e0f59e73ac5d37d243a (diff)
parent	4db70f73e56961b9bcdfd0c36c62847a18b7dbb5 (diff)
Merge branch 'linus' into perf/urgent
Merge reason: Linus applied an overlapping commit:

  5f2e8e2b0bf0: kernel/watchdog.c: Use proper ANSI C prototypes

So merge it in to make sure we can iterate the file without conflicts.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	124
1 file changed, 79 insertions(+), 45 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3f8bce264df6..2a00f17c3bf4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -30,6 +30,7 @@
 #include <linux/pagevec.h>
 #include <linux/blkdev.h>
 #include <linux/slab.h>
+#include <linux/ratelimit.h>
 #include <linux/oom.h>
 #include <linux/notifier.h>
 #include <linux/topology.h>
@@ -39,6 +40,7 @@
 #include <linux/memory_hotplug.h>
 #include <linux/nodemask.h>
 #include <linux/vmalloc.h>
+#include <linux/vmstat.h>
 #include <linux/mempolicy.h>
 #include <linux/stop_machine.h>
 #include <linux/sort.h>
@@ -54,6 +56,7 @@
 #include <trace/events/kmem.h>
 #include <linux/ftrace_event.h>
 #include <linux/memcontrol.h>
+#include <linux/prefetch.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -1734,6 +1737,45 @@ static inline bool should_suppress_show_mem(void)
 	return ret;
 }
 
+static DEFINE_RATELIMIT_STATE(nopage_rs,
+		DEFAULT_RATELIMIT_INTERVAL,
+		DEFAULT_RATELIMIT_BURST);
+
+void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
+{
+	va_list args;
+	unsigned int filter = SHOW_MEM_FILTER_NODES;
+
+	if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
+		return;
+
+	/*
+	 * This documents exceptions given to allocations in certain
+	 * contexts that are allowed to allocate outside current's set
+	 * of allowed nodes.
+	 */
+	if (!(gfp_mask & __GFP_NOMEMALLOC))
+		if (test_thread_flag(TIF_MEMDIE) ||
+		    (current->flags & (PF_MEMALLOC | PF_EXITING)))
+			filter &= ~SHOW_MEM_FILTER_NODES;
+	if (in_interrupt() || !(gfp_mask & __GFP_WAIT))
+		filter &= ~SHOW_MEM_FILTER_NODES;
+
+	if (fmt) {
+		printk(KERN_WARNING);
+		va_start(args, fmt);
+		vprintk(fmt, args);
+		va_end(args);
+	}
+
+	pr_warning("%s: page allocation failure: order:%d, mode:0x%x\n",
+		current->comm, order, gfp_mask);
+
+	dump_stack();
+	if (!should_suppress_show_mem())
+		show_mem(filter);
+}
+
 static inline int
 should_alloc_retry(gfp_t gfp_mask, unsigned int order,
 				unsigned long pages_reclaimed)
@@ -2064,6 +2106,7 @@ restart:
 	first_zones_zonelist(zonelist, high_zoneidx, NULL,
 					&preferred_zone);
 
+rebalance:
 	/* This is the last chance, in general, before the goto nopage. */
 	page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
 			high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -2071,7 +2114,6 @@ restart:
 	if (page)
 		goto got_pg;
 
-rebalance:
 	/* Allocate without watermarks if the context allows */
 	if (alloc_flags & ALLOC_NO_WATERMARKS) {
 		page = __alloc_pages_high_priority(gfp_mask, order,
@@ -2105,7 +2147,7 @@ rebalance:
 					sync_migration);
 	if (page)
 		goto got_pg;
-	sync_migration = !(gfp_mask & __GFP_NO_KSWAPD);
+	sync_migration = true;
 
 	/* Try direct reclaim and then allocating */
 	page = __alloc_pages_direct_reclaim(gfp_mask, order,
@@ -2176,27 +2218,7 @@ rebalance:
 	}
 
 nopage:
-	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
-		unsigned int filter = SHOW_MEM_FILTER_NODES;
-
-		/*
-		 * This documents exceptions given to allocations in certain
-		 * contexts that are allowed to allocate outside current's set
-		 * of allowed nodes.
-		 */
-		if (!(gfp_mask & __GFP_NOMEMALLOC))
-			if (test_thread_flag(TIF_MEMDIE) ||
-			    (current->flags & (PF_MEMALLOC | PF_EXITING)))
-				filter &= ~SHOW_MEM_FILTER_NODES;
-		if (in_interrupt() || !wait)
-			filter &= ~SHOW_MEM_FILTER_NODES;
-
-		pr_warning("%s: page allocation failure. order:%d, mode:0x%x\n",
-			current->comm, order, gfp_mask);
-		dump_stack();
-		if (!should_suppress_show_mem())
-			show_mem(filter);
-	}
+	warn_alloc_failed(gfp_mask, order, NULL);
 	return page;
 got_pg:
 	if (kmemcheck_enabled)
@@ -2225,6 +2247,10 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 
 	if (should_fail_alloc_page(gfp_mask, order))
 		return NULL;
+#ifndef CONFIG_ZONE_DMA
+	if (WARN_ON_ONCE(gfp_mask & __GFP_DMA))
+		return NULL;
+#endif
 
 	/*
 	 * Check the zones suitable for the gfp_mask contain at least one
@@ -2472,10 +2498,10 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 #endif
 
 /*
- * Determine whether the zone's node should be displayed or not, depending on
- * whether SHOW_MEM_FILTER_NODES was passed to __show_free_areas().
+ * Determine whether the node should be displayed or not, depending on whether
+ * SHOW_MEM_FILTER_NODES was passed to show_free_areas().
  */
-static bool skip_free_areas_zone(unsigned int flags, const struct zone *zone)
+bool skip_free_areas_node(unsigned int flags, int nid)
 {
 	bool ret = false;
 
@@ -2483,8 +2509,7 @@ static bool skip_free_areas_zone(unsigned int flags, const struct zone *zone)
 		goto out;
 
 	get_mems_allowed();
-	ret = !node_isset(zone->zone_pgdat->node_id,
-				cpuset_current_mems_allowed);
+	ret = !node_isset(nid, cpuset_current_mems_allowed);
 	put_mems_allowed();
 out:
 	return ret;
@@ -2499,13 +2524,13 @@ out:
  * Suppresses nodes that are not allowed by current's cpuset if
  * SHOW_MEM_FILTER_NODES is passed.
  */
-void __show_free_areas(unsigned int filter)
+void show_free_areas(unsigned int filter)
 {
 	int cpu;
 	struct zone *zone;
 
 	for_each_populated_zone(zone) {
-		if (skip_free_areas_zone(filter, zone))
+		if (skip_free_areas_node(filter, zone_to_nid(zone)))
 			continue;
 		show_node(zone);
 		printk("%s per-cpu:\n", zone->name);
@@ -2548,7 +2573,7 @@ void __show_free_areas(unsigned int filter)
 	for_each_populated_zone(zone) {
 		int i;
 
-		if (skip_free_areas_zone(filter, zone))
+		if (skip_free_areas_node(filter, zone_to_nid(zone)))
 			continue;
 		show_node(zone);
 		printk("%s"
@@ -2617,7 +2642,7 @@ void __show_free_areas(unsigned int filter)
 	for_each_populated_zone(zone) {
 		unsigned long nr[MAX_ORDER], flags, order, total = 0;
 
-		if (skip_free_areas_zone(filter, zone))
+		if (skip_free_areas_node(filter, zone_to_nid(zone)))
 			continue;
 		show_node(zone);
 		printk("%s: ", zone->name);
@@ -2638,11 +2663,6 @@ void __show_free_areas(unsigned int filter)
 	show_swap_cache_info();
 }
 
-void show_free_areas(void)
-{
-	__show_free_areas(0);
-}
-
 static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
 {
 	zoneref->zone = zone;
@@ -3313,6 +3333,20 @@ static inline unsigned long wait_table_bits(unsigned long size)
 #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
 
 /*
+ * Check if a pageblock contains reserved pages
+ */
+static int pageblock_is_reserved(unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long pfn;
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+		if (!pfn_valid_within(pfn) || PageReserved(pfn_to_page(pfn)))
+			return 1;
+	}
+	return 0;
+}
+
+/*
  * Mark a number of pageblocks as MIGRATE_RESERVE. The number
  * of blocks reserved is based on min_wmark_pages(zone). The memory within
  * the reserve will tend to store contiguous free pages. Setting min_free_kbytes
@@ -3321,7 +3355,7 @@ static inline unsigned long wait_table_bits(unsigned long size)
  */
 static void setup_zone_migrate_reserve(struct zone *zone)
 {
-	unsigned long start_pfn, pfn, end_pfn;
+	unsigned long start_pfn, pfn, end_pfn, block_end_pfn;
 	struct page *page;
 	unsigned long block_migratetype;
 	int reserve;
@@ -3351,7 +3385,8 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 			continue;
 
 		/* Blocks with reserved pages will never free, skip them. */
-		if (PageReserved(page))
+		block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn);
+		if (pageblock_is_reserved(pfn, block_end_pfn))
 			continue;
 
 		block_migratetype = get_pageblock_migratetype(page);
@@ -3540,7 +3575,7 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p,
 	pcp->batch = PAGE_SHIFT * 8;
 }
 
-static __meminit void setup_zone_pageset(struct zone *zone)
+static void setup_zone_pageset(struct zone *zone)
 {
 	int cpu;
 
@@ -5099,7 +5134,7 @@ void setup_per_zone_wmarks(void)
  *     1TB       101        10GB
  *    10TB       320        32GB
  */
-void calculate_zone_inactive_ratio(struct zone *zone)
+static void __meminit calculate_zone_inactive_ratio(struct zone *zone)
 {
 	unsigned int gb, ratio;
 
@@ -5113,7 +5148,7 @@ void calculate_zone_inactive_ratio(struct zone *zone)
 	zone->inactive_ratio = ratio;
 }
 
-static void __init setup_per_zone_inactive_ratio(void)
+static void __meminit setup_per_zone_inactive_ratio(void)
 {
 	struct zone *zone;
 
@@ -5145,7 +5180,7 @@ static void __init setup_per_zone_inactive_ratio(void)
  *  8192MB:	11584k
  * 16384MB:	16384k
  */
-static int __init init_per_zone_wmark_min(void)
+int __meminit init_per_zone_wmark_min(void)
 {
 	unsigned long lowmem_kbytes;
 
@@ -5157,6 +5192,7 @@ static int __init init_per_zone_wmark_min(void)
 	if (min_free_kbytes > 65536)
 		min_free_kbytes = 65536;
 	setup_per_zone_wmarks();
+	refresh_zone_stat_thresholds();
 	setup_per_zone_lowmem_reserve();
 	setup_per_zone_inactive_ratio();
 	return 0;
@@ -5507,10 +5543,8 @@ int set_migratetype_isolate(struct page *page)
 	struct memory_isolate_notify arg;
 	int notifier_ret;
 	int ret = -EBUSY;
-	int zone_idx;
 
 	zone = page_zone(page);
-	zone_idx = zone_idx(zone);
 
 	spin_lock_irqsave(&zone->lock, flags);
 