about summary refs log tree commit diff stats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2011-05-14 06:06:36 -0400
committerThomas Gleixner <tglx@linutronix.de>2011-05-14 06:06:36 -0400
commita18f22a968de17b29f2310cdb7ba69163e65ec15 (patch)
treea7d56d88fad5e444d7661484109758a2f436129e /mm/page_alloc.c
parenta1c57e0fec53defe745e64417eacdbd3618c3e66 (diff)
parent798778b8653f64b7b2162ac70eca10367cff6ce8 (diff)
Merge branch 'consolidate-clksrc-i8253' of master.kernel.org:~rmk/linux-2.6-arm into timers/clocksource
Conflicts: arch/ia64/kernel/cyclone.c, arch/mips/kernel/i8253.c, arch/x86/kernel/i8253.c. Reason: Resolve conflicts so further cleanups do not conflict further. Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c175
1 files changed, 118 insertions, 57 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a873e61e312e..9f8a97b9a350 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -53,6 +53,7 @@
53#include <linux/compaction.h> 53#include <linux/compaction.h>
54#include <trace/events/kmem.h> 54#include <trace/events/kmem.h>
55#include <linux/ftrace_event.h> 55#include <linux/ftrace_event.h>
56#include <linux/memcontrol.h>
56 57
57#include <asm/tlbflush.h> 58#include <asm/tlbflush.h>
58#include <asm/div64.h> 59#include <asm/div64.h>
@@ -286,7 +287,7 @@ static void bad_page(struct page *page)
286 287
287 /* Don't complain about poisoned pages */ 288 /* Don't complain about poisoned pages */
288 if (PageHWPoison(page)) { 289 if (PageHWPoison(page)) {
289 __ClearPageBuddy(page); 290 reset_page_mapcount(page); /* remove PageBuddy */
290 return; 291 return;
291 } 292 }
292 293
@@ -317,7 +318,7 @@ static void bad_page(struct page *page)
317 dump_stack(); 318 dump_stack();
318out: 319out:
319 /* Leave bad fields for debug, except PageBuddy could make trouble */ 320 /* Leave bad fields for debug, except PageBuddy could make trouble */
320 __ClearPageBuddy(page); 321 reset_page_mapcount(page); /* remove PageBuddy */
321 add_taint(TAINT_BAD_PAGE); 322 add_taint(TAINT_BAD_PAGE);
322} 323}
323 324
@@ -565,7 +566,8 @@ static inline int free_pages_check(struct page *page)
565 if (unlikely(page_mapcount(page) | 566 if (unlikely(page_mapcount(page) |
566 (page->mapping != NULL) | 567 (page->mapping != NULL) |
567 (atomic_read(&page->_count) != 0) | 568 (atomic_read(&page->_count) != 0) |
568 (page->flags & PAGE_FLAGS_CHECK_AT_FREE))) { 569 (page->flags & PAGE_FLAGS_CHECK_AT_FREE) |
570 (mem_cgroup_bad_page_check(page)))) {
569 bad_page(page); 571 bad_page(page);
570 return 1; 572 return 1;
571 } 573 }
@@ -614,6 +616,10 @@ static void free_pcppages_bulk(struct zone *zone, int count,
614 list = &pcp->lists[migratetype]; 616 list = &pcp->lists[migratetype];
615 } while (list_empty(list)); 617 } while (list_empty(list));
616 618
619 /* This is the only non-empty list. Free them all. */
620 if (batch_free == MIGRATE_PCPTYPES)
621 batch_free = to_free;
622
617 do { 623 do {
618 page = list_entry(list->prev, struct page, lru); 624 page = list_entry(list->prev, struct page, lru);
619 /* must delete as __free_one_page list manipulates */ 625 /* must delete as __free_one_page list manipulates */
@@ -750,7 +756,8 @@ static inline int check_new_page(struct page *page)
750 if (unlikely(page_mapcount(page) | 756 if (unlikely(page_mapcount(page) |
751 (page->mapping != NULL) | 757 (page->mapping != NULL) |
752 (atomic_read(&page->_count) != 0) | 758 (atomic_read(&page->_count) != 0) |
753 (page->flags & PAGE_FLAGS_CHECK_AT_PREP))) { 759 (page->flags & PAGE_FLAGS_CHECK_AT_PREP) |
760 (mem_cgroup_bad_page_check(page)))) {
754 bad_page(page); 761 bad_page(page);
755 return 1; 762 return 1;
756 } 763 }
@@ -863,9 +870,8 @@ static int move_freepages(struct zone *zone,
863 } 870 }
864 871
865 order = page_order(page); 872 order = page_order(page);
866 list_del(&page->lru); 873 list_move(&page->lru,
867 list_add(&page->lru, 874 &zone->free_area[order].free_list[migratetype]);
868 &zone->free_area[order].free_list[migratetype]);
869 page += 1 << order; 875 page += 1 << order;
870 pages_moved += 1 << order; 876 pages_moved += 1 << order;
871 } 877 }
@@ -936,7 +942,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
936 * If breaking a large block of pages, move all free 942 * If breaking a large block of pages, move all free
937 * pages to the preferred allocation list. If falling 943 * pages to the preferred allocation list. If falling
938 * back for a reclaimable kernel allocation, be more 944 * back for a reclaimable kernel allocation, be more
939 * agressive about taking ownership of free pages 945 * aggressive about taking ownership of free pages
940 */ 946 */
941 if (unlikely(current_order >= (pageblock_order >> 1)) || 947 if (unlikely(current_order >= (pageblock_order >> 1)) ||
942 start_migratetype == MIGRATE_RECLAIMABLE || 948 start_migratetype == MIGRATE_RECLAIMABLE ||
@@ -1333,7 +1339,7 @@ again:
1333 } 1339 }
1334 1340
1335 __count_zone_vm_events(PGALLOC, zone, 1 << order); 1341 __count_zone_vm_events(PGALLOC, zone, 1 << order);
1336 zone_statistics(preferred_zone, zone); 1342 zone_statistics(preferred_zone, zone, gfp_flags);
1337 local_irq_restore(flags); 1343 local_irq_restore(flags);
1338 1344
1339 VM_BUG_ON(bad_range(zone, page)); 1345 VM_BUG_ON(bad_range(zone, page));
@@ -1714,6 +1720,20 @@ try_next_zone:
1714 return page; 1720 return page;
1715} 1721}
1716 1722
1723/*
1724 * Large machines with many possible nodes should not always dump per-node
1725 * meminfo in irq context.
1726 */
1727static inline bool should_suppress_show_mem(void)
1728{
1729 bool ret = false;
1730
1731#if NODES_SHIFT > 8
1732 ret = in_interrupt();
1733#endif
1734 return ret;
1735}
1736
1717static inline int 1737static inline int
1718should_alloc_retry(gfp_t gfp_mask, unsigned int order, 1738should_alloc_retry(gfp_t gfp_mask, unsigned int order,
1719 unsigned long pages_reclaimed) 1739 unsigned long pages_reclaimed)
@@ -2085,7 +2105,7 @@ rebalance:
2085 sync_migration); 2105 sync_migration);
2086 if (page) 2106 if (page)
2087 goto got_pg; 2107 goto got_pg;
2088 sync_migration = true; 2108 sync_migration = !(gfp_mask & __GFP_NO_KSWAPD);
2089 2109
2090 /* Try direct reclaim and then allocating */ 2110 /* Try direct reclaim and then allocating */
2091 page = __alloc_pages_direct_reclaim(gfp_mask, order, 2111 page = __alloc_pages_direct_reclaim(gfp_mask, order,
@@ -2157,11 +2177,25 @@ rebalance:
2157 2177
2158nopage: 2178nopage:
2159 if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) { 2179 if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
2160 printk(KERN_WARNING "%s: page allocation failure." 2180 unsigned int filter = SHOW_MEM_FILTER_NODES;
2161 " order:%d, mode:0x%x\n", 2181
2182 /*
2183 * This documents exceptions given to allocations in certain
2184 * contexts that are allowed to allocate outside current's set
2185 * of allowed nodes.
2186 */
2187 if (!(gfp_mask & __GFP_NOMEMALLOC))
2188 if (test_thread_flag(TIF_MEMDIE) ||
2189 (current->flags & (PF_MEMALLOC | PF_EXITING)))
2190 filter &= ~SHOW_MEM_FILTER_NODES;
2191 if (in_interrupt() || !wait)
2192 filter &= ~SHOW_MEM_FILTER_NODES;
2193
2194 pr_warning("%s: page allocation failure. order:%d, mode:0x%x\n",
2162 current->comm, order, gfp_mask); 2195 current->comm, order, gfp_mask);
2163 dump_stack(); 2196 dump_stack();
2164 show_mem(); 2197 if (!should_suppress_show_mem())
2198 show_mem(filter);
2165 } 2199 }
2166 return page; 2200 return page;
2167got_pg: 2201got_pg:
@@ -2411,19 +2445,42 @@ void si_meminfo_node(struct sysinfo *val, int nid)
2411} 2445}
2412#endif 2446#endif
2413 2447
2448/*
2449 * Determine whether the zone's node should be displayed or not, depending on
2450 * whether SHOW_MEM_FILTER_NODES was passed to __show_free_areas().
2451 */
2452static bool skip_free_areas_zone(unsigned int flags, const struct zone *zone)
2453{
2454 bool ret = false;
2455
2456 if (!(flags & SHOW_MEM_FILTER_NODES))
2457 goto out;
2458
2459 get_mems_allowed();
2460 ret = !node_isset(zone->zone_pgdat->node_id,
2461 cpuset_current_mems_allowed);
2462 put_mems_allowed();
2463out:
2464 return ret;
2465}
2466
2414#define K(x) ((x) << (PAGE_SHIFT-10)) 2467#define K(x) ((x) << (PAGE_SHIFT-10))
2415 2468
2416/* 2469/*
2417 * Show free area list (used inside shift_scroll-lock stuff) 2470 * Show free area list (used inside shift_scroll-lock stuff)
2418 * We also calculate the percentage fragmentation. We do this by counting the 2471 * We also calculate the percentage fragmentation. We do this by counting the
2419 * memory on each free list with the exception of the first item on the list. 2472 * memory on each free list with the exception of the first item on the list.
2473 * Suppresses nodes that are not allowed by current's cpuset if
2474 * SHOW_MEM_FILTER_NODES is passed.
2420 */ 2475 */
2421void show_free_areas(void) 2476void __show_free_areas(unsigned int filter)
2422{ 2477{
2423 int cpu; 2478 int cpu;
2424 struct zone *zone; 2479 struct zone *zone;
2425 2480
2426 for_each_populated_zone(zone) { 2481 for_each_populated_zone(zone) {
2482 if (skip_free_areas_zone(filter, zone))
2483 continue;
2427 show_node(zone); 2484 show_node(zone);
2428 printk("%s per-cpu:\n", zone->name); 2485 printk("%s per-cpu:\n", zone->name);
2429 2486
@@ -2465,6 +2522,8 @@ void show_free_areas(void)
2465 for_each_populated_zone(zone) { 2522 for_each_populated_zone(zone) {
2466 int i; 2523 int i;
2467 2524
2525 if (skip_free_areas_zone(filter, zone))
2526 continue;
2468 show_node(zone); 2527 show_node(zone);
2469 printk("%s" 2528 printk("%s"
2470 " free:%lukB" 2529 " free:%lukB"
@@ -2532,6 +2591,8 @@ void show_free_areas(void)
2532 for_each_populated_zone(zone) { 2591 for_each_populated_zone(zone) {
2533 unsigned long nr[MAX_ORDER], flags, order, total = 0; 2592 unsigned long nr[MAX_ORDER], flags, order, total = 0;
2534 2593
2594 if (skip_free_areas_zone(filter, zone))
2595 continue;
2535 show_node(zone); 2596 show_node(zone);
2536 printk("%s: ", zone->name); 2597 printk("%s: ", zone->name);
2537 2598
@@ -2551,6 +2612,11 @@ void show_free_areas(void)
2551 show_swap_cache_info(); 2612 show_swap_cache_info();
2552} 2613}
2553 2614
2615void show_free_areas(void)
2616{
2617 __show_free_areas(0);
2618}
2619
2554static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref) 2620static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
2555{ 2621{
2556 zoneref->zone = zone; 2622 zoneref->zone = zone;
@@ -3110,7 +3176,7 @@ static __init_refok int __build_all_zonelists(void *data)
3110 * Called with zonelists_mutex held always 3176 * Called with zonelists_mutex held always
3111 * unless system_state == SYSTEM_BOOTING. 3177 * unless system_state == SYSTEM_BOOTING.
3112 */ 3178 */
3113void build_all_zonelists(void *data) 3179void __ref build_all_zonelists(void *data)
3114{ 3180{
3115 set_zonelist_order(); 3181 set_zonelist_order();
3116 3182
@@ -3699,13 +3765,45 @@ void __init free_bootmem_with_active_regions(int nid,
3699} 3765}
3700 3766
3701#ifdef CONFIG_HAVE_MEMBLOCK 3767#ifdef CONFIG_HAVE_MEMBLOCK
3768/*
3769 * Basic iterator support. Return the last range of PFNs for a node
3770 * Note: nid == MAX_NUMNODES returns last region regardless of node
3771 */
3772static int __meminit last_active_region_index_in_nid(int nid)
3773{
3774 int i;
3775
3776 for (i = nr_nodemap_entries - 1; i >= 0; i--)
3777 if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
3778 return i;
3779
3780 return -1;
3781}
3782
3783/*
3784 * Basic iterator support. Return the previous active range of PFNs for a node
3785 * Note: nid == MAX_NUMNODES returns next region regardless of node
3786 */
3787static int __meminit previous_active_region_index_in_nid(int index, int nid)
3788{
3789 for (index = index - 1; index >= 0; index--)
3790 if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
3791 return index;
3792
3793 return -1;
3794}
3795
3796#define for_each_active_range_index_in_nid_reverse(i, nid) \
3797 for (i = last_active_region_index_in_nid(nid); i != -1; \
3798 i = previous_active_region_index_in_nid(i, nid))
3799
3702u64 __init find_memory_core_early(int nid, u64 size, u64 align, 3800u64 __init find_memory_core_early(int nid, u64 size, u64 align,
3703 u64 goal, u64 limit) 3801 u64 goal, u64 limit)
3704{ 3802{
3705 int i; 3803 int i;
3706 3804
3707 /* Need to go over early_node_map to find out good range for node */ 3805 /* Need to go over early_node_map to find out good range for node */
3708 for_each_active_range_index_in_nid(i, nid) { 3806 for_each_active_range_index_in_nid_reverse(i, nid) {
3709 u64 addr; 3807 u64 addr;
3710 u64 ei_start, ei_last; 3808 u64 ei_start, ei_last;
3711 u64 final_start, final_end; 3809 u64 final_start, final_end;
@@ -3748,34 +3846,6 @@ int __init add_from_early_node_map(struct range *range, int az,
3748 return nr_range; 3846 return nr_range;
3749} 3847}
3750 3848
3751#ifdef CONFIG_NO_BOOTMEM
3752void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
3753 u64 goal, u64 limit)
3754{
3755 void *ptr;
3756 u64 addr;
3757
3758 if (limit > memblock.current_limit)
3759 limit = memblock.current_limit;
3760
3761 addr = find_memory_core_early(nid, size, align, goal, limit);
3762
3763 if (addr == MEMBLOCK_ERROR)
3764 return NULL;
3765
3766 ptr = phys_to_virt(addr);
3767 memset(ptr, 0, size);
3768 memblock_x86_reserve_range(addr, addr + size, "BOOTMEM");
3769 /*
3770 * The min_count is set to 0 so that bootmem allocated blocks
3771 * are never reported as leaks.
3772 */
3773 kmemleak_alloc(ptr, size, 0, 0);
3774 return ptr;
3775}
3776#endif
3777
3778
3779void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) 3849void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
3780{ 3850{
3781 int i; 3851 int i;
@@ -3856,7 +3926,7 @@ static void __init find_usable_zone_for_movable(void)
3856 3926
3857/* 3927/*
3858 * The zone ranges provided by the architecture do not include ZONE_MOVABLE 3928 * The zone ranges provided by the architecture do not include ZONE_MOVABLE
3859 * because it is sized independant of architecture. Unlike the other zones, 3929 * because it is sized independent of architecture. Unlike the other zones,
3860 * the starting point for ZONE_MOVABLE is not fixed. It may be different 3930 * the starting point for ZONE_MOVABLE is not fixed. It may be different
3861 * in each node depending on the size of each node and how evenly kernelcore 3931 * in each node depending on the size of each node and how evenly kernelcore
3862 * is distributed. This helper function adjusts the zone ranges 3932 * is distributed. This helper function adjusts the zone ranges
@@ -4809,15 +4879,6 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
4809 dma_reserve = new_dma_reserve; 4879 dma_reserve = new_dma_reserve;
4810} 4880}
4811 4881
4812#ifndef CONFIG_NEED_MULTIPLE_NODES
4813struct pglist_data __refdata contig_page_data = {
4814#ifndef CONFIG_NO_BOOTMEM
4815 .bdata = &bootmem_node_data[0]
4816#endif
4817 };
4818EXPORT_SYMBOL(contig_page_data);
4819#endif
4820
4821void __init free_area_init(unsigned long *zones_size) 4882void __init free_area_init(unsigned long *zones_size)
4822{ 4883{
4823 free_area_init_node(0, zones_size, 4884 free_area_init_node(0, zones_size,
@@ -5376,10 +5437,9 @@ __count_immobile_pages(struct zone *zone, struct page *page, int count)
5376 for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) { 5437 for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) {
5377 unsigned long check = pfn + iter; 5438 unsigned long check = pfn + iter;
5378 5439
5379 if (!pfn_valid_within(check)) { 5440 if (!pfn_valid_within(check))
5380 iter++;
5381 continue; 5441 continue;
5382 } 5442
5383 page = pfn_to_page(check); 5443 page = pfn_to_page(check);
5384 if (!page_count(page)) { 5444 if (!page_count(page)) {
5385 if (PageBuddy(page)) 5445 if (PageBuddy(page))
@@ -5627,4 +5687,5 @@ void dump_page(struct page *page)
5627 page, atomic_read(&page->_count), page_mapcount(page), 5687 page, atomic_read(&page->_count), page_mapcount(page),
5628 page->mapping, page->index); 5688 page->mapping, page->index);
5629 dump_page_flags(page->flags); 5689 dump_page_flags(page->flags);
5690 mem_cgroup_print_bad_page(page);
5630} 5691}