diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2011-05-14 06:06:36 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2011-05-14 06:06:36 -0400 |
commit | a18f22a968de17b29f2310cdb7ba69163e65ec15 (patch) | |
tree | a7d56d88fad5e444d7661484109758a2f436129e /mm/page_alloc.c | |
parent | a1c57e0fec53defe745e64417eacdbd3618c3e66 (diff) | |
parent | 798778b8653f64b7b2162ac70eca10367cff6ce8 (diff) |
Merge branch 'consolidate-clksrc-i8253' of master.kernel.org:~rmk/linux-2.6-arm into timers/clocksource
Conflicts:
arch/ia64/kernel/cyclone.c
arch/mips/kernel/i8253.c
arch/x86/kernel/i8253.c
Reason: Resolve conflicts so further cleanups do not conflict further
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 175 |
1 files changed, 118 insertions, 57 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a873e61e312e..9f8a97b9a350 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -53,6 +53,7 @@ | |||
53 | #include <linux/compaction.h> | 53 | #include <linux/compaction.h> |
54 | #include <trace/events/kmem.h> | 54 | #include <trace/events/kmem.h> |
55 | #include <linux/ftrace_event.h> | 55 | #include <linux/ftrace_event.h> |
56 | #include <linux/memcontrol.h> | ||
56 | 57 | ||
57 | #include <asm/tlbflush.h> | 58 | #include <asm/tlbflush.h> |
58 | #include <asm/div64.h> | 59 | #include <asm/div64.h> |
@@ -286,7 +287,7 @@ static void bad_page(struct page *page) | |||
286 | 287 | ||
287 | /* Don't complain about poisoned pages */ | 288 | /* Don't complain about poisoned pages */ |
288 | if (PageHWPoison(page)) { | 289 | if (PageHWPoison(page)) { |
289 | __ClearPageBuddy(page); | 290 | reset_page_mapcount(page); /* remove PageBuddy */ |
290 | return; | 291 | return; |
291 | } | 292 | } |
292 | 293 | ||
@@ -317,7 +318,7 @@ static void bad_page(struct page *page) | |||
317 | dump_stack(); | 318 | dump_stack(); |
318 | out: | 319 | out: |
319 | /* Leave bad fields for debug, except PageBuddy could make trouble */ | 320 | /* Leave bad fields for debug, except PageBuddy could make trouble */ |
320 | __ClearPageBuddy(page); | 321 | reset_page_mapcount(page); /* remove PageBuddy */ |
321 | add_taint(TAINT_BAD_PAGE); | 322 | add_taint(TAINT_BAD_PAGE); |
322 | } | 323 | } |
323 | 324 | ||
@@ -565,7 +566,8 @@ static inline int free_pages_check(struct page *page) | |||
565 | if (unlikely(page_mapcount(page) | | 566 | if (unlikely(page_mapcount(page) | |
566 | (page->mapping != NULL) | | 567 | (page->mapping != NULL) | |
567 | (atomic_read(&page->_count) != 0) | | 568 | (atomic_read(&page->_count) != 0) | |
568 | (page->flags & PAGE_FLAGS_CHECK_AT_FREE))) { | 569 | (page->flags & PAGE_FLAGS_CHECK_AT_FREE) | |
570 | (mem_cgroup_bad_page_check(page)))) { | ||
569 | bad_page(page); | 571 | bad_page(page); |
570 | return 1; | 572 | return 1; |
571 | } | 573 | } |
@@ -614,6 +616,10 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |||
614 | list = &pcp->lists[migratetype]; | 616 | list = &pcp->lists[migratetype]; |
615 | } while (list_empty(list)); | 617 | } while (list_empty(list)); |
616 | 618 | ||
619 | /* This is the only non-empty list. Free them all. */ | ||
620 | if (batch_free == MIGRATE_PCPTYPES) | ||
621 | batch_free = to_free; | ||
622 | |||
617 | do { | 623 | do { |
618 | page = list_entry(list->prev, struct page, lru); | 624 | page = list_entry(list->prev, struct page, lru); |
619 | /* must delete as __free_one_page list manipulates */ | 625 | /* must delete as __free_one_page list manipulates */ |
@@ -750,7 +756,8 @@ static inline int check_new_page(struct page *page) | |||
750 | if (unlikely(page_mapcount(page) | | 756 | if (unlikely(page_mapcount(page) | |
751 | (page->mapping != NULL) | | 757 | (page->mapping != NULL) | |
752 | (atomic_read(&page->_count) != 0) | | 758 | (atomic_read(&page->_count) != 0) | |
753 | (page->flags & PAGE_FLAGS_CHECK_AT_PREP))) { | 759 | (page->flags & PAGE_FLAGS_CHECK_AT_PREP) | |
760 | (mem_cgroup_bad_page_check(page)))) { | ||
754 | bad_page(page); | 761 | bad_page(page); |
755 | return 1; | 762 | return 1; |
756 | } | 763 | } |
@@ -863,9 +870,8 @@ static int move_freepages(struct zone *zone, | |||
863 | } | 870 | } |
864 | 871 | ||
865 | order = page_order(page); | 872 | order = page_order(page); |
866 | list_del(&page->lru); | 873 | list_move(&page->lru, |
867 | list_add(&page->lru, | 874 | &zone->free_area[order].free_list[migratetype]); |
868 | &zone->free_area[order].free_list[migratetype]); | ||
869 | page += 1 << order; | 875 | page += 1 << order; |
870 | pages_moved += 1 << order; | 876 | pages_moved += 1 << order; |
871 | } | 877 | } |
@@ -936,7 +942,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) | |||
936 | * If breaking a large block of pages, move all free | 942 | * If breaking a large block of pages, move all free |
937 | * pages to the preferred allocation list. If falling | 943 | * pages to the preferred allocation list. If falling |
938 | * back for a reclaimable kernel allocation, be more | 944 | * back for a reclaimable kernel allocation, be more |
939 | * agressive about taking ownership of free pages | 945 | * aggressive about taking ownership of free pages |
940 | */ | 946 | */ |
941 | if (unlikely(current_order >= (pageblock_order >> 1)) || | 947 | if (unlikely(current_order >= (pageblock_order >> 1)) || |
942 | start_migratetype == MIGRATE_RECLAIMABLE || | 948 | start_migratetype == MIGRATE_RECLAIMABLE || |
@@ -1333,7 +1339,7 @@ again: | |||
1333 | } | 1339 | } |
1334 | 1340 | ||
1335 | __count_zone_vm_events(PGALLOC, zone, 1 << order); | 1341 | __count_zone_vm_events(PGALLOC, zone, 1 << order); |
1336 | zone_statistics(preferred_zone, zone); | 1342 | zone_statistics(preferred_zone, zone, gfp_flags); |
1337 | local_irq_restore(flags); | 1343 | local_irq_restore(flags); |
1338 | 1344 | ||
1339 | VM_BUG_ON(bad_range(zone, page)); | 1345 | VM_BUG_ON(bad_range(zone, page)); |
@@ -1714,6 +1720,20 @@ try_next_zone: | |||
1714 | return page; | 1720 | return page; |
1715 | } | 1721 | } |
1716 | 1722 | ||
1723 | /* | ||
1724 | * Large machines with many possible nodes should not always dump per-node | ||
1725 | * meminfo in irq context. | ||
1726 | */ | ||
1727 | static inline bool should_suppress_show_mem(void) | ||
1728 | { | ||
1729 | bool ret = false; | ||
1730 | |||
1731 | #if NODES_SHIFT > 8 | ||
1732 | ret = in_interrupt(); | ||
1733 | #endif | ||
1734 | return ret; | ||
1735 | } | ||
1736 | |||
1717 | static inline int | 1737 | static inline int |
1718 | should_alloc_retry(gfp_t gfp_mask, unsigned int order, | 1738 | should_alloc_retry(gfp_t gfp_mask, unsigned int order, |
1719 | unsigned long pages_reclaimed) | 1739 | unsigned long pages_reclaimed) |
@@ -2085,7 +2105,7 @@ rebalance: | |||
2085 | sync_migration); | 2105 | sync_migration); |
2086 | if (page) | 2106 | if (page) |
2087 | goto got_pg; | 2107 | goto got_pg; |
2088 | sync_migration = true; | 2108 | sync_migration = !(gfp_mask & __GFP_NO_KSWAPD); |
2089 | 2109 | ||
2090 | /* Try direct reclaim and then allocating */ | 2110 | /* Try direct reclaim and then allocating */ |
2091 | page = __alloc_pages_direct_reclaim(gfp_mask, order, | 2111 | page = __alloc_pages_direct_reclaim(gfp_mask, order, |
@@ -2157,11 +2177,25 @@ rebalance: | |||
2157 | 2177 | ||
2158 | nopage: | 2178 | nopage: |
2159 | if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) { | 2179 | if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) { |
2160 | printk(KERN_WARNING "%s: page allocation failure." | 2180 | unsigned int filter = SHOW_MEM_FILTER_NODES; |
2161 | " order:%d, mode:0x%x\n", | 2181 | |
2182 | /* | ||
2183 | * This documents exceptions given to allocations in certain | ||
2184 | * contexts that are allowed to allocate outside current's set | ||
2185 | * of allowed nodes. | ||
2186 | */ | ||
2187 | if (!(gfp_mask & __GFP_NOMEMALLOC)) | ||
2188 | if (test_thread_flag(TIF_MEMDIE) || | ||
2189 | (current->flags & (PF_MEMALLOC | PF_EXITING))) | ||
2190 | filter &= ~SHOW_MEM_FILTER_NODES; | ||
2191 | if (in_interrupt() || !wait) | ||
2192 | filter &= ~SHOW_MEM_FILTER_NODES; | ||
2193 | |||
2194 | pr_warning("%s: page allocation failure. order:%d, mode:0x%x\n", | ||
2162 | current->comm, order, gfp_mask); | 2195 | current->comm, order, gfp_mask); |
2163 | dump_stack(); | 2196 | dump_stack(); |
2164 | show_mem(); | 2197 | if (!should_suppress_show_mem()) |
2198 | show_mem(filter); | ||
2165 | } | 2199 | } |
2166 | return page; | 2200 | return page; |
2167 | got_pg: | 2201 | got_pg: |
@@ -2411,19 +2445,42 @@ void si_meminfo_node(struct sysinfo *val, int nid) | |||
2411 | } | 2445 | } |
2412 | #endif | 2446 | #endif |
2413 | 2447 | ||
2448 | /* | ||
2449 | * Determine whether the zone's node should be displayed or not, depending on | ||
2450 | * whether SHOW_MEM_FILTER_NODES was passed to __show_free_areas(). | ||
2451 | */ | ||
2452 | static bool skip_free_areas_zone(unsigned int flags, const struct zone *zone) | ||
2453 | { | ||
2454 | bool ret = false; | ||
2455 | |||
2456 | if (!(flags & SHOW_MEM_FILTER_NODES)) | ||
2457 | goto out; | ||
2458 | |||
2459 | get_mems_allowed(); | ||
2460 | ret = !node_isset(zone->zone_pgdat->node_id, | ||
2461 | cpuset_current_mems_allowed); | ||
2462 | put_mems_allowed(); | ||
2463 | out: | ||
2464 | return ret; | ||
2465 | } | ||
2466 | |||
2414 | #define K(x) ((x) << (PAGE_SHIFT-10)) | 2467 | #define K(x) ((x) << (PAGE_SHIFT-10)) |
2415 | 2468 | ||
2416 | /* | 2469 | /* |
2417 | * Show free area list (used inside shift_scroll-lock stuff) | 2470 | * Show free area list (used inside shift_scroll-lock stuff) |
2418 | * We also calculate the percentage fragmentation. We do this by counting the | 2471 | * We also calculate the percentage fragmentation. We do this by counting the |
2419 | * memory on each free list with the exception of the first item on the list. | 2472 | * memory on each free list with the exception of the first item on the list. |
2473 | * Suppresses nodes that are not allowed by current's cpuset if | ||
2474 | * SHOW_MEM_FILTER_NODES is passed. | ||
2420 | */ | 2475 | */ |
2421 | void show_free_areas(void) | 2476 | void __show_free_areas(unsigned int filter) |
2422 | { | 2477 | { |
2423 | int cpu; | 2478 | int cpu; |
2424 | struct zone *zone; | 2479 | struct zone *zone; |
2425 | 2480 | ||
2426 | for_each_populated_zone(zone) { | 2481 | for_each_populated_zone(zone) { |
2482 | if (skip_free_areas_zone(filter, zone)) | ||
2483 | continue; | ||
2427 | show_node(zone); | 2484 | show_node(zone); |
2428 | printk("%s per-cpu:\n", zone->name); | 2485 | printk("%s per-cpu:\n", zone->name); |
2429 | 2486 | ||
@@ -2465,6 +2522,8 @@ void show_free_areas(void) | |||
2465 | for_each_populated_zone(zone) { | 2522 | for_each_populated_zone(zone) { |
2466 | int i; | 2523 | int i; |
2467 | 2524 | ||
2525 | if (skip_free_areas_zone(filter, zone)) | ||
2526 | continue; | ||
2468 | show_node(zone); | 2527 | show_node(zone); |
2469 | printk("%s" | 2528 | printk("%s" |
2470 | " free:%lukB" | 2529 | " free:%lukB" |
@@ -2532,6 +2591,8 @@ void show_free_areas(void) | |||
2532 | for_each_populated_zone(zone) { | 2591 | for_each_populated_zone(zone) { |
2533 | unsigned long nr[MAX_ORDER], flags, order, total = 0; | 2592 | unsigned long nr[MAX_ORDER], flags, order, total = 0; |
2534 | 2593 | ||
2594 | if (skip_free_areas_zone(filter, zone)) | ||
2595 | continue; | ||
2535 | show_node(zone); | 2596 | show_node(zone); |
2536 | printk("%s: ", zone->name); | 2597 | printk("%s: ", zone->name); |
2537 | 2598 | ||
@@ -2551,6 +2612,11 @@ void show_free_areas(void) | |||
2551 | show_swap_cache_info(); | 2612 | show_swap_cache_info(); |
2552 | } | 2613 | } |
2553 | 2614 | ||
2615 | void show_free_areas(void) | ||
2616 | { | ||
2617 | __show_free_areas(0); | ||
2618 | } | ||
2619 | |||
2554 | static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref) | 2620 | static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref) |
2555 | { | 2621 | { |
2556 | zoneref->zone = zone; | 2622 | zoneref->zone = zone; |
@@ -3110,7 +3176,7 @@ static __init_refok int __build_all_zonelists(void *data) | |||
3110 | * Called with zonelists_mutex held always | 3176 | * Called with zonelists_mutex held always |
3111 | * unless system_state == SYSTEM_BOOTING. | 3177 | * unless system_state == SYSTEM_BOOTING. |
3112 | */ | 3178 | */ |
3113 | void build_all_zonelists(void *data) | 3179 | void __ref build_all_zonelists(void *data) |
3114 | { | 3180 | { |
3115 | set_zonelist_order(); | 3181 | set_zonelist_order(); |
3116 | 3182 | ||
@@ -3699,13 +3765,45 @@ void __init free_bootmem_with_active_regions(int nid, | |||
3699 | } | 3765 | } |
3700 | 3766 | ||
3701 | #ifdef CONFIG_HAVE_MEMBLOCK | 3767 | #ifdef CONFIG_HAVE_MEMBLOCK |
3768 | /* | ||
3769 | * Basic iterator support. Return the last range of PFNs for a node | ||
3770 | * Note: nid == MAX_NUMNODES returns last region regardless of node | ||
3771 | */ | ||
3772 | static int __meminit last_active_region_index_in_nid(int nid) | ||
3773 | { | ||
3774 | int i; | ||
3775 | |||
3776 | for (i = nr_nodemap_entries - 1; i >= 0; i--) | ||
3777 | if (nid == MAX_NUMNODES || early_node_map[i].nid == nid) | ||
3778 | return i; | ||
3779 | |||
3780 | return -1; | ||
3781 | } | ||
3782 | |||
3783 | /* | ||
3784 | * Basic iterator support. Return the previous active range of PFNs for a node | ||
3785 | * Note: nid == MAX_NUMNODES returns previous region regardless of node | ||
3786 | */ | ||
3787 | static int __meminit previous_active_region_index_in_nid(int index, int nid) | ||
3788 | { | ||
3789 | for (index = index - 1; index >= 0; index--) | ||
3790 | if (nid == MAX_NUMNODES || early_node_map[index].nid == nid) | ||
3791 | return index; | ||
3792 | |||
3793 | return -1; | ||
3794 | } | ||
3795 | |||
3796 | #define for_each_active_range_index_in_nid_reverse(i, nid) \ | ||
3797 | for (i = last_active_region_index_in_nid(nid); i != -1; \ | ||
3798 | i = previous_active_region_index_in_nid(i, nid)) | ||
3799 | |||
3702 | u64 __init find_memory_core_early(int nid, u64 size, u64 align, | 3800 | u64 __init find_memory_core_early(int nid, u64 size, u64 align, |
3703 | u64 goal, u64 limit) | 3801 | u64 goal, u64 limit) |
3704 | { | 3802 | { |
3705 | int i; | 3803 | int i; |
3706 | 3804 | ||
3707 | /* Need to go over early_node_map to find out good range for node */ | 3805 | /* Need to go over early_node_map to find out good range for node */ |
3708 | for_each_active_range_index_in_nid(i, nid) { | 3806 | for_each_active_range_index_in_nid_reverse(i, nid) { |
3709 | u64 addr; | 3807 | u64 addr; |
3710 | u64 ei_start, ei_last; | 3808 | u64 ei_start, ei_last; |
3711 | u64 final_start, final_end; | 3809 | u64 final_start, final_end; |
@@ -3748,34 +3846,6 @@ int __init add_from_early_node_map(struct range *range, int az, | |||
3748 | return nr_range; | 3846 | return nr_range; |
3749 | } | 3847 | } |
3750 | 3848 | ||
3751 | #ifdef CONFIG_NO_BOOTMEM | ||
3752 | void * __init __alloc_memory_core_early(int nid, u64 size, u64 align, | ||
3753 | u64 goal, u64 limit) | ||
3754 | { | ||
3755 | void *ptr; | ||
3756 | u64 addr; | ||
3757 | |||
3758 | if (limit > memblock.current_limit) | ||
3759 | limit = memblock.current_limit; | ||
3760 | |||
3761 | addr = find_memory_core_early(nid, size, align, goal, limit); | ||
3762 | |||
3763 | if (addr == MEMBLOCK_ERROR) | ||
3764 | return NULL; | ||
3765 | |||
3766 | ptr = phys_to_virt(addr); | ||
3767 | memset(ptr, 0, size); | ||
3768 | memblock_x86_reserve_range(addr, addr + size, "BOOTMEM"); | ||
3769 | /* | ||
3770 | * The min_count is set to 0 so that bootmem allocated blocks | ||
3771 | * are never reported as leaks. | ||
3772 | */ | ||
3773 | kmemleak_alloc(ptr, size, 0, 0); | ||
3774 | return ptr; | ||
3775 | } | ||
3776 | #endif | ||
3777 | |||
3778 | |||
3779 | void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) | 3849 | void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) |
3780 | { | 3850 | { |
3781 | int i; | 3851 | int i; |
@@ -3856,7 +3926,7 @@ static void __init find_usable_zone_for_movable(void) | |||
3856 | 3926 | ||
3857 | /* | 3927 | /* |
3858 | * The zone ranges provided by the architecture do not include ZONE_MOVABLE | 3928 | * The zone ranges provided by the architecture do not include ZONE_MOVABLE |
3859 | * because it is sized independant of architecture. Unlike the other zones, | 3929 | * because it is sized independent of architecture. Unlike the other zones, |
3860 | * the starting point for ZONE_MOVABLE is not fixed. It may be different | 3930 | * the starting point for ZONE_MOVABLE is not fixed. It may be different |
3861 | * in each node depending on the size of each node and how evenly kernelcore | 3931 | * in each node depending on the size of each node and how evenly kernelcore |
3862 | * is distributed. This helper function adjusts the zone ranges | 3932 | * is distributed. This helper function adjusts the zone ranges |
@@ -4809,15 +4879,6 @@ void __init set_dma_reserve(unsigned long new_dma_reserve) | |||
4809 | dma_reserve = new_dma_reserve; | 4879 | dma_reserve = new_dma_reserve; |
4810 | } | 4880 | } |
4811 | 4881 | ||
4812 | #ifndef CONFIG_NEED_MULTIPLE_NODES | ||
4813 | struct pglist_data __refdata contig_page_data = { | ||
4814 | #ifndef CONFIG_NO_BOOTMEM | ||
4815 | .bdata = &bootmem_node_data[0] | ||
4816 | #endif | ||
4817 | }; | ||
4818 | EXPORT_SYMBOL(contig_page_data); | ||
4819 | #endif | ||
4820 | |||
4821 | void __init free_area_init(unsigned long *zones_size) | 4882 | void __init free_area_init(unsigned long *zones_size) |
4822 | { | 4883 | { |
4823 | free_area_init_node(0, zones_size, | 4884 | free_area_init_node(0, zones_size, |
@@ -5376,10 +5437,9 @@ __count_immobile_pages(struct zone *zone, struct page *page, int count) | |||
5376 | for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) { | 5437 | for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) { |
5377 | unsigned long check = pfn + iter; | 5438 | unsigned long check = pfn + iter; |
5378 | 5439 | ||
5379 | if (!pfn_valid_within(check)) { | 5440 | if (!pfn_valid_within(check)) |
5380 | iter++; | ||
5381 | continue; | 5441 | continue; |
5382 | } | 5442 | |
5383 | page = pfn_to_page(check); | 5443 | page = pfn_to_page(check); |
5384 | if (!page_count(page)) { | 5444 | if (!page_count(page)) { |
5385 | if (PageBuddy(page)) | 5445 | if (PageBuddy(page)) |
@@ -5627,4 +5687,5 @@ void dump_page(struct page *page) | |||
5627 | page, atomic_read(&page->_count), page_mapcount(page), | 5687 | page, atomic_read(&page->_count), page_mapcount(page), |
5628 | page->mapping, page->index); | 5688 | page->mapping, page->index); |
5629 | dump_page_flags(page->flags); | 5689 | dump_page_flags(page->flags); |
5690 | mem_cgroup_print_bad_page(page); | ||
5630 | } | 5691 | } |