Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	152
1 files changed, 123 insertions, 29 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7945247b1e53..3f8bce264df6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -53,6 +53,7 @@
 #include <linux/compaction.h>
 #include <trace/events/kmem.h>
 #include <linux/ftrace_event.h>
+#include <linux/memcontrol.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -565,7 +566,8 @@ static inline int free_pages_check(struct page *page)
 	if (unlikely(page_mapcount(page) |
 		(page->mapping != NULL)  |
 		(atomic_read(&page->_count) != 0) |
-		(page->flags & PAGE_FLAGS_CHECK_AT_FREE))) {
+		(page->flags & PAGE_FLAGS_CHECK_AT_FREE) |
+		(mem_cgroup_bad_page_check(page)))) {
 		bad_page(page);
 		return 1;
 	}
@@ -614,6 +616,10 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			list = &pcp->lists[migratetype];
 		} while (list_empty(list));
 
+		/* This is the only non-empty list. Free them all. */
+		if (batch_free == MIGRATE_PCPTYPES)
+			batch_free = to_free;
+
 		do {
 			page = list_entry(list->prev, struct page, lru);
 			/* must delete as __free_one_page list manipulates */
@@ -750,7 +756,8 @@ static inline int check_new_page(struct page *page)
 	if (unlikely(page_mapcount(page) |
 		(page->mapping != NULL)  |
 		(atomic_read(&page->_count) != 0) |
-		(page->flags & PAGE_FLAGS_CHECK_AT_PREP))) {
+		(page->flags & PAGE_FLAGS_CHECK_AT_PREP) |
+		(mem_cgroup_bad_page_check(page)))) {
 		bad_page(page);
 		return 1;
 	}
@@ -863,9 +870,8 @@ static int move_freepages(struct zone *zone,
 		}
 
 		order = page_order(page);
-		list_del(&page->lru);
-		list_add(&page->lru,
-			&zone->free_area[order].free_list[migratetype]);
+		list_move(&page->lru,
+			  &zone->free_area[order].free_list[migratetype]);
 		page += 1 << order;
 		pages_moved += 1 << order;
 	}
@@ -936,7 +942,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
 			 * If breaking a large block of pages, move all free
 			 * pages to the preferred allocation list. If falling
 			 * back for a reclaimable kernel allocation, be more
-			 * agressive about taking ownership of free pages
+			 * aggressive about taking ownership of free pages
 			 */
 			if (unlikely(current_order >= (pageblock_order >> 1)) ||
 					start_migratetype == MIGRATE_RECLAIMABLE ||
@@ -1333,7 +1339,7 @@ again:
 	}
 
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
-	zone_statistics(preferred_zone, zone);
+	zone_statistics(preferred_zone, zone, gfp_flags);
 	local_irq_restore(flags);
 
 	VM_BUG_ON(bad_range(zone, page));
@@ -1714,6 +1720,20 @@ try_next_zone:
 	return page;
 }
 
+/*
+ * Large machines with many possible nodes should not always dump per-node
+ * meminfo in irq context.
+ */
+static inline bool should_suppress_show_mem(void)
+{
+	bool ret = false;
+
+#if NODES_SHIFT > 8
+	ret = in_interrupt();
+#endif
+	return ret;
+}
+
 static inline int
 should_alloc_retry(gfp_t gfp_mask, unsigned int order,
 				unsigned long pages_reclaimed)
@@ -2085,7 +2105,7 @@ rebalance:
 					sync_migration);
 	if (page)
 		goto got_pg;
-	sync_migration = true;
+	sync_migration = !(gfp_mask & __GFP_NO_KSWAPD);
 
 	/* Try direct reclaim and then allocating */
 	page = __alloc_pages_direct_reclaim(gfp_mask, order,
@@ -2157,11 +2177,25 @@ rebalance:
 
 nopage:
 	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
-		printk(KERN_WARNING "%s: page allocation failure."
-			" order:%d, mode:0x%x\n",
+		unsigned int filter = SHOW_MEM_FILTER_NODES;
+
+		/*
+		 * This documents exceptions given to allocations in certain
+		 * contexts that are allowed to allocate outside current's set
+		 * of allowed nodes.
+		 */
+		if (!(gfp_mask & __GFP_NOMEMALLOC))
+			if (test_thread_flag(TIF_MEMDIE) ||
+			    (current->flags & (PF_MEMALLOC | PF_EXITING)))
+				filter &= ~SHOW_MEM_FILTER_NODES;
+		if (in_interrupt() || !wait)
+			filter &= ~SHOW_MEM_FILTER_NODES;
+
+		pr_warning("%s: page allocation failure. order:%d, mode:0x%x\n",
 			current->comm, order, gfp_mask);
 		dump_stack();
-		show_mem();
+		if (!should_suppress_show_mem())
+			show_mem(filter);
 	}
 	return page;
 got_pg:
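
(The net effect of the nopage changes above is a small decision about how much state to dump on an allocation failure. The standalone restatement below is illustrative only: the function name and the int parameters stand in for __GFP_NOMEMALLOC, TIF_MEMDIE, PF_MEMALLOC, PF_EXITING, in_interrupt() and the __GFP_WAIT-derived "wait" flag, and the filter bit value is assumed for the sketch rather than taken from a header.)

#define SHOW_MEM_FILTER_NODES	(0x0001u)	/* assumed bit value for the sketch */

/* Which filter should the allocation-failure report pass to show_mem()? */
static unsigned int failure_show_mem_filter(int nomemalloc, int memdie,
					    int memalloc, int exiting,
					    int in_irq, int can_wait)
{
	unsigned int filter = SHOW_MEM_FILTER_NODES;

	/* Tasks allowed to allocate outside their cpuset: show all nodes. */
	if (!nomemalloc && (memdie || memalloc || exiting))
		filter &= ~SHOW_MEM_FILTER_NODES;

	/* Atomic and interrupt-context allocations are not cpuset-bound. */
	if (in_irq || !can_wait)
		filter &= ~SHOW_MEM_FILTER_NODES;

	return filter;
}
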
@@ -2283,6 +2317,21 @@ void free_pages(unsigned long addr, unsigned int order)
 
 EXPORT_SYMBOL(free_pages);
 
+static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size)
+{
+	if (addr) {
+		unsigned long alloc_end = addr + (PAGE_SIZE << order);
+		unsigned long used = addr + PAGE_ALIGN(size);
+
+		split_page(virt_to_page((void *)addr), order);
+		while (used < alloc_end) {
+			free_page(used);
+			used += PAGE_SIZE;
+		}
+	}
+	return (void *)addr;
+}
+
 /**
  * alloc_pages_exact - allocate an exact number physically-contiguous pages.
  * @size: the number of bytes to allocate
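
(Worked example of the new helper, assuming 4 KiB pages: a request for 5 pages is rounded up by get_order() to order 3, i.e. an 8-page block; split_page() turns it into order-0 pages and the loop hands pages 5-7 back, so exactly PAGE_ALIGN(size) bytes stay allocated. The userspace sketch below only reproduces that arithmetic; order_for() stands in for get_order() and no kernel API is used.)

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

/* Smallest order whose block covers "size" bytes, like get_order(). */
static unsigned int order_for(unsigned long size)
{
	unsigned int order = 0;

	while ((PAGE_SIZE << order) < size)
		order++;
	return order;
}

int main(void)
{
	unsigned long size = 5 * PAGE_SIZE;	/* caller wants 5 pages */
	unsigned int order = order_for(size);	/* -> 3, an 8-page block */
	unsigned long addr = 0x100000;		/* pretend allocation base */
	unsigned long alloc_end = addr + (PAGE_SIZE << order);
	unsigned long used = addr + PAGE_ALIGN(size);

	/* Prints: order 3: 8 pages allocated, 3 pages freed back */
	printf("order %u: %lu pages allocated, %lu pages freed back\n",
	       order, (PAGE_SIZE << order) / PAGE_SIZE,
	       (alloc_end - used) / PAGE_SIZE);
	return 0;
}
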
@@ -2302,22 +2351,33 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
 	unsigned long addr;
 
 	addr = __get_free_pages(gfp_mask, order);
-	if (addr) {
-		unsigned long alloc_end = addr + (PAGE_SIZE << order);
-		unsigned long used = addr + PAGE_ALIGN(size);
-
-		split_page(virt_to_page((void *)addr), order);
-		while (used < alloc_end) {
-			free_page(used);
-			used += PAGE_SIZE;
-		}
-	}
-
-	return (void *)addr;
+	return make_alloc_exact(addr, order, size);
 }
 EXPORT_SYMBOL(alloc_pages_exact);
 
 /**
+ * alloc_pages_exact_nid - allocate an exact number of physically-contiguous
+ *			   pages on a node.
+ * @nid: the preferred node ID where memory should be allocated
+ * @size: the number of bytes to allocate
+ * @gfp_mask: GFP flags for the allocation
+ *
+ * Like alloc_pages_exact(), but try to allocate on node nid first before falling
+ * back.
+ * Note this is not alloc_pages_exact_node() which allocates on a specific node,
+ * but is not exact.
+ */
+void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
+{
+	unsigned order = get_order(size);
+	struct page *p = alloc_pages_node(nid, gfp_mask, order);
+	if (!p)
+		return NULL;
+	return make_alloc_exact((unsigned long)page_address(p), order, size);
+}
+EXPORT_SYMBOL(alloc_pages_exact_nid);
+
+/**
  * free_pages_exact - release memory allocated via alloc_pages_exact()
  * @virt: the value returned by alloc_pages_exact.
  * @size: size of allocation, same value as passed to alloc_pages_exact().
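
(A hypothetical caller of the new export, not taken from this patch: a subsystem that sizes a per-node table at runtime could prefer node-local memory and still fall back transparently. The table name, sizes and error handling here are made up for illustration.)

#include <linux/gfp.h>
#include <linux/nodemask.h>
#include <linux/errno.h>
#include <linux/init.h>

static void *node_table[MAX_NUMNODES];		/* hypothetical per-node data */

static int __init alloc_node_tables(size_t bytes_per_node)
{
	int nid;

	for_each_online_node(nid) {
		/* Prefer nid, but allow falling back to other nodes. */
		node_table[nid] = alloc_pages_exact_nid(nid, bytes_per_node,
							GFP_KERNEL | __GFP_ZERO);
		if (!node_table[nid])
			goto fail;
	}
	return 0;

fail:
	for_each_online_node(nid)
		if (node_table[nid])
			free_pages_exact(node_table[nid], bytes_per_node);
	return -ENOMEM;
}
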
@@ -2411,19 +2471,42 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 }
 #endif
 
+/*
+ * Determine whether the zone's node should be displayed or not, depending on
+ * whether SHOW_MEM_FILTER_NODES was passed to __show_free_areas().
+ */
+static bool skip_free_areas_zone(unsigned int flags, const struct zone *zone)
+{
+	bool ret = false;
+
+	if (!(flags & SHOW_MEM_FILTER_NODES))
+		goto out;
+
+	get_mems_allowed();
+	ret = !node_isset(zone->zone_pgdat->node_id,
+				cpuset_current_mems_allowed);
+	put_mems_allowed();
+out:
+	return ret;
+}
+
 #define K(x) ((x) << (PAGE_SHIFT-10))
 
 /*
  * Show free area list (used inside shift_scroll-lock stuff)
  * We also calculate the percentage fragmentation. We do this by counting the
  * memory on each free list with the exception of the first item on the list.
+ * Suppresses nodes that are not allowed by current's cpuset if
+ * SHOW_MEM_FILTER_NODES is passed.
  */
-void show_free_areas(void)
+void __show_free_areas(unsigned int filter)
 {
 	int cpu;
 	struct zone *zone;
 
 	for_each_populated_zone(zone) {
+		if (skip_free_areas_zone(filter, zone))
+			continue;
 		show_node(zone);
 		printk("%s per-cpu:\n", zone->name);
 
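
(Callers outside the allocator keep their behaviour: show_free_areas() below becomes a thin wrapper that passes filter 0, i.e. no suppression. A hypothetical debug helper that wants the cpuset-filtered view could call the new entry point directly, assuming the declarations from this series live in <linux/mm.h>; this sketch is not part of the patch.)

#include <linux/mm.h>
#include <linux/hardirq.h>

/*
 * Dump free-area state, restricted to the current task's cpuset when
 * that is meaningful (i.e. not from interrupt context).
 */
static void report_free_memory(void)
{
	unsigned int filter = SHOW_MEM_FILTER_NODES;

	if (in_interrupt())
		filter &= ~SHOW_MEM_FILTER_NODES;

	__show_free_areas(filter);
}
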
@@ -2465,6 +2548,8 @@ void show_free_areas(void)
 	for_each_populated_zone(zone) {
 		int i;
 
+		if (skip_free_areas_zone(filter, zone))
+			continue;
 		show_node(zone);
 		printk("%s"
 			" free:%lukB"
@@ -2532,6 +2617,8 @@ void show_free_areas(void)
 	for_each_populated_zone(zone) {
 		unsigned long nr[MAX_ORDER], flags, order, total = 0;
 
+		if (skip_free_areas_zone(filter, zone))
+			continue;
 		show_node(zone);
 		printk("%s: ", zone->name);
 
@@ -2551,6 +2638,11 @@ void show_free_areas(void)
 	show_swap_cache_info();
 }
 
+void show_free_areas(void)
+{
+	__show_free_areas(0);
+}
+
 static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
 {
 	zoneref->zone = zone;
@@ -3110,7 +3202,7 @@ static __init_refok int __build_all_zonelists(void *data)
  * Called with zonelists_mutex held always
  * unless system_state == SYSTEM_BOOTING.
  */
-void build_all_zonelists(void *data)
+void __ref build_all_zonelists(void *data)
 {
 	set_zonelist_order();
 
@@ -3498,7 +3590,7 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 
 	if (!slab_is_available()) {
 		zone->wait_table = (wait_queue_head_t *)
-			alloc_bootmem_node(pgdat, alloc_size);
+			alloc_bootmem_node_nopanic(pgdat, alloc_size);
 	} else {
 		/*
 		 * This case means that a zone whose size was 0 gets new memory
@@ -3860,7 +3952,7 @@ static void __init find_usable_zone_for_movable(void)
 
 /*
  * The zone ranges provided by the architecture do not include ZONE_MOVABLE
- * because it is sized independant of architecture. Unlike the other zones,
+ * because it is sized independent of architecture. Unlike the other zones,
  * the starting point for ZONE_MOVABLE is not fixed. It may be different
  * in each node depending on the size of each node and how evenly kernelcore
  * is distributed. This helper function adjusts the zone ranges
@@ -4075,7 +4167,8 @@ static void __init setup_usemap(struct pglist_data *pgdat,
 	unsigned long usemapsize = usemap_size(zonesize);
 	zone->pageblock_flags = NULL;
 	if (usemapsize)
-		zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize);
+		zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat,
+								   usemapsize);
 }
 #else
 static inline void setup_usemap(struct pglist_data *pgdat,
@@ -4241,7 +4334,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
 		size = (end - start) * sizeof(struct page);
 		map = alloc_remap(pgdat->node_id, size);
 		if (!map)
-			map = alloc_bootmem_node(pgdat, size);
+			map = alloc_bootmem_node_nopanic(pgdat, size);
 		pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
 	}
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -5621,4 +5714,5 @@ void dump_page(struct page *page)
 		page, atomic_read(&page->_count), page_mapcount(page),
 		page->mapping, page->index);
 	dump_page_flags(page->flags);
+	mem_cgroup_print_bad_page(page);
 }