path: root/mm/page_alloc.c
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	128
1 file changed, 101 insertions, 27 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2bc2ac63f41e..8deb9d0fd5b1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,6 +48,7 @@
 #include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
+#include <linux/memory.h>
 #include <trace/events/kmem.h>
 
 #include <asm/tlbflush.h>
@@ -486,7 +487,6 @@ static inline void __free_one_page(struct page *page,
 	zone->free_area[order].nr_free++;
 }
 
-#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
 /*
  * free_page_mlock() -- clean up attempts to free and mlocked() page.
  * Page should not be on lru, so no need to fix that up.
@@ -497,9 +497,6 @@ static inline void free_page_mlock(struct page *page)
 	__dec_zone_page_state(page, NR_MLOCK);
 	__count_vm_event(UNEVICTABLE_MLOCKFREED);
 }
-#else
-static void free_page_mlock(struct page *page) { }
-#endif
 
 static inline int free_pages_check(struct page *page)
 {
@@ -559,8 +556,9 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			page = list_entry(list->prev, struct page, lru);
 			/* must delete as __free_one_page list manipulates */
 			list_del(&page->lru);
-			__free_one_page(page, zone, 0, migratetype);
-			trace_mm_page_pcpu_drain(page, 0, migratetype);
+			/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
+			__free_one_page(page, zone, 0, page_private(page));
+			trace_mm_page_pcpu_drain(page, 0, page_private(page));
 		} while (--count && --batch_free && !list_empty(list));
 	}
 	spin_unlock(&zone->lock);
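
The one-line comment added in this hunk carries the rationale for the fix: the per-CPU MIGRATE_MOVABLE list can hold pages whose pageblock is really MIGRATE_RESERVE, so the drain has to free each page by the migratetype recorded in page->private when it was queued, not by the list it happens to sit on. Below is a minimal stand-alone model of that idea; the names (pcp_add, drain_list, free_area) are invented for illustration and none of the real kernel structures are used.

#include <stdio.h>

enum { MIGRATE_MOVABLE, MIGRATE_RESERVE, NR_TYPES };

/* toy "page": private remembers the migratetype recorded at free time */
struct page { int private; struct page *next; };

static struct page *pcp_list[NR_TYPES];	/* per-CPU free lists (toy) */
static int free_area[NR_TYPES];		/* buddy free counts per type (toy) */

static void pcp_add(struct page *p, int list_type, int real_type)
{
	p->private = real_type;		/* what the kernel keeps in page->private */
	p->next = pcp_list[list_type];
	pcp_list[list_type] = p;
}

/* stand-in for __free_one_page(page, zone, 0, migratetype) */
static void free_one_page(struct page *p, int migratetype)
{
	free_area[migratetype]++;
}

static void drain_list(int list_type)
{
	struct page *p;

	for (p = pcp_list[list_type]; p; p = p->next)
		free_one_page(p, p->private);	/* the fix: trust the page, not the list */
	pcp_list[list_type] = NULL;
}

int main(void)
{
	static struct page a, b;

	pcp_add(&a, MIGRATE_MOVABLE, MIGRATE_MOVABLE);
	pcp_add(&b, MIGRATE_MOVABLE, MIGRATE_RESERVE);	/* reserve page parked on the movable list */
	drain_list(MIGRATE_MOVABLE);
	printf("movable=%d reserve=%d\n",
	       free_area[MIGRATE_MOVABLE], free_area[MIGRATE_RESERVE]);	/* 1 and 1, not 2 and 0 */
	return 0;
}
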
@@ -1225,10 +1223,10 @@ again:
 		}
 		spin_lock_irqsave(&zone->lock, flags);
 		page = __rmqueue(zone, order, migratetype);
-		__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
 		spin_unlock(&zone->lock);
 		if (!page)
 			goto failed;
+		__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
 	}
 
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
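
The reordering in this hunk is an accounting fix: __mod_zone_page_state(zone, NR_FREE_PAGES, ...) used to run before the !page check, so a failed __rmqueue() still shrank the free-page counter. A small stand-alone illustration of the drift follows; alloc_old(), alloc_new() and rmqueue() are invented stand-ins, not kernel functions.

#include <stdbool.h>
#include <stdio.h>

static long nr_free_pages = 1024;			/* toy NR_FREE_PAGES counter */

static bool rmqueue(int order) { return false; }	/* pretend the free lists are empty */

/* old ordering: charge the counter before checking for failure */
static void alloc_old(int order)
{
	nr_free_pages -= 1L << order;
	if (!rmqueue(order))
		return;					/* failed, but the counter already moved */
}

/* new ordering: charge only after the allocation succeeded */
static void alloc_new(int order)
{
	if (!rmqueue(order))
		return;
	nr_free_pages -= 1L << order;
}

int main(void)
{
	alloc_new(3);
	printf("after failed alloc, fixed ordering: %ld\n", nr_free_pages);	/* 1024 */
	alloc_old(3);
	printf("after failed alloc, old ordering:   %ld\n", nr_free_pages);	/* 1016 */
	return 0;
}
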
@@ -1658,12 +1656,22 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	if (page)
 		goto out;
 
-	/* The OOM killer will not help higher order allocs */
-	if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_NOFAIL))
-		goto out;
-
+	if (!(gfp_mask & __GFP_NOFAIL)) {
+		/* The OOM killer will not help higher order allocs */
+		if (order > PAGE_ALLOC_COSTLY_ORDER)
+			goto out;
+		/*
+		 * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
+		 * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
+		 * The caller should handle page allocation failure by itself if
+		 * it specifies __GFP_THISNODE.
+		 * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER.
+		 */
+		if (gfp_mask & __GFP_THISNODE)
+			goto out;
+	}
 	/* Exhausted what can be done so it's blamo time */
-	out_of_memory(zonelist, gfp_mask, order);
+	out_of_memory(zonelist, gfp_mask, order, nodemask);
 
 out:
 	clear_zonelist_oom(zonelist, gfp_mask);
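
The new comment block explains the flag reasoning: the GFP_THISNODE composite includes __GFP_NORETRY, so allocations using it bail out of the slow path before ever reaching __alloc_pages_may_oom(); the check added here is for callers passing a bare __GFP_THISNODE, who are expected to cope with failure themselves instead of waking the OOM killer. The sketch below mirrors only the decision made inside this function; the bit values are illustrative rather than the real gfp.h encodings, and may_invoke_oom() is an invented name.

#include <stdbool.h>
#include <stdio.h>

/* illustrative bit values, not the real gfp.h encodings */
#define __GFP_THISNODE	0x01u
#define __GFP_NORETRY	0x02u
#define __GFP_NOWARN	0x04u
#define __GFP_NOFAIL	0x08u

/* the composite; on NUMA builds it carries __GFP_NORETRY and __GFP_NOWARN */
#define GFP_THISNODE	(__GFP_THISNODE | __GFP_NORETRY | __GFP_NOWARN)

#define PAGE_ALLOC_COSTLY_ORDER	3

/* mirrors only the check inside __alloc_pages_may_oom() after this patch */
static bool may_invoke_oom(unsigned int gfp_mask, int order)
{
	if (!(gfp_mask & __GFP_NOFAIL)) {
		/* the OOM killer will not help higher order allocs */
		if (order > PAGE_ALLOC_COSTLY_ORDER)
			return false;
		/* a node-pinned caller is expected to handle failure itself */
		if (gfp_mask & __GFP_THISNODE)
			return false;
	}
	return true;
}

int main(void)
{
	printf("bare __GFP_THISNODE, order 0:  %d\n", may_invoke_oom(__GFP_THISNODE, 0));	/* 0 */
	printf("plain allocation, order 0:     %d\n", may_invoke_oom(0, 0));			/* 1 */
	printf("order 4 without __GFP_NOFAIL:  %d\n", may_invoke_oom(0, 4));			/* 0 */
	return 0;
}
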
@@ -2395,13 +2403,14 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
 {
 	char saved_string[NUMA_ZONELIST_ORDER_LEN];
 	int ret;
+	static DEFINE_MUTEX(zl_order_mutex);
 
+	mutex_lock(&zl_order_mutex);
 	if (write)
-		strncpy(saved_string, (char*)table->data,
-			NUMA_ZONELIST_ORDER_LEN);
+		strcpy(saved_string, (char*)table->data);
 	ret = proc_dostring(table, write, buffer, length, ppos);
 	if (ret)
-		return ret;
+		goto out;
 	if (write) {
 		int oldval = user_zonelist_order;
 		if (__parse_numa_zonelist_order((char*)table->data)) {
@@ -2414,7 +2423,9 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
 		} else if (oldval != user_zonelist_order)
 			build_all_zonelists();
 	}
-	return 0;
+out:
+	mutex_unlock(&zl_order_mutex);
+	return ret;
 }
 
 
@@ -3127,7 +3138,7 @@ static int __cpuinit process_zones(int cpu)
 
 		if (percpu_pagelist_fraction)
 			setup_pagelist_highmark(zone_pcp(zone, cpu),
-			 	(zone->present_pages / percpu_pagelist_fraction));
+				(zone->present_pages / percpu_pagelist_fraction));
 	}
 
 	return 0;
@@ -3573,7 +3584,7 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
  * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
  * then all holes in the requested range will be accounted for.
  */
-static unsigned long __meminit __absent_pages_in_range(int nid,
+unsigned long __meminit __absent_pages_in_range(int nid,
 				unsigned long range_start_pfn,
 				unsigned long range_end_pfn)
 {
@@ -3988,7 +3999,7 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn,
 	}
 
 	/* Merge backward if suitable */
-	if (start_pfn < early_node_map[i].end_pfn &&
+	if (start_pfn < early_node_map[i].start_pfn &&
 			end_pfn >= early_node_map[i].start_pfn) {
 		early_node_map[i].start_pfn = start_pfn;
 		return;
@@ -4102,7 +4113,7 @@ static int __init cmp_node_active_region(const void *a, const void *b)
 }
 
 /* sort the node_map by start_pfn */
-static void __init sort_node_map(void)
+void __init sort_node_map(void)
 {
 	sort(early_node_map, (size_t)nr_nodemap_entries,
 			sizeof(struct node_active_region),
@@ -5002,23 +5013,65 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
 int set_migratetype_isolate(struct page *page)
 {
 	struct zone *zone;
-	unsigned long flags;
+	struct page *curr_page;
+	unsigned long flags, pfn, iter;
+	unsigned long immobile = 0;
+	struct memory_isolate_notify arg;
+	int notifier_ret;
 	int ret = -EBUSY;
 	int zone_idx;
 
 	zone = page_zone(page);
 	zone_idx = zone_idx(zone);
+
 	spin_lock_irqsave(&zone->lock, flags);
+	if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE ||
+	    zone_idx == ZONE_MOVABLE) {
+		ret = 0;
+		goto out;
+	}
+
+	pfn = page_to_pfn(page);
+	arg.start_pfn = pfn;
+	arg.nr_pages = pageblock_nr_pages;
+	arg.pages_found = 0;
+
 	/*
-	 * In future, more migrate types will be able to be isolation target.
+	 * It may be possible to isolate a pageblock even if the
+	 * migratetype is not MIGRATE_MOVABLE. The memory isolation
+	 * notifier chain is used by balloon drivers to return the
+	 * number of pages in a range that are held by the balloon
+	 * driver to shrink memory. If all the pages are accounted for
+	 * by balloons, are free, or on the LRU, isolation can continue.
+	 * Later, for example, when memory hotplug notifier runs, these
+	 * pages reported as "can be isolated" should be isolated(freed)
+	 * by the balloon driver through the memory notifier chain.
 	 */
-	if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE &&
-	    zone_idx != ZONE_MOVABLE)
+	notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
+	notifier_ret = notifier_to_errno(notifier_ret);
+	if (notifier_ret || !arg.pages_found)
 		goto out;
-	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
-	move_freepages_block(zone, page, MIGRATE_ISOLATE);
-	ret = 0;
+
+	for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) {
+		if (!pfn_valid_within(pfn))
+			continue;
+
+		curr_page = pfn_to_page(iter);
+		if (!page_count(curr_page) || PageLRU(curr_page))
+			continue;
+
+		immobile++;
+	}
+
+	if (arg.pages_found == immobile)
+		ret = 0;
+
 out:
+	if (!ret) {
+		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+		move_freepages_block(zone, page, MIGRATE_ISOLATE);
+	}
+
 	spin_unlock_irqrestore(&zone->lock, flags);
 	if (!ret)
 		drain_all_pages();
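
The new comment describes the contract of the notifier chain this function now consults: on MEM_ISOLATE_COUNT, registered drivers (typically memory balloons) report how many pages in the pageblock they hold, and isolation proceeds only if those reports plus the free/LRU pages found by the loop account for everything. Below is a hedged sketch of the driver side, assuming the register_memory_isolate_notifier()/struct memory_isolate_notify interface added alongside this change; balloon_owns_pfn() is an invented placeholder, not a real API.

#include <linux/memory.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/types.h>

/* placeholder: a real driver would look the pfn up in its own page lists */
static bool balloon_owns_pfn(unsigned long pfn)
{
	return false;
}

static int balloon_isolate_notify(struct notifier_block *nb,
				  unsigned long action, void *data)
{
	struct memory_isolate_notify *arg = data;
	unsigned long pfn;

	if (action != MEM_ISOLATE_COUNT)
		return NOTIFY_OK;

	/* count the pages in [start_pfn, start_pfn + nr_pages) held by the balloon */
	for (pfn = arg->start_pfn; pfn < arg->start_pfn + arg->nr_pages; pfn++)
		if (balloon_owns_pfn(pfn))
			arg->pages_found++;

	return NOTIFY_OK;
}

static struct notifier_block balloon_isolate_nb = {
	.notifier_call = balloon_isolate_notify,
};

static int __init balloon_isolate_sketch_init(void)
{
	return register_memory_isolate_notifier(&balloon_isolate_nb);
}
module_init(balloon_isolate_sketch_init);

static void __exit balloon_isolate_sketch_exit(void)
{
	unregister_memory_isolate_notifier(&balloon_isolate_nb);
}
module_exit(balloon_isolate_sketch_exit);

MODULE_LICENSE("GPL");
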
@@ -5085,3 +5138,24 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
 #endif
+
+#ifdef CONFIG_MEMORY_FAILURE
+bool is_free_buddy_page(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+	unsigned long pfn = page_to_pfn(page);
+	unsigned long flags;
+	int order;
+
+	spin_lock_irqsave(&zone->lock, flags);
+	for (order = 0; order < MAX_ORDER; order++) {
+		struct page *page_head = page - (pfn & ((1 << order) - 1));
+
+		if (PageBuddy(page_head) && page_order(page_head) >= order)
+			break;
+	}
+	spin_unlock_irqrestore(&zone->lock, flags);
+
+	return order < MAX_ORDER;
+}
+#endif
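
is_free_buddy_page() gives the CONFIG_MEMORY_FAILURE (hwpoison) path a way to ask whether a page is already sitting inside a free buddy block: for each order it steps to the aligned head of the order-sized block containing the pfn and checks PageBuddy()/page_order() there. The head computation is just masking off the low pfn bits, as this small stand-alone demonstration shows; the pfn value is an arbitrary example.

#include <stdio.h>

/* head pfn of the order-aligned block that contains pfn */
static unsigned long buddy_head_pfn(unsigned long pfn, int order)
{
	return pfn - (pfn & ((1UL << order) - 1));
}

int main(void)
{
	unsigned long pfn = 0x12345;
	int order;

	for (order = 0; order <= 4; order++)
		printf("order %d: block head of pfn 0x%lx is 0x%lx\n",
		       order, pfn, buddy_head_pfn(pfn, order));
	return 0;
}
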