Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	123
1 file changed, 98 insertions, 25 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2bc2ac63f41e..d2a8889b4c58 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,6 +48,7 @@
 #include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
+#include <linux/memory.h>
 #include <trace/events/kmem.h>
 
 #include <asm/tlbflush.h>
@@ -486,7 +487,6 @@ static inline void __free_one_page(struct page *page,
 	zone->free_area[order].nr_free++;
 }
 
-#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
 /*
  * free_page_mlock() -- clean up attempts to free and mlocked() page.
  * Page should not be on lru, so no need to fix that up.
@@ -497,9 +497,6 @@ static inline void free_page_mlock(struct page *page)
 	__dec_zone_page_state(page, NR_MLOCK);
 	__count_vm_event(UNEVICTABLE_MLOCKFREED);
 }
-#else
-static void free_page_mlock(struct page *page) { }
-#endif
 
 static inline int free_pages_check(struct page *page)
 {
@@ -1225,10 +1222,10 @@ again:
 		}
 		spin_lock_irqsave(&zone->lock, flags);
 		page = __rmqueue(zone, order, migratetype);
-		__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
 		spin_unlock(&zone->lock);
 		if (!page)
 			goto failed;
+		__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
 	}
 
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
@@ -1658,12 +1655,22 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	if (page)
 		goto out;
 
-	/* The OOM killer will not help higher order allocs */
-	if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_NOFAIL))
-		goto out;
-
+	if (!(gfp_mask & __GFP_NOFAIL)) {
+		/* The OOM killer will not help higher order allocs */
+		if (order > PAGE_ALLOC_COSTLY_ORDER)
+			goto out;
+		/*
+		 * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
+		 * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
+		 * The caller should handle page allocation failure by itself if
+		 * it specifies __GFP_THISNODE.
+		 * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER.
+		 */
+		if (gfp_mask & __GFP_THISNODE)
+			goto out;
+	}
 	/* Exhausted what can be done so it's blamo time */
-	out_of_memory(zonelist, gfp_mask, order);
+	out_of_memory(zonelist, gfp_mask, order, nodemask);
 
 out:
 	clear_zonelist_oom(zonelist, gfp_mask);
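
The new comment above shifts responsibility for bare __GFP_THISNODE failures onto the caller. As a minimal sketch (not part of the patch, names hypothetical), such a caller confines the allocation to one node and simply tolerates a NULL result:

/* Illustrative sketch only; grab_local_page() is a hypothetical caller. */
#include <linux/gfp.h>
#include <linux/mm.h>

static struct page *grab_local_page(int nid, unsigned int order)
{
	/* Restrict the allocation to the requested node and swallow failure. */
	struct page *page = alloc_pages_node(nid,
			GFP_KERNEL | __GFP_THISNODE | __GFP_NOWARN, order);

	if (!page)
		return NULL;	/* caller falls back, e.g. to any node */
	return page;
}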
@@ -2395,13 +2402,14 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
 {
 	char saved_string[NUMA_ZONELIST_ORDER_LEN];
 	int ret;
+	static DEFINE_MUTEX(zl_order_mutex);
 
+	mutex_lock(&zl_order_mutex);
 	if (write)
-		strncpy(saved_string, (char*)table->data,
-			NUMA_ZONELIST_ORDER_LEN);
+		strcpy(saved_string, (char*)table->data);
 	ret = proc_dostring(table, write, buffer, length, ppos);
 	if (ret)
-		return ret;
+		goto out;
 	if (write) {
 		int oldval = user_zonelist_order;
 		if (__parse_numa_zonelist_order((char*)table->data)) {
@@ -2414,7 +2422,9 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
 		} else if (oldval != user_zonelist_order)
 			build_all_zonelists();
 	}
-	return 0;
+out:
+	mutex_unlock(&zl_order_mutex);
+	return ret;
 }
 
 
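The handler is now serialized with a function-local static mutex rather than a file-scope lock, and failures route through a common unlock path. A reduced sketch of that pattern, with hypothetical names, assuming the proc_dostring() signature used above:

/* Illustrative sketch only; example_order_handler() is hypothetical. */
#include <linux/mutex.h>
#include <linux/sysctl.h>

static int example_order_handler(struct ctl_table *table, int write,
				 void __user *buffer, size_t *length,
				 loff_t *ppos)
{
	static DEFINE_MUTEX(lock);	/* one lock, scoped to this handler */
	int ret;

	mutex_lock(&lock);
	ret = proc_dostring(table, write, buffer, length, ppos);
	if (!ret && write) {
		/* parse table->data and rebuild dependent state here */
	}
	mutex_unlock(&lock);
	return ret;
}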
@@ -3127,7 +3137,7 @@ static int __cpuinit process_zones(int cpu)
 
 		if (percpu_pagelist_fraction)
 			setup_pagelist_highmark(zone_pcp(zone, cpu),
-				(zone->present_pages / percpu_pagelist_fraction));
+			    (zone->present_pages / percpu_pagelist_fraction));
 	}
 
 	return 0;
@@ -3573,7 +3583,7 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
  * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
  * then all holes in the requested range will be accounted for.
  */
-static unsigned long __meminit __absent_pages_in_range(int nid,
+unsigned long __meminit __absent_pages_in_range(int nid,
 				unsigned long range_start_pfn,
 				unsigned long range_end_pfn)
 {
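
With the static dropped, callers outside page_alloc.c can count holes in a pfn range directly. A minimal sketch of such a caller, assuming the prototype is exported through a header by a companion change; the wrapper name is hypothetical:

/* Illustrative sketch only; holes_in_range() is hypothetical. */
#include <linux/init.h>
#include <linux/numa.h>

unsigned long __meminit __absent_pages_in_range(int nid,
				unsigned long range_start_pfn,
				unsigned long range_end_pfn);

static unsigned long __meminit holes_in_range(unsigned long start_pfn,
					      unsigned long end_pfn)
{
	/* MAX_NUMNODES: count holes in the range regardless of owning node */
	return __absent_pages_in_range(MAX_NUMNODES, start_pfn, end_pfn);
}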
@@ -3988,7 +3998,7 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn,
 	}
 
 	/* Merge backward if suitable */
-	if (start_pfn < early_node_map[i].end_pfn &&
+	if (start_pfn < early_node_map[i].start_pfn &&
 			end_pfn >= early_node_map[i].start_pfn) {
 		early_node_map[i].start_pfn = start_pfn;
 		return;
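
The reworked test now states what backward merging means: the new range must start before the entry and reach its current start. A small standalone illustration of when that test fires, with made-up pfn values:

/* Illustrative only: when the backward-merge test fires. */
#include <assert.h>

int main(void)
{
	unsigned long map_start = 100, map_end = 200;	/* existing entry    */
	unsigned long start_pfn = 50, end_pfn = 150;	/* range being added */

	/* The new range starts before the entry and reaches into it, so the
	 * entry's start is pulled back and it becomes [50, 200). */
	if (start_pfn < map_start && end_pfn >= map_start)
		map_start = start_pfn;

	assert(map_start == 50 && map_end == 200);
	return 0;
}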
@@ -4102,7 +4112,7 @@ static int __init cmp_node_active_region(const void *a, const void *b)
 }
 
 /* sort the node_map by start_pfn */
-static void __init sort_node_map(void)
+void __init sort_node_map(void)
 {
 	sort(early_node_map, (size_t)nr_nodemap_entries,
 			sizeof(struct node_active_region),
@@ -5002,23 +5012,65 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
 int set_migratetype_isolate(struct page *page)
 {
 	struct zone *zone;
-	unsigned long flags;
+	struct page *curr_page;
+	unsigned long flags, pfn, iter;
+	unsigned long immobile = 0;
+	struct memory_isolate_notify arg;
+	int notifier_ret;
 	int ret = -EBUSY;
 	int zone_idx;
 
 	zone = page_zone(page);
 	zone_idx = zone_idx(zone);
+
 	spin_lock_irqsave(&zone->lock, flags);
+	if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE ||
+	    zone_idx == ZONE_MOVABLE) {
+		ret = 0;
+		goto out;
+	}
+
+	pfn = page_to_pfn(page);
+	arg.start_pfn = pfn;
+	arg.nr_pages = pageblock_nr_pages;
+	arg.pages_found = 0;
+
 	/*
-	 * In future, more migrate types will be able to be isolation target.
+	 * It may be possible to isolate a pageblock even if the
+	 * migratetype is not MIGRATE_MOVABLE. The memory isolation
+	 * notifier chain is used by balloon drivers to return the
+	 * number of pages in a range that are held by the balloon
+	 * driver to shrink memory. If all the pages are accounted for
+	 * by balloons, are free, or on the LRU, isolation can continue.
+	 * Later, for example, when memory hotplug notifier runs, these
+	 * pages reported as "can be isolated" should be isolated(freed)
+	 * by the balloon driver through the memory notifier chain.
 	 */
-	if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE &&
-	    zone_idx != ZONE_MOVABLE)
+	notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
+	notifier_ret = notifier_to_errno(notifier_ret);
+	if (notifier_ret || !arg.pages_found)
 		goto out;
-	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
-	move_freepages_block(zone, page, MIGRATE_ISOLATE);
-	ret = 0;
+
+	for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) {
+		if (!pfn_valid_within(pfn))
+			continue;
+
+		curr_page = pfn_to_page(iter);
+		if (!page_count(curr_page) || PageLRU(curr_page))
+			continue;
+
+		immobile++;
+	}
+
+	if (arg.pages_found == immobile)
+		ret = 0;
+
 out:
+	if (!ret) {
+		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+		move_freepages_block(zone, page, MIGRATE_ISOLATE);
+	}
+
 	spin_unlock_irqrestore(&zone->lock, flags);
 	if (!ret)
 		drain_all_pages();
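
For reference, a skeleton (not from this patch) of how a balloon driver might answer the MEM_ISOLATE_COUNT query issued above. The ownership test and driver names are hypothetical, and registration assumes the companion register_memory_isolate_notifier() helper from the same series:

/* Illustrative sketch only; balloon_isolate_notify() is hypothetical. */
#include <linux/memory.h>
#include <linux/notifier.h>
#include <linux/types.h>

static bool my_balloon_owns_page(unsigned long pfn)
{
	return false;	/* placeholder: look the pfn up in the driver's lists */
}

static int balloon_isolate_notify(struct notifier_block *self,
				  unsigned long action, void *arg)
{
	struct memory_isolate_notify *isolate = arg;
	unsigned long pfn;

	if (action != MEM_ISOLATE_COUNT)
		return NOTIFY_OK;

	/* Report pages in the block that the balloon holds; the core code
	 * above compares pages_found against what it cannot account for. */
	for (pfn = isolate->start_pfn;
	     pfn < isolate->start_pfn + isolate->nr_pages; pfn++)
		if (my_balloon_owns_page(pfn))
			isolate->pages_found++;

	return NOTIFY_OK;
}

static struct notifier_block balloon_isolate_nb = {
	.notifier_call = balloon_isolate_notify,
};

/* at driver init: register_memory_isolate_notifier(&balloon_isolate_nb); */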
@@ -5085,3 +5137,24 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
 #endif
+
+#ifdef CONFIG_MEMORY_FAILURE
+bool is_free_buddy_page(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+	unsigned long pfn = page_to_pfn(page);
+	unsigned long flags;
+	int order;
+
+	spin_lock_irqsave(&zone->lock, flags);
+	for (order = 0; order < MAX_ORDER; order++) {
+		struct page *page_head = page - (pfn & ((1 << order) - 1));
+
+		if (PageBuddy(page_head) && page_order(page_head) >= order)
+			break;
+	}
+	spin_unlock_irqrestore(&zone->lock, flags);
+
+	return order < MAX_ORDER;
+}
+#endif
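
Finally, a hypothetical caller for the new helper: a memory-failure (hwpoison) path can take a cheaper route when the page is already free in the buddy allocator. Illustrative only; the declaration is assumed to be made visible elsewhere:

/* Illustrative sketch only; handle_poisoned_page() is hypothetical. */
#include <linux/mm.h>

bool is_free_buddy_page(struct page *page);	/* assumed exported via a header */

static int handle_poisoned_page(struct page *p)
{
	if (is_free_buddy_page(p))
		return 0;	/* free page: no user data to save, just keep it unused */

	/* otherwise fall through to the full unmap/isolate handling */
	return -EBUSY;
}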