path: root/mm/page_alloc.c
author	Tejun Heo <tj@kernel.org>	2010-01-04 19:17:33 -0500
committer	Tejun Heo <tj@kernel.org>	2010-01-04 19:17:33 -0500
commit	32032df6c2f6c9c6b2ada2ce42322231824f70c2 (patch)
tree	b1ce838a37044bb38dfc128e2116ca35630e629a /mm/page_alloc.c
parent	22b737f4c75197372d64afc6ed1bccd58c00e549 (diff)
parent	c5974b835a909ff15c3b7e6cf6789b5eb919f419 (diff)

Merge branch 'master' into percpu

Conflicts:
	arch/powerpc/platforms/pseries/hvCall.S
	include/linux/percpu.h
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	126
1 file changed, 99 insertions(+), 27 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bf720550b44d..4e9f5cc5fb59 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,6 +48,7 @@
 #include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
+#include <linux/memory.h>
 #include <trace/events/kmem.h>
 
 #include <asm/tlbflush.h>
@@ -486,7 +487,6 @@ static inline void __free_one_page(struct page *page,
         zone->free_area[order].nr_free++;
 }
 
-#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
 /*
  * free_page_mlock() -- clean up attempts to free and mlocked() page.
  * Page should not be on lru, so no need to fix that up.
@@ -497,9 +497,6 @@ static inline void free_page_mlock(struct page *page)
         __dec_zone_page_state(page, NR_MLOCK);
         __count_vm_event(UNEVICTABLE_MLOCKFREED);
 }
-#else
-static void free_page_mlock(struct page *page) { }
-#endif
 
 static inline int free_pages_check(struct page *page)
 {
@@ -1658,12 +1655,22 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
         if (page)
                 goto out;
 
-        /* The OOM killer will not help higher order allocs */
-        if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_NOFAIL))
-                goto out;
-
+        if (!(gfp_mask & __GFP_NOFAIL)) {
+                /* The OOM killer will not help higher order allocs */
+                if (order > PAGE_ALLOC_COSTLY_ORDER)
+                        goto out;
+                /*
+                 * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
+                 * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
+                 * The caller should handle page allocation failure by itself if
+                 * it specifies __GFP_THISNODE.
+                 * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER.
+                 */
+                if (gfp_mask & __GFP_THISNODE)
+                        goto out;
+        }
         /* Exhausted what can be done so it's blamo time */
-        out_of_memory(zonelist, gfp_mask, order);
+        out_of_memory(zonelist, gfp_mask, order, nodemask);
 
 out:
         clear_zonelist_oom(zonelist, gfp_mask);
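
A note on the bare-__GFP_THISNODE case this hunk filters out. The following userspace sketch illustrates why only a bare __GFP_THISNODE can reach __alloc_pages_may_oom(); the flag values are assumptions recalled from include/linux/gfp.h of this kernel generation, not part of this diff, so verify against the tree:

#include <stdio.h>

/* Assumed bit values from gfp.h of this era (not in this diff): */
#define __GFP_NOWARN    0x200u
#define __GFP_NORETRY   0x1000u
#define __GFP_THISNODE  0x40000u
#define GFP_THISNODE    (__GFP_THISNODE | __GFP_NORETRY | __GFP_NOWARN)

int main(void)
{
        unsigned full = GFP_THISNODE;   /* what callers normally pass */
        unsigned bare = __GFP_THISNODE; /* the case the new check catches */

        /* The slowpath's early test (see the GFP_THISNODE == GFP_THISNODE
         * check further down in this diff) only fires for the full mask: */
        printf("full mask bails before OOM: %d\n",
               (full & GFP_THISNODE) == GFP_THISNODE);
        /* A bare __GFP_THISNODE gets past it and reaches the OOM path,
         * where the new hunk sends it to 'out' instead of the killer: */
        printf("bare flag bails before OOM: %d\n",
               (bare & GFP_THISNODE) == GFP_THISNODE);
        return 0;
}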
@@ -1769,7 +1776,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
                 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
                 */
                alloc_flags &= ~ALLOC_CPUSET;
-        } else if (unlikely(rt_task(p)))
+        } else if (unlikely(rt_task(p)) && !in_interrupt())
                 alloc_flags |= ALLOC_HARDER;
 
         if (likely(!(gfp_mask & __GFP_NOMEMALLOC))) {
@@ -1817,9 +1824,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
         if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
                 goto nopage;
 
+restart:
         wake_all_kswapd(order, zonelist, high_zoneidx);
 
-restart:
         /*
          * OK, we're below the kswapd watermark and have kicked background
          * reclaim. Now things get more complex, so set up alloc_flags according
@@ -2183,7 +2190,7 @@ void show_free_areas(void)
         printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
                 " active_file:%lu inactive_file:%lu isolated_file:%lu\n"
                 " unevictable:%lu"
-                " dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n"
+                " dirty:%lu writeback:%lu unstable:%lu\n"
                 " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
                 " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n",
                 global_page_state(NR_ACTIVE_ANON),
@@ -2196,7 +2203,6 @@ void show_free_areas(void)
                 global_page_state(NR_FILE_DIRTY),
                 global_page_state(NR_WRITEBACK),
                 global_page_state(NR_UNSTABLE_NFS),
-                nr_blockdev_pages(),
                 global_page_state(NR_FREE_PAGES),
                 global_page_state(NR_SLAB_RECLAIMABLE),
                 global_page_state(NR_SLAB_UNRECLAIMABLE),
@@ -2396,13 +2402,14 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
 {
         char saved_string[NUMA_ZONELIST_ORDER_LEN];
         int ret;
+        static DEFINE_MUTEX(zl_order_mutex);
 
+        mutex_lock(&zl_order_mutex);
         if (write)
-                strncpy(saved_string, (char*)table->data,
-                        NUMA_ZONELIST_ORDER_LEN);
+                strcpy(saved_string, (char*)table->data);
         ret = proc_dostring(table, write, buffer, length, ppos);
         if (ret)
-                return ret;
+                goto out;
         if (write) {
                 int oldval = user_zonelist_order;
                 if (__parse_numa_zonelist_order((char*)table->data)) {
@@ -2415,7 +2422,9 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
                 } else if (oldval != user_zonelist_order)
                         build_all_zonelists();
         }
-        return 0;
+out:
+        mutex_unlock(&zl_order_mutex);
+        return ret;
 }
 
 
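The two hunks above close a race: concurrent writers to the sysctl could interleave the save/parse/rollback sequence on table->data. A minimal sketch of the serialization pattern being introduced, with an illustrative handler name (not from the tree); the signature mirrors numa_zonelist_order_handler() in this diff:

#include <linux/mutex.h>
#include <linux/sysctl.h>

static int example_string_sysctl(ctl_table *table, int write,
                                 void __user *buffer, size_t *length,
                                 loff_t *ppos)
{
        static DEFINE_MUTEX(lock);      /* one lock serializes all callers */
        int ret;

        mutex_lock(&lock);
        /* Save table->data, let proc_dostring() overwrite it, then
         * validate and roll back on parse failure -- all under the lock,
         * so two writers can never interleave the sequence. */
        ret = proc_dostring(table, write, buffer, length, ppos);
        mutex_unlock(&lock);            /* every exit path must unlock */
        return ret;
}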
@@ -3128,7 +3137,7 @@ static int __cpuinit process_zones(int cpu)
 
                 if (percpu_pagelist_fraction)
                         setup_pagelist_highmark(zone_pcp(zone, cpu),
-                         	(zone->present_pages / percpu_pagelist_fraction));
+                                (zone->present_pages / percpu_pagelist_fraction));
         }
 
         return 0;
@@ -3574,7 +3583,7 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
  * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
  * then all holes in the requested range will be accounted for.
  */
-static unsigned long __meminit __absent_pages_in_range(int nid,
+unsigned long __meminit __absent_pages_in_range(int nid,
                                 unsigned long range_start_pfn,
                                 unsigned long range_end_pfn)
 {
@@ -4103,7 +4112,7 @@ static int __init cmp_node_active_region(const void *a, const void *b)
 }
 
 /* sort the node_map by start_pfn */
-static void __init sort_node_map(void)
+void __init sort_node_map(void)
 {
         sort(early_node_map, (size_t)nr_nodemap_entries,
                         sizeof(struct node_active_region),
@@ -5003,23 +5012,65 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
 int set_migratetype_isolate(struct page *page)
 {
         struct zone *zone;
-        unsigned long flags;
+        struct page *curr_page;
+        unsigned long flags, pfn, iter;
+        unsigned long immobile = 0;
+        struct memory_isolate_notify arg;
+        int notifier_ret;
         int ret = -EBUSY;
         int zone_idx;
 
         zone = page_zone(page);
         zone_idx = zone_idx(zone);
+
         spin_lock_irqsave(&zone->lock, flags);
+        if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE ||
+            zone_idx == ZONE_MOVABLE) {
+                ret = 0;
+                goto out;
+        }
+
+        pfn = page_to_pfn(page);
+        arg.start_pfn = pfn;
+        arg.nr_pages = pageblock_nr_pages;
+        arg.pages_found = 0;
+
         /*
-         * In future, more migrate types will be able to be isolation target.
+         * It may be possible to isolate a pageblock even if the
+         * migratetype is not MIGRATE_MOVABLE. The memory isolation
+         * notifier chain is used by balloon drivers to return the
+         * number of pages in a range that are held by the balloon
+         * driver to shrink memory. If all the pages are accounted for
+         * by balloons, are free, or on the LRU, isolation can continue.
+         * Later, for example, when memory hotplug notifier runs, these
+         * pages reported as "can be isolated" should be isolated(freed)
+         * by the balloon driver through the memory notifier chain.
          */
-        if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE &&
-            zone_idx != ZONE_MOVABLE)
+        notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
+        notifier_ret = notifier_to_errno(notifier_ret);
+        if (notifier_ret || !arg.pages_found)
                 goto out;
-        set_pageblock_migratetype(page, MIGRATE_ISOLATE);
-        move_freepages_block(zone, page, MIGRATE_ISOLATE);
-        ret = 0;
+
+        for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) {
+                if (!pfn_valid_within(pfn))
+                        continue;
+
+                curr_page = pfn_to_page(iter);
+                if (!page_count(curr_page) || PageLRU(curr_page))
+                        continue;
+
+                immobile++;
+        }
+
+        if (arg.pages_found == immobile)
+                ret = 0;
+
 out:
+        if (!ret) {
+                set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+                move_freepages_block(zone, page, MIGRATE_ISOLATE);
+        }
+
         spin_unlock_irqrestore(&zone->lock, flags);
         if (!ret)
                 drain_all_pages();
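
For context, the memory_isolate_notify(MEM_ISOLATE_COUNT, ...) call above is consumed by balloon drivers. A hedged sketch of the provider side, modeled on what the pseries CMM driver does in this series; balloon_pages_in_range() is a hypothetical helper, and the registration call should be checked against drivers/base/memory.c at this commit:

#include <linux/memory.h>
#include <linux/notifier.h>

/* Hypothetical helper: how many balloon-owned pages fall in the range. */
extern unsigned long balloon_pages_in_range(unsigned long start_pfn,
                                            unsigned long nr_pages);

static int balloon_memory_isolate(struct notifier_block *self,
                                  unsigned long action, void *arg)
{
        struct memory_isolate_notify *marg = arg;

        if (action != MEM_ISOLATE_COUNT)
                return NOTIFY_OK;

        /* Report pages we hold so set_migratetype_isolate() can count
         * them as isolatable rather than immobile. */
        marg->pages_found += balloon_pages_in_range(marg->start_pfn,
                                                    marg->nr_pages);
        return NOTIFY_OK;
}

static struct notifier_block balloon_isolate_nb = {
        .notifier_call = balloon_memory_isolate,
};

/* at driver init: register_memory_isolate_notifier(&balloon_isolate_nb); */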
@@ -5086,3 +5137,24 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
         spin_unlock_irqrestore(&zone->lock, flags);
 }
 #endif
+
+#ifdef CONFIG_MEMORY_FAILURE
+bool is_free_buddy_page(struct page *page)
+{
+        struct zone *zone = page_zone(page);
+        unsigned long pfn = page_to_pfn(page);
+        unsigned long flags;
+        int order;
+
+        spin_lock_irqsave(&zone->lock, flags);
+        for (order = 0; order < MAX_ORDER; order++) {
+                struct page *page_head = page - (pfn & ((1 << order) - 1));
+
+                if (PageBuddy(page_head) && page_order(page_head) >= order)
+                        break;
+        }
+        spin_unlock_irqrestore(&zone->lock, flags);
+
+        return order < MAX_ORDER;
+}
+#endif
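
The page_head computation in is_free_buddy_page() just clears the low 'order' bits of the pfn to find the first page of the enclosing buddy block. A standalone userspace sketch of that arithmetic (illustrative only; buddy_head_pfn() is not a kernel function):

#include <assert.h>

/* pfn of the first page in the order-'order' block containing pfn */
static unsigned long buddy_head_pfn(unsigned long pfn, int order)
{
        return pfn & ~((1UL << order) - 1);
}

int main(void)
{
        assert(buddy_head_pfn(0x1235, 0) == 0x1235); /* order 0: the page itself */
        assert(buddy_head_pfn(0x1235, 3) == 0x1230); /* order 3: 8-page block */
        assert(buddy_head_pfn(0x1235, 4) == 0x1230); /* order 4: 16-page block */
        return 0;
}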