author     Tejun Heo <tj@kernel.org>    2010-01-04 19:17:33 -0500
committer  Tejun Heo <tj@kernel.org>    2010-01-04 19:17:33 -0500
commit     32032df6c2f6c9c6b2ada2ce42322231824f70c2 (patch)
tree       b1ce838a37044bb38dfc128e2116ca35630e629a /mm/page_alloc.c
parent     22b737f4c75197372d64afc6ed1bccd58c00e549 (diff)
parent     c5974b835a909ff15c3b7e6cf6789b5eb919f419 (diff)
Merge branch 'master' into percpu

Conflicts:
	arch/powerpc/platforms/pseries/hvCall.S
	include/linux/percpu.h
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  126
1 file changed, 99 insertions(+), 27 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bf720550b44d..4e9f5cc5fb59 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,6 +48,7 @@
 #include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
+#include <linux/memory.h>
 #include <trace/events/kmem.h>
 
 #include <asm/tlbflush.h>
@@ -486,7 +487,6 @@ static inline void __free_one_page(struct page *page,
 	zone->free_area[order].nr_free++;
 }
 
-#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
 /*
  * free_page_mlock() -- clean up attempts to free and mlocked() page.
  * Page should not be on lru, so no need to fix that up.
@@ -497,9 +497,6 @@ static inline void free_page_mlock(struct page *page)
 	__dec_zone_page_state(page, NR_MLOCK);
 	__count_vm_event(UNEVICTABLE_MLOCKFREED);
 }
-#else
-static void free_page_mlock(struct page *page) { }
-#endif
 
 static inline int free_pages_check(struct page *page)
 {
@@ -1658,12 +1655,22 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	if (page)
 		goto out;
 
-	/* The OOM killer will not help higher order allocs */
-	if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_NOFAIL))
-		goto out;
-
+	if (!(gfp_mask & __GFP_NOFAIL)) {
+		/* The OOM killer will not help higher order allocs */
+		if (order > PAGE_ALLOC_COSTLY_ORDER)
+			goto out;
+		/*
+		 * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
+		 * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
+		 * The caller should handle page allocation failure by itself if
+		 * it specifies __GFP_THISNODE.
+		 * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER.
+		 */
+		if (gfp_mask & __GFP_THISNODE)
+			goto out;
+	}
 	/* Exhausted what can be done so it's blamo time */
-	out_of_memory(zonelist, gfp_mask, order);
+	out_of_memory(zonelist, gfp_mask, order, nodemask);
 
 out:
 	clear_zonelist_oom(zonelist, gfp_mask);
@@ -1769,7 +1776,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 		 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 		 */
 		alloc_flags &= ~ALLOC_CPUSET;
-	} else if (unlikely(rt_task(p)))
+	} else if (unlikely(rt_task(p)) && !in_interrupt())
 		alloc_flags |= ALLOC_HARDER;
 
 	if (likely(!(gfp_mask & __GFP_NOMEMALLOC))) {
@@ -1817,9 +1824,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
 		goto nopage;
 
+restart:
 	wake_all_kswapd(order, zonelist, high_zoneidx);
 
-restart:
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
 	 * reclaim. Now things get more complex, so set up alloc_flags according
@@ -2183,7 +2190,7 @@ void show_free_areas(void)
 	printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
 		" active_file:%lu inactive_file:%lu isolated_file:%lu\n"
 		" unevictable:%lu"
-		" dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n"
+		" dirty:%lu writeback:%lu unstable:%lu\n"
 		" free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
 		" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n",
 		global_page_state(NR_ACTIVE_ANON),
@@ -2196,7 +2203,6 @@ void show_free_areas(void)
 		global_page_state(NR_FILE_DIRTY),
 		global_page_state(NR_WRITEBACK),
 		global_page_state(NR_UNSTABLE_NFS),
-		nr_blockdev_pages(),
 		global_page_state(NR_FREE_PAGES),
 		global_page_state(NR_SLAB_RECLAIMABLE),
 		global_page_state(NR_SLAB_UNRECLAIMABLE),
@@ -2396,13 +2402,14 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
 {
 	char saved_string[NUMA_ZONELIST_ORDER_LEN];
 	int ret;
+	static DEFINE_MUTEX(zl_order_mutex);
 
+	mutex_lock(&zl_order_mutex);
 	if (write)
-		strncpy(saved_string, (char*)table->data,
-			NUMA_ZONELIST_ORDER_LEN);
+		strcpy(saved_string, (char*)table->data);
 	ret = proc_dostring(table, write, buffer, length, ppos);
 	if (ret)
-		return ret;
+		goto out;
 	if (write) {
 		int oldval = user_zonelist_order;
 		if (__parse_numa_zonelist_order((char*)table->data)) {
@@ -2415,7 +2422,9 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
 		} else if (oldval != user_zonelist_order)
 			build_all_zonelists();
 	}
-	return 0;
+out:
+	mutex_unlock(&zl_order_mutex);
+	return ret;
 }
 
 
@@ -3128,7 +3137,7 @@ static int __cpuinit process_zones(int cpu)
 
 		if (percpu_pagelist_fraction)
 			setup_pagelist_highmark(zone_pcp(zone, cpu),
-				(zone->present_pages / percpu_pagelist_fraction));
+				(zone->present_pages / percpu_pagelist_fraction));
 	}
 
 	return 0;
@@ -3574,7 +3583,7 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
  * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
  * then all holes in the requested range will be accounted for.
  */
-static unsigned long __meminit __absent_pages_in_range(int nid,
+unsigned long __meminit __absent_pages_in_range(int nid,
 				unsigned long range_start_pfn,
 				unsigned long range_end_pfn)
 {
@@ -4103,7 +4112,7 @@ static int __init cmp_node_active_region(const void *a, const void *b)
 }
 
 /* sort the node_map by start_pfn */
-static void __init sort_node_map(void)
+void __init sort_node_map(void)
 {
 	sort(early_node_map, (size_t)nr_nodemap_entries,
 			sizeof(struct node_active_region),
@@ -5003,23 +5012,65 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
 int set_migratetype_isolate(struct page *page)
 {
 	struct zone *zone;
-	unsigned long flags;
+	struct page *curr_page;
+	unsigned long flags, pfn, iter;
+	unsigned long immobile = 0;
+	struct memory_isolate_notify arg;
+	int notifier_ret;
 	int ret = -EBUSY;
 	int zone_idx;
 
 	zone = page_zone(page);
 	zone_idx = zone_idx(zone);
+
 	spin_lock_irqsave(&zone->lock, flags);
+	if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE ||
+	    zone_idx == ZONE_MOVABLE) {
+		ret = 0;
+		goto out;
+	}
+
+	pfn = page_to_pfn(page);
+	arg.start_pfn = pfn;
+	arg.nr_pages = pageblock_nr_pages;
+	arg.pages_found = 0;
+
 	/*
-	 * In future, more migrate types will be able to be isolation target.
+	 * It may be possible to isolate a pageblock even if the
+	 * migratetype is not MIGRATE_MOVABLE. The memory isolation
+	 * notifier chain is used by balloon drivers to return the
+	 * number of pages in a range that are held by the balloon
+	 * driver to shrink memory. If all the pages are accounted for
+	 * by balloons, are free, or on the LRU, isolation can continue.
+	 * Later, for example, when memory hotplug notifier runs, these
+	 * pages reported as "can be isolated" should be isolated(freed)
+	 * by the balloon driver through the memory notifier chain.
 	 */
-	if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE &&
-	    zone_idx != ZONE_MOVABLE)
+	notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
+	notifier_ret = notifier_to_errno(notifier_ret);
+	if (notifier_ret || !arg.pages_found)
 		goto out;
-	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
-	move_freepages_block(zone, page, MIGRATE_ISOLATE);
-	ret = 0;
+
+	for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) {
+		if (!pfn_valid_within(pfn))
+			continue;
+
+		curr_page = pfn_to_page(iter);
+		if (!page_count(curr_page) || PageLRU(curr_page))
+			continue;
+
+		immobile++;
+	}
+
+	if (arg.pages_found == immobile)
+		ret = 0;
+
 out:
+	if (!ret) {
+		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+		move_freepages_block(zone, page, MIGRATE_ISOLATE);
+	}
+
 	spin_unlock_irqrestore(&zone->lock, flags);
 	if (!ret)
 		drain_all_pages();
@@ -5086,3 +5137,24 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
 #endif
+
+#ifdef CONFIG_MEMORY_FAILURE
+bool is_free_buddy_page(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+	unsigned long pfn = page_to_pfn(page);
+	unsigned long flags;
+	int order;
+
+	spin_lock_irqsave(&zone->lock, flags);
+	for (order = 0; order < MAX_ORDER; order++) {
+		struct page *page_head = page - (pfn & ((1 << order) - 1));
+
+		if (PageBuddy(page_head) && page_order(page_head) >= order)
+			break;
+	}
+	spin_unlock_irqrestore(&zone->lock, flags);
+
+	return order < MAX_ORDER;
+}
+#endif