Diffstat (limited to 'mm/page_alloc.c')
 -rw-r--r--  mm/page_alloc.c | 123
 1 file changed, 98 insertions(+), 25 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2bc2ac63f41e..d2a8889b4c58 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,6 +48,7 @@
 #include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
+#include <linux/memory.h>
 #include <trace/events/kmem.h>
 
 #include <asm/tlbflush.h>
@@ -486,7 +487,6 @@ static inline void __free_one_page(struct page *page,
 	zone->free_area[order].nr_free++;
 }
 
-#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
 /*
  * free_page_mlock() -- clean up attempts to free and mlocked() page.
  * Page should not be on lru, so no need to fix that up.
@@ -497,9 +497,6 @@ static inline void free_page_mlock(struct page *page)
 	__dec_zone_page_state(page, NR_MLOCK);
 	__count_vm_event(UNEVICTABLE_MLOCKFREED);
 }
-#else
-static void free_page_mlock(struct page *page) { }
-#endif
 
 static inline int free_pages_check(struct page *page)
 {
@@ -1225,10 +1222,10 @@ again:
 		}
 		spin_lock_irqsave(&zone->lock, flags);
 		page = __rmqueue(zone, order, migratetype);
-		__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
 		spin_unlock(&zone->lock);
 		if (!page)
 			goto failed;
+		__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
 	}
 
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
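The reordering above is the substance of this hunk: __rmqueue() can return NULL, and the old code had already debited NR_FREE_PAGES by 1 << order before that check, so every failed request left the free-page counter drifting low. A minimal sketch of the fixed ordering, with an invented wrapper name; note that spin_unlock() after spin_lock_irqsave() leaves interrupts disabled, which the __mod_zone_page_state() fast path requires:

static struct page *rmqueue_and_account(struct zone *zone, unsigned int order,
					int migratetype, unsigned long *flags)
{
	struct page *page;

	spin_lock_irqsave(&zone->lock, *flags);
	page = __rmqueue(zone, order, migratetype);
	spin_unlock(&zone->lock);	/* irqs stay off via *flags */
	if (page)	/* debit only pages that actually left the free lists */
		__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
	return page;
}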
@@ -1658,12 +1655,22 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	if (page)
 		goto out;
 
-	/* The OOM killer will not help higher order allocs */
-	if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_NOFAIL))
-		goto out;
-
+	if (!(gfp_mask & __GFP_NOFAIL)) {
+		/* The OOM killer will not help higher order allocs */
+		if (order > PAGE_ALLOC_COSTLY_ORDER)
+			goto out;
+		/*
+		 * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
+		 * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
+		 * The caller should handle page allocation failure by itself if
+		 * it specifies __GFP_THISNODE.
+		 * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER.
+		 */
+		if (gfp_mask & __GFP_THISNODE)
+			goto out;
+	}
 	/* Exhausted what can be done so it's blamo time */
-	out_of_memory(zonelist, gfp_mask, order);
+	out_of_memory(zonelist, gfp_mask, order, nodemask);
 
 out:
 	clear_zonelist_oom(zonelist, gfp_mask);
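The new bail-outs encode a contract: a caller that pins an allocation to one node with __GFP_THISNODE must handle failure itself instead of waking the OOM killer, and out_of_memory() now also receives the allocation's nodemask so victim selection can respect it. A hedged sketch of such a caller; the fallback policy is invented for illustration:

	/* try node-local first, then fall back rather than OOM-killing */
	struct page *page = alloc_pages_node(nid, GFP_KERNEL | __GFP_THISNODE, order);
	if (!page)
		page = alloc_pages(GFP_KERNEL, order);	/* any node will do */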
@@ -2395,13 +2402,14 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
 {
 	char saved_string[NUMA_ZONELIST_ORDER_LEN];
 	int ret;
+	static DEFINE_MUTEX(zl_order_mutex);
 
+	mutex_lock(&zl_order_mutex);
 	if (write)
-		strncpy(saved_string, (char*)table->data,
-			NUMA_ZONELIST_ORDER_LEN);
+		strcpy(saved_string, (char*)table->data);
 	ret = proc_dostring(table, write, buffer, length, ppos);
 	if (ret)
-		return ret;
+		goto out;
 	if (write) {
 		int oldval = user_zonelist_order;
 		if (__parse_numa_zonelist_order((char*)table->data)) {
@@ -2414,7 +2422,9 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
 		} else if (oldval != user_zonelist_order)
 			build_all_zonelists();
 	}
-	return 0;
+out:
+	mutex_unlock(&zl_order_mutex);
+	return ret;
 }
 
 
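The two hunks above are one fix: proc_dostring() plus the parse of table->data and the rollback into saved_string form a read-modify-write over shared state, so two concurrent writers to /proc/sys/vm/numa_zonelist_order could interleave and leave user_zonelist_order inconsistent. A function-local static mutex now serializes the whole handler, and the early return becomes goto out so the lock is released on every path. The same pattern in miniature, with invented names:

static int example_order_handler(ctl_table *table, int write,
				 void __user *buffer, size_t *length, loff_t *ppos)
{
	static DEFINE_MUTEX(lock);	/* scoped to this handler only */
	int ret;

	mutex_lock(&lock);
	ret = proc_dostring(table, write, buffer, length, ppos);
	if (!ret && write)
		ret = parse_and_apply(table->data);	/* invented helper */
	mutex_unlock(&lock);		/* single unlock on every path */
	return ret;
}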
@@ -3127,7 +3137,7 @@ static int __cpuinit process_zones(int cpu)
 
 		if (percpu_pagelist_fraction)
 			setup_pagelist_highmark(zone_pcp(zone, cpu),
-			 	(zone->present_pages / percpu_pagelist_fraction));
+				(zone->present_pages / percpu_pagelist_fraction));
 	}
 
 	return 0;
@@ -3573,7 +3583,7 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
  * Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
  * then all holes in the requested range will be accounted for.
  */
-static unsigned long __meminit __absent_pages_in_range(int nid,
+unsigned long __meminit __absent_pages_in_range(int nid,
 				unsigned long range_start_pfn,
 				unsigned long range_end_pfn)
 {
@@ -3988,7 +3998,7 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn,
 	}
 
 	/* Merge backward if suitable */
-	if (start_pfn < early_node_map[i].end_pfn &&
+	if (start_pfn < early_node_map[i].start_pfn &&
 	    end_pfn >= early_node_map[i].start_pfn) {
 		early_node_map[i].start_pfn = start_pfn;
 		return;
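The one-word fix above tightens the merge-backward test. The old condition, start_pfn < early_node_map[i].end_pfn, matched any overlapping range, and the assignment early_node_map[i].start_pfn = start_pfn could then move an entry's start forward and silently drop pages; comparing against start_pfn admits only ranges that genuinely extend the entry downward. A worked example with invented pfn values, and the corrected predicate as a hypothetical helper:

/* Existing entry covers [100, 200).
 * New range [50, 120):  50 < 100 and 120 >= 100 -> merge; entry becomes [50, 200).
 * New range [150, 250): 150 < 100 fails -> no backward merge. Under the old
 * test (150 < 200) it matched, setting start_pfn to 150 and losing [100, 150). */
static bool merges_backward(unsigned long start_pfn, unsigned long end_pfn,
			    const struct node_active_region *r)
{
	return start_pfn < r->start_pfn && end_pfn >= r->start_pfn;
}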
@@ -4102,7 +4112,7 @@ static int __init cmp_node_active_region(const void *a, const void *b)
 }
 
 /* sort the node_map by start_pfn */
-static void __init sort_node_map(void)
+void __init sort_node_map(void)
 {
 	sort(early_node_map, (size_t)nr_nodemap_entries,
 			sizeof(struct node_active_region),
@@ -5002,23 +5012,65 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
 int set_migratetype_isolate(struct page *page)
 {
 	struct zone *zone;
-	unsigned long flags;
+	struct page *curr_page;
+	unsigned long flags, pfn, iter;
+	unsigned long immobile = 0;
+	struct memory_isolate_notify arg;
+	int notifier_ret;
 	int ret = -EBUSY;
 	int zone_idx;
 
 	zone = page_zone(page);
 	zone_idx = zone_idx(zone);
+
 	spin_lock_irqsave(&zone->lock, flags);
+	if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE ||
+	    zone_idx == ZONE_MOVABLE) {
+		ret = 0;
+		goto out;
+	}
+
+	pfn = page_to_pfn(page);
+	arg.start_pfn = pfn;
+	arg.nr_pages = pageblock_nr_pages;
+	arg.pages_found = 0;
+
 	/*
-	 * In future, more migrate types will be able to be isolation target.
+	 * It may be possible to isolate a pageblock even if the
+	 * migratetype is not MIGRATE_MOVABLE. The memory isolation
+	 * notifier chain is used by balloon drivers to return the
+	 * number of pages in a range that are held by the balloon
+	 * driver to shrink memory. If all the pages are accounted for
+	 * by balloons, are free, or on the LRU, isolation can continue.
+	 * Later, for example, when memory hotplug notifier runs, these
+	 * pages reported as "can be isolated" should be isolated(freed)
+	 * by the balloon driver through the memory notifier chain.
 	 */
-	if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE &&
-	    zone_idx != ZONE_MOVABLE)
+	notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
+	notifier_ret = notifier_to_errno(notifier_ret);
+	if (notifier_ret || !arg.pages_found)
 		goto out;
-	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
-	move_freepages_block(zone, page, MIGRATE_ISOLATE);
-	ret = 0;
+
+	for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) {
+		if (!pfn_valid_within(pfn))
+			continue;
+
+		curr_page = pfn_to_page(iter);
+		if (!page_count(curr_page) || PageLRU(curr_page))
+			continue;
+
+		immobile++;
+	}
+
+	if (arg.pages_found == immobile)
+		ret = 0;
+
 out:
+	if (!ret) {
+		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+		move_freepages_block(zone, page, MIGRATE_ISOLATE);
+	}
+
 	spin_unlock_irqrestore(&zone->lock, flags);
 	if (!ret)
 		drain_all_pages();
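The notifier call is the heart of this hunk: set_migratetype_isolate() broadcasts MEM_ISOLATE_COUNT for the pageblock's range, drivers that pin pages (the new comment names balloon drivers) add what they hold to arg.pages_found, and isolation goes ahead only when that count matches the pages found to be neither free nor on the LRU. A hedged sketch of the driver side, assuming the register_memory_isolate_notifier() interface introduced alongside this change; count_balloon_pages() is invented:

#include <linux/memory.h>
#include <linux/notifier.h>

static int balloon_isolate_notify(struct notifier_block *self,
				  unsigned long action, void *data)
{
	struct memory_isolate_notify *arg = data;

	if (action != MEM_ISOLATE_COUNT)
		return NOTIFY_OK;

	/* count_balloon_pages() is hypothetical: how many pages in
	 * [start_pfn, start_pfn + nr_pages) does this driver hold? */
	arg->pages_found += count_balloon_pages(arg->start_pfn, arg->nr_pages);
	return NOTIFY_OK;
}

static struct notifier_block balloon_isolate_nb = {
	.notifier_call = balloon_isolate_notify,
};

/* e.g. from the driver's init path:
 *	register_memory_isolate_notifier(&balloon_isolate_nb);
 */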
@@ -5085,3 +5137,24 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
 #endif
+
+#ifdef CONFIG_MEMORY_FAILURE
+bool is_free_buddy_page(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+	unsigned long pfn = page_to_pfn(page);
+	unsigned long flags;
+	int order;
+
+	spin_lock_irqsave(&zone->lock, flags);
+	for (order = 0; order < MAX_ORDER; order++) {
+		struct page *page_head = page - (pfn & ((1 << order) - 1));
+
+		if (PageBuddy(page_head) && page_order(page_head) >= order)
+			break;
+	}
+	spin_unlock_irqrestore(&zone->lock, flags);
+
+	return order < MAX_ORDER;
+}
+#endif
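is_free_buddy_page() leans on two buddy invariants: only the head page of a free block has PageBuddy set, and an order-k block starts at a pfn aligned to 1 << k, so page - (pfn & ((1 << order) - 1)) locates the candidate head at each order. If no head of sufficient order is found, the loop runs to MAX_ORDER and the function returns false. A sketch of the sort of CONFIG_MEMORY_FAILURE caller this helper is presumably for; the function below is invented, not the actual hwpoison code:

static int poison_check_free(struct page *p)
{
	if (is_free_buddy_page(p))
		return 0;	/* free in the buddy lists: nobody maps it */
	/* a live page needs the slow path: unmap and signal users (omitted) */
	return -EBUSY;
}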
