Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 667 |
1 file changed, 408 insertions, 259 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f3e0c69a97b7..eaa64d2ffdc5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -55,12 +55,13 @@ | |||
55 | #include <linux/kmemleak.h> | 55 | #include <linux/kmemleak.h> |
56 | #include <linux/compaction.h> | 56 | #include <linux/compaction.h> |
57 | #include <trace/events/kmem.h> | 57 | #include <trace/events/kmem.h> |
58 | #include <trace/events/oom.h> | ||
58 | #include <linux/prefetch.h> | 59 | #include <linux/prefetch.h> |
59 | #include <linux/mm_inline.h> | 60 | #include <linux/mm_inline.h> |
60 | #include <linux/migrate.h> | 61 | #include <linux/migrate.h> |
61 | #include <linux/page_ext.h> | ||
62 | #include <linux/hugetlb.h> | 62 | #include <linux/hugetlb.h> |
63 | #include <linux/sched/rt.h> | 63 | #include <linux/sched/rt.h> |
64 | #include <linux/sched/mm.h> | ||
64 | #include <linux/page_owner.h> | 65 | #include <linux/page_owner.h> |
65 | #include <linux/kthread.h> | 66 | #include <linux/kthread.h> |
66 | #include <linux/memcontrol.h> | 67 | #include <linux/memcontrol.h> |
@@ -91,6 +92,10 @@ EXPORT_PER_CPU_SYMBOL(_numa_mem_); | |||
91 | int _node_numa_mem_[MAX_NUMNODES]; | 92 | int _node_numa_mem_[MAX_NUMNODES]; |
92 | #endif | 93 | #endif |
93 | 94 | ||
95 | /* work_structs for global per-cpu drains */ | ||
96 | DEFINE_MUTEX(pcpu_drain_mutex); | ||
97 | DEFINE_PER_CPU(struct work_struct, pcpu_drain); | ||
98 | |||
94 | #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY | 99 | #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY |
95 | volatile unsigned long latent_entropy __latent_entropy; | 100 | volatile unsigned long latent_entropy __latent_entropy; |
96 | EXPORT_SYMBOL(latent_entropy); | 101 | EXPORT_SYMBOL(latent_entropy); |
@@ -714,7 +719,7 @@ static inline void rmv_page_order(struct page *page) | |||
714 | /* | 719 | /* |
715 | * This function checks whether a page is free && is the buddy | 720 | * This function checks whether a page is free && is the buddy |
716 | * we can do coalesce a page and its buddy if | 721 | * we can do coalesce a page and its buddy if |
717 | * (a) the buddy is not in a hole && | 722 | * (a) the buddy is not in a hole (check before calling!) && |
718 | * (b) the buddy is in the buddy system && | 723 | * (b) the buddy is in the buddy system && |
719 | * (c) a page and its buddy have the same order && | 724 | * (c) a page and its buddy have the same order && |
720 | * (d) a page and its buddy are in the same zone. | 725 | * (d) a page and its buddy are in the same zone. |
@@ -729,9 +734,6 @@ static inline void rmv_page_order(struct page *page) | |||
729 | static inline int page_is_buddy(struct page *page, struct page *buddy, | 734 | static inline int page_is_buddy(struct page *page, struct page *buddy, |
730 | unsigned int order) | 735 | unsigned int order) |
731 | { | 736 | { |
732 | if (!pfn_valid_within(page_to_pfn(buddy))) | ||
733 | return 0; | ||
734 | |||
735 | if (page_is_guard(buddy) && page_order(buddy) == order) { | 737 | if (page_is_guard(buddy) && page_order(buddy) == order) { |
736 | if (page_zone_id(page) != page_zone_id(buddy)) | 738 | if (page_zone_id(page) != page_zone_id(buddy)) |
737 | return 0; | 739 | return 0; |
@@ -787,9 +789,8 @@ static inline void __free_one_page(struct page *page, | |||
787 | struct zone *zone, unsigned int order, | 789 | struct zone *zone, unsigned int order, |
788 | int migratetype) | 790 | int migratetype) |
789 | { | 791 | { |
790 | unsigned long page_idx; | 792 | unsigned long combined_pfn; |
791 | unsigned long combined_idx; | 793 | unsigned long uninitialized_var(buddy_pfn); |
792 | unsigned long uninitialized_var(buddy_idx); | ||
793 | struct page *buddy; | 794 | struct page *buddy; |
794 | unsigned int max_order; | 795 | unsigned int max_order; |
795 | 796 | ||
@@ -802,15 +803,16 @@ static inline void __free_one_page(struct page *page, | |||
802 | if (likely(!is_migrate_isolate(migratetype))) | 803 | if (likely(!is_migrate_isolate(migratetype))) |
803 | __mod_zone_freepage_state(zone, 1 << order, migratetype); | 804 | __mod_zone_freepage_state(zone, 1 << order, migratetype); |
804 | 805 | ||
805 | page_idx = pfn & ((1 << MAX_ORDER) - 1); | 806 | VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page); |
806 | |||
807 | VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page); | ||
808 | VM_BUG_ON_PAGE(bad_range(zone, page), page); | 807 | VM_BUG_ON_PAGE(bad_range(zone, page), page); |
809 | 808 | ||
810 | continue_merging: | 809 | continue_merging: |
811 | while (order < max_order - 1) { | 810 | while (order < max_order - 1) { |
812 | buddy_idx = __find_buddy_index(page_idx, order); | 811 | buddy_pfn = __find_buddy_pfn(pfn, order); |
813 | buddy = page + (buddy_idx - page_idx); | 812 | buddy = page + (buddy_pfn - pfn); |
813 | |||
814 | if (!pfn_valid_within(buddy_pfn)) | ||
815 | goto done_merging; | ||
814 | if (!page_is_buddy(page, buddy, order)) | 816 | if (!page_is_buddy(page, buddy, order)) |
815 | goto done_merging; | 817 | goto done_merging; |
816 | /* | 818 | /* |
@@ -824,9 +826,9 @@ continue_merging: | |||
824 | zone->free_area[order].nr_free--; | 826 | zone->free_area[order].nr_free--; |
825 | rmv_page_order(buddy); | 827 | rmv_page_order(buddy); |
826 | } | 828 | } |
827 | combined_idx = buddy_idx & page_idx; | 829 | combined_pfn = buddy_pfn & pfn; |
828 | page = page + (combined_idx - page_idx); | 830 | page = page + (combined_pfn - pfn); |
829 | page_idx = combined_idx; | 831 | pfn = combined_pfn; |
830 | order++; | 832 | order++; |
831 | } | 833 | } |
832 | if (max_order < MAX_ORDER) { | 834 | if (max_order < MAX_ORDER) { |
@@ -841,8 +843,8 @@ continue_merging: | |||
841 | if (unlikely(has_isolate_pageblock(zone))) { | 843 | if (unlikely(has_isolate_pageblock(zone))) { |
842 | int buddy_mt; | 844 | int buddy_mt; |
843 | 845 | ||
844 | buddy_idx = __find_buddy_index(page_idx, order); | 846 | buddy_pfn = __find_buddy_pfn(pfn, order); |
845 | buddy = page + (buddy_idx - page_idx); | 847 | buddy = page + (buddy_pfn - pfn); |
846 | buddy_mt = get_pageblock_migratetype(buddy); | 848 | buddy_mt = get_pageblock_migratetype(buddy); |
847 | 849 | ||
848 | if (migratetype != buddy_mt | 850 | if (migratetype != buddy_mt |
@@ -865,12 +867,12 @@ done_merging: | |||
865 | * so it's less likely to be used soon and more likely to be merged | 867 | * so it's less likely to be used soon and more likely to be merged |
866 | * as a higher order page | 868 | * as a higher order page |
867 | */ | 869 | */ |
868 | if ((order < MAX_ORDER-2) && pfn_valid_within(page_to_pfn(buddy))) { | 870 | if ((order < MAX_ORDER-2) && pfn_valid_within(buddy_pfn)) { |
869 | struct page *higher_page, *higher_buddy; | 871 | struct page *higher_page, *higher_buddy; |
870 | combined_idx = buddy_idx & page_idx; | 872 | combined_pfn = buddy_pfn & pfn; |
871 | higher_page = page + (combined_idx - page_idx); | 873 | higher_page = page + (combined_pfn - pfn); |
872 | buddy_idx = __find_buddy_index(combined_idx, order + 1); | 874 | buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1); |
873 | higher_buddy = higher_page + (buddy_idx - combined_idx); | 875 | higher_buddy = higher_page + (buddy_pfn - combined_pfn); |
874 | if (page_is_buddy(higher_page, higher_buddy, order + 1)) { | 876 | if (page_is_buddy(higher_page, higher_buddy, order + 1)) { |
875 | list_add_tail(&page->lru, | 877 | list_add_tail(&page->lru, |
876 | &zone->free_area[order].free_list[migratetype]); | 878 | &zone->free_area[order].free_list[migratetype]); |
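The hunk above switches the merge loop from indices relative to a MAX_ORDER block over to absolute page frame numbers. The underlying arithmetic is unchanged: the buddy of a 2^order-aligned block is expected to differ from it only in bit 'order' of the PFN (which is what a helper named __find_buddy_pfn implies, and what the combined_pfn = buddy_pfn & pfn step above relies on), and the merged block starts at the lower of the two PFNs. A minimal standalone sketch of that arithmetic, with made-up example values, not the kernel implementation itself:

/*
 * Standalone sketch of the buddy PFN arithmetic used in the hunk above:
 * flip bit 'order' to find the buddy, AND the two PFNs to find the start
 * of the merged block. Example values are invented.
 */
#include <stdio.h>

static unsigned long find_buddy_pfn(unsigned long pfn, unsigned int order)
{
    return pfn ^ (1UL << order);    /* flip the bit that selects the pair */
}

int main(void)
{
    unsigned long pfn = 0x1234;     /* hypothetical order-2 aligned PFN */
    unsigned int order = 2;
    unsigned long buddy_pfn = find_buddy_pfn(pfn, order);
    unsigned long combined_pfn = buddy_pfn & pfn;

    /* prints pfn=0x1234 buddy=0x1230 combined=0x1230 */
    printf("pfn=%#lx buddy=%#lx combined=%#lx\n",
           pfn, buddy_pfn, combined_pfn);
    return 0;
}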
@@ -1087,10 +1089,10 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |||
1087 | { | 1089 | { |
1088 | int migratetype = 0; | 1090 | int migratetype = 0; |
1089 | int batch_free = 0; | 1091 | int batch_free = 0; |
1090 | unsigned long nr_scanned; | 1092 | unsigned long nr_scanned, flags; |
1091 | bool isolated_pageblocks; | 1093 | bool isolated_pageblocks; |
1092 | 1094 | ||
1093 | spin_lock(&zone->lock); | 1095 | spin_lock_irqsave(&zone->lock, flags); |
1094 | isolated_pageblocks = has_isolate_pageblock(zone); | 1096 | isolated_pageblocks = has_isolate_pageblock(zone); |
1095 | nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); | 1097 | nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); |
1096 | if (nr_scanned) | 1098 | if (nr_scanned) |
@@ -1139,7 +1141,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |||
1139 | trace_mm_page_pcpu_drain(page, 0, mt); | 1141 | trace_mm_page_pcpu_drain(page, 0, mt); |
1140 | } while (--count && --batch_free && !list_empty(list)); | 1142 | } while (--count && --batch_free && !list_empty(list)); |
1141 | } | 1143 | } |
1142 | spin_unlock(&zone->lock); | 1144 | spin_unlock_irqrestore(&zone->lock, flags); |
1143 | } | 1145 | } |
1144 | 1146 | ||
1145 | static void free_one_page(struct zone *zone, | 1147 | static void free_one_page(struct zone *zone, |
@@ -1147,8 +1149,9 @@ static void free_one_page(struct zone *zone, | |||
1147 | unsigned int order, | 1149 | unsigned int order, |
1148 | int migratetype) | 1150 | int migratetype) |
1149 | { | 1151 | { |
1150 | unsigned long nr_scanned; | 1152 | unsigned long nr_scanned, flags; |
1151 | spin_lock(&zone->lock); | 1153 | spin_lock_irqsave(&zone->lock, flags); |
1154 | __count_vm_events(PGFREE, 1 << order); | ||
1152 | nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); | 1155 | nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); |
1153 | if (nr_scanned) | 1156 | if (nr_scanned) |
1154 | __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned); | 1157 | __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned); |
@@ -1158,7 +1161,7 @@ static void free_one_page(struct zone *zone, | |||
1158 | migratetype = get_pfnblock_migratetype(page, pfn); | 1161 | migratetype = get_pfnblock_migratetype(page, pfn); |
1159 | } | 1162 | } |
1160 | __free_one_page(page, pfn, zone, order, migratetype); | 1163 | __free_one_page(page, pfn, zone, order, migratetype); |
1161 | spin_unlock(&zone->lock); | 1164 | spin_unlock_irqrestore(&zone->lock, flags); |
1162 | } | 1165 | } |
1163 | 1166 | ||
1164 | static void __meminit __init_single_page(struct page *page, unsigned long pfn, | 1167 | static void __meminit __init_single_page(struct page *page, unsigned long pfn, |
@@ -1236,7 +1239,6 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end) | |||
1236 | 1239 | ||
1237 | static void __free_pages_ok(struct page *page, unsigned int order) | 1240 | static void __free_pages_ok(struct page *page, unsigned int order) |
1238 | { | 1241 | { |
1239 | unsigned long flags; | ||
1240 | int migratetype; | 1242 | int migratetype; |
1241 | unsigned long pfn = page_to_pfn(page); | 1243 | unsigned long pfn = page_to_pfn(page); |
1242 | 1244 | ||
@@ -1244,10 +1246,7 @@ static void __free_pages_ok(struct page *page, unsigned int order) | |||
1244 | return; | 1246 | return; |
1245 | 1247 | ||
1246 | migratetype = get_pfnblock_migratetype(page, pfn); | 1248 | migratetype = get_pfnblock_migratetype(page, pfn); |
1247 | local_irq_save(flags); | ||
1248 | __count_vm_events(PGFREE, 1 << order); | ||
1249 | free_one_page(page_zone(page), page, pfn, order, migratetype); | 1249 | free_one_page(page_zone(page), page, pfn, order, migratetype); |
1250 | local_irq_restore(flags); | ||
1251 | } | 1250 | } |
1252 | 1251 | ||
1253 | static void __init __free_pages_boot_core(struct page *page, unsigned int order) | 1252 | static void __init __free_pages_boot_core(struct page *page, unsigned int order) |
@@ -2219,8 +2218,9 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |||
2219 | int migratetype, bool cold) | 2218 | int migratetype, bool cold) |
2220 | { | 2219 | { |
2221 | int i, alloced = 0; | 2220 | int i, alloced = 0; |
2221 | unsigned long flags; | ||
2222 | 2222 | ||
2223 | spin_lock(&zone->lock); | 2223 | spin_lock_irqsave(&zone->lock, flags); |
2224 | for (i = 0; i < count; ++i) { | 2224 | for (i = 0; i < count; ++i) { |
2225 | struct page *page = __rmqueue(zone, order, migratetype); | 2225 | struct page *page = __rmqueue(zone, order, migratetype); |
2226 | if (unlikely(page == NULL)) | 2226 | if (unlikely(page == NULL)) |
@@ -2256,7 +2256,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |||
2256 | * pages added to the pcp list. | 2256 | * pages added to the pcp list. |
2257 | */ | 2257 | */ |
2258 | __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); | 2258 | __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); |
2259 | spin_unlock(&zone->lock); | 2259 | spin_unlock_irqrestore(&zone->lock, flags); |
2260 | return alloced; | 2260 | return alloced; |
2261 | } | 2261 | } |
2262 | 2262 | ||
@@ -2341,16 +2341,26 @@ void drain_local_pages(struct zone *zone) | |||
2341 | drain_pages(cpu); | 2341 | drain_pages(cpu); |
2342 | } | 2342 | } |
2343 | 2343 | ||
2344 | static void drain_local_pages_wq(struct work_struct *work) | ||
2345 | { | ||
2346 | /* | ||
2347 | * drain_all_pages doesn't use proper cpu hotplug protection so | ||
2348 | * we can race with cpu offline when the WQ can move this from | ||
2349 | * a cpu pinned worker to an unbound one. We can operate on a different | ||
2350 | * cpu which is allright but we also have to make sure to not move to | ||
2351 | * a different one. | ||
2352 | */ | ||
2353 | preempt_disable(); | ||
2354 | drain_local_pages(NULL); | ||
2355 | preempt_enable(); | ||
2356 | } | ||
2357 | |||
2344 | /* | 2358 | /* |
2345 | * Spill all the per-cpu pages from all CPUs back into the buddy allocator. | 2359 | * Spill all the per-cpu pages from all CPUs back into the buddy allocator. |
2346 | * | 2360 | * |
2347 | * When zone parameter is non-NULL, spill just the single zone's pages. | 2361 | * When zone parameter is non-NULL, spill just the single zone's pages. |
2348 | * | 2362 | * |
2349 | * Note that this code is protected against sending an IPI to an offline | 2363 | * Note that this can be extremely slow as the draining happens in a workqueue. |
2350 | * CPU but does not guarantee sending an IPI to newly hotplugged CPUs: | ||
2351 | * on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but | ||
2352 | * nothing keeps CPUs from showing up after we populated the cpumask and | ||
2353 | * before the call to on_each_cpu_mask(). | ||
2354 | */ | 2364 | */ |
2355 | void drain_all_pages(struct zone *zone) | 2365 | void drain_all_pages(struct zone *zone) |
2356 | { | 2366 | { |
@@ -2362,6 +2372,21 @@ void drain_all_pages(struct zone *zone) | |||
2362 | */ | 2372 | */ |
2363 | static cpumask_t cpus_with_pcps; | 2373 | static cpumask_t cpus_with_pcps; |
2364 | 2374 | ||
2375 | /* Workqueues cannot recurse */ | ||
2376 | if (current->flags & PF_WQ_WORKER) | ||
2377 | return; | ||
2378 | |||
2379 | /* | ||
2380 | * Do not drain if one is already in progress unless it's specific to | ||
2381 | * a zone. Such callers are primarily CMA and memory hotplug and need | ||
2382 | * the drain to be complete when the call returns. | ||
2383 | */ | ||
2384 | if (unlikely(!mutex_trylock(&pcpu_drain_mutex))) { | ||
2385 | if (!zone) | ||
2386 | return; | ||
2387 | mutex_lock(&pcpu_drain_mutex); | ||
2388 | } | ||
2389 | |||
2365 | /* | 2390 | /* |
2366 | * We don't care about racing with CPU hotplug event | 2391 | * We don't care about racing with CPU hotplug event |
2367 | * as offline notification will cause the notified | 2392 | * as offline notification will cause the notified |
@@ -2392,8 +2417,16 @@ void drain_all_pages(struct zone *zone) | |||
2392 | else | 2417 | else |
2393 | cpumask_clear_cpu(cpu, &cpus_with_pcps); | 2418 | cpumask_clear_cpu(cpu, &cpus_with_pcps); |
2394 | } | 2419 | } |
2395 | on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages, | 2420 | |
2396 | zone, 1); | 2421 | for_each_cpu(cpu, &cpus_with_pcps) { |
2422 | struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu); | ||
2423 | INIT_WORK(work, drain_local_pages_wq); | ||
2424 | schedule_work_on(cpu, work); | ||
2425 | } | ||
2426 | for_each_cpu(cpu, &cpus_with_pcps) | ||
2427 | flush_work(per_cpu_ptr(&pcpu_drain, cpu)); | ||
2428 | |||
2429 | mutex_unlock(&pcpu_drain_mutex); | ||
2397 | } | 2430 | } |
2398 | 2431 | ||
2399 | #ifdef CONFIG_HIBERNATION | 2432 | #ifdef CONFIG_HIBERNATION |
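The hunk above replaces the IPI-based drain with per-CPU work items (schedule_work_on() followed by flush_work()), serialised by pcpu_drain_mutex: opportunistic callers simply back off if a drain is already in progress, while zone-specific callers (CMA, memory hotplug) block until they can run a complete drain of their own. A small userspace sketch of that locking policy, using pthreads in place of the kernel mutex API; it only illustrates the trylock-or-block decision, not the draining itself:

/*
 * Userspace sketch (not kernel code) of the locking policy added above:
 * best-effort callers give up if someone else is draining, callers that
 * need the drain to be complete wait for the mutex instead.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t drain_mutex = PTHREAD_MUTEX_INITIALIZER;

static void drain_all(bool need_complete_drain)
{
    if (pthread_mutex_trylock(&drain_mutex) != 0) {
        if (!need_complete_drain)
            return;                         /* someone else is draining */
        pthread_mutex_lock(&drain_mutex);   /* must run a full drain ourselves */
    }

    puts("draining per-cpu lists...");      /* placeholder for the real work */
    pthread_mutex_unlock(&drain_mutex);
}

int main(void)
{
    drain_all(false);   /* best-effort caller */
    drain_all(true);    /* CMA/hotplug-style caller */
    return 0;
}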
@@ -2444,17 +2477,20 @@ void free_hot_cold_page(struct page *page, bool cold) | |||
2444 | { | 2477 | { |
2445 | struct zone *zone = page_zone(page); | 2478 | struct zone *zone = page_zone(page); |
2446 | struct per_cpu_pages *pcp; | 2479 | struct per_cpu_pages *pcp; |
2447 | unsigned long flags; | ||
2448 | unsigned long pfn = page_to_pfn(page); | 2480 | unsigned long pfn = page_to_pfn(page); |
2449 | int migratetype; | 2481 | int migratetype; |
2450 | 2482 | ||
2483 | if (in_interrupt()) { | ||
2484 | __free_pages_ok(page, 0); | ||
2485 | return; | ||
2486 | } | ||
2487 | |||
2451 | if (!free_pcp_prepare(page)) | 2488 | if (!free_pcp_prepare(page)) |
2452 | return; | 2489 | return; |
2453 | 2490 | ||
2454 | migratetype = get_pfnblock_migratetype(page, pfn); | 2491 | migratetype = get_pfnblock_migratetype(page, pfn); |
2455 | set_pcppage_migratetype(page, migratetype); | 2492 | set_pcppage_migratetype(page, migratetype); |
2456 | local_irq_save(flags); | 2493 | preempt_disable(); |
2457 | __count_vm_event(PGFREE); | ||
2458 | 2494 | ||
2459 | /* | 2495 | /* |
2460 | * We only track unmovable, reclaimable and movable on pcp lists. | 2496 | * We only track unmovable, reclaimable and movable on pcp lists. |
@@ -2471,6 +2507,7 @@ void free_hot_cold_page(struct page *page, bool cold) | |||
2471 | migratetype = MIGRATE_MOVABLE; | 2507 | migratetype = MIGRATE_MOVABLE; |
2472 | } | 2508 | } |
2473 | 2509 | ||
2510 | __count_vm_event(PGFREE); | ||
2474 | pcp = &this_cpu_ptr(zone->pageset)->pcp; | 2511 | pcp = &this_cpu_ptr(zone->pageset)->pcp; |
2475 | if (!cold) | 2512 | if (!cold) |
2476 | list_add(&page->lru, &pcp->lists[migratetype]); | 2513 | list_add(&page->lru, &pcp->lists[migratetype]); |
@@ -2484,7 +2521,7 @@ void free_hot_cold_page(struct page *page, bool cold) | |||
2484 | } | 2521 | } |
2485 | 2522 | ||
2486 | out: | 2523 | out: |
2487 | local_irq_restore(flags); | 2524 | preempt_enable(); |
2488 | } | 2525 | } |
2489 | 2526 | ||
2490 | /* | 2527 | /* |
@@ -2602,74 +2639,105 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z) | |||
2602 | #endif | 2639 | #endif |
2603 | } | 2640 | } |
2604 | 2641 | ||
2642 | /* Remove page from the per-cpu list, caller must protect the list */ | ||
2643 | static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype, | ||
2644 | bool cold, struct per_cpu_pages *pcp, | ||
2645 | struct list_head *list) | ||
2646 | { | ||
2647 | struct page *page; | ||
2648 | |||
2649 | VM_BUG_ON(in_interrupt()); | ||
2650 | |||
2651 | do { | ||
2652 | if (list_empty(list)) { | ||
2653 | pcp->count += rmqueue_bulk(zone, 0, | ||
2654 | pcp->batch, list, | ||
2655 | migratetype, cold); | ||
2656 | if (unlikely(list_empty(list))) | ||
2657 | return NULL; | ||
2658 | } | ||
2659 | |||
2660 | if (cold) | ||
2661 | page = list_last_entry(list, struct page, lru); | ||
2662 | else | ||
2663 | page = list_first_entry(list, struct page, lru); | ||
2664 | |||
2665 | list_del(&page->lru); | ||
2666 | pcp->count--; | ||
2667 | } while (check_new_pcp(page)); | ||
2668 | |||
2669 | return page; | ||
2670 | } | ||
2671 | |||
2672 | /* Lock and remove page from the per-cpu list */ | ||
2673 | static struct page *rmqueue_pcplist(struct zone *preferred_zone, | ||
2674 | struct zone *zone, unsigned int order, | ||
2675 | gfp_t gfp_flags, int migratetype) | ||
2676 | { | ||
2677 | struct per_cpu_pages *pcp; | ||
2678 | struct list_head *list; | ||
2679 | bool cold = ((gfp_flags & __GFP_COLD) != 0); | ||
2680 | struct page *page; | ||
2681 | |||
2682 | preempt_disable(); | ||
2683 | pcp = &this_cpu_ptr(zone->pageset)->pcp; | ||
2684 | list = &pcp->lists[migratetype]; | ||
2685 | page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list); | ||
2686 | if (page) { | ||
2687 | __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); | ||
2688 | zone_statistics(preferred_zone, zone); | ||
2689 | } | ||
2690 | preempt_enable(); | ||
2691 | return page; | ||
2692 | } | ||
2693 | |||
2605 | /* | 2694 | /* |
2606 | * Allocate a page from the given zone. Use pcplists for order-0 allocations. | 2695 | * Allocate a page from the given zone. Use pcplists for order-0 allocations. |
2607 | */ | 2696 | */ |
2608 | static inline | 2697 | static inline |
2609 | struct page *buffered_rmqueue(struct zone *preferred_zone, | 2698 | struct page *rmqueue(struct zone *preferred_zone, |
2610 | struct zone *zone, unsigned int order, | 2699 | struct zone *zone, unsigned int order, |
2611 | gfp_t gfp_flags, unsigned int alloc_flags, | 2700 | gfp_t gfp_flags, unsigned int alloc_flags, |
2612 | int migratetype) | 2701 | int migratetype) |
2613 | { | 2702 | { |
2614 | unsigned long flags; | 2703 | unsigned long flags; |
2615 | struct page *page; | 2704 | struct page *page; |
2616 | bool cold = ((gfp_flags & __GFP_COLD) != 0); | ||
2617 | 2705 | ||
2618 | if (likely(order == 0)) { | 2706 | if (likely(order == 0) && !in_interrupt()) { |
2619 | struct per_cpu_pages *pcp; | 2707 | page = rmqueue_pcplist(preferred_zone, zone, order, |
2620 | struct list_head *list; | 2708 | gfp_flags, migratetype); |
2621 | 2709 | goto out; | |
2622 | local_irq_save(flags); | 2710 | } |
2623 | do { | ||
2624 | pcp = &this_cpu_ptr(zone->pageset)->pcp; | ||
2625 | list = &pcp->lists[migratetype]; | ||
2626 | if (list_empty(list)) { | ||
2627 | pcp->count += rmqueue_bulk(zone, 0, | ||
2628 | pcp->batch, list, | ||
2629 | migratetype, cold); | ||
2630 | if (unlikely(list_empty(list))) | ||
2631 | goto failed; | ||
2632 | } | ||
2633 | |||
2634 | if (cold) | ||
2635 | page = list_last_entry(list, struct page, lru); | ||
2636 | else | ||
2637 | page = list_first_entry(list, struct page, lru); | ||
2638 | |||
2639 | list_del(&page->lru); | ||
2640 | pcp->count--; | ||
2641 | 2711 | ||
2642 | } while (check_new_pcp(page)); | 2712 | /* |
2643 | } else { | 2713 | * We most definitely don't want callers attempting to |
2644 | /* | 2714 | * allocate greater than order-1 page units with __GFP_NOFAIL. |
2645 | * We most definitely don't want callers attempting to | 2715 | */ |
2646 | * allocate greater than order-1 page units with __GFP_NOFAIL. | 2716 | WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); |
2647 | */ | 2717 | spin_lock_irqsave(&zone->lock, flags); |
2648 | WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); | ||
2649 | spin_lock_irqsave(&zone->lock, flags); | ||
2650 | 2718 | ||
2651 | do { | 2719 | do { |
2652 | page = NULL; | 2720 | page = NULL; |
2653 | if (alloc_flags & ALLOC_HARDER) { | 2721 | if (alloc_flags & ALLOC_HARDER) { |
2654 | page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); | 2722 | page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); |
2655 | if (page) | 2723 | if (page) |
2656 | trace_mm_page_alloc_zone_locked(page, order, migratetype); | 2724 | trace_mm_page_alloc_zone_locked(page, order, migratetype); |
2657 | } | 2725 | } |
2658 | if (!page) | ||
2659 | page = __rmqueue(zone, order, migratetype); | ||
2660 | } while (page && check_new_pages(page, order)); | ||
2661 | spin_unlock(&zone->lock); | ||
2662 | if (!page) | 2726 | if (!page) |
2663 | goto failed; | 2727 | page = __rmqueue(zone, order, migratetype); |
2664 | __mod_zone_freepage_state(zone, -(1 << order), | 2728 | } while (page && check_new_pages(page, order)); |
2665 | get_pcppage_migratetype(page)); | 2729 | spin_unlock(&zone->lock); |
2666 | } | 2730 | if (!page) |
2731 | goto failed; | ||
2732 | __mod_zone_freepage_state(zone, -(1 << order), | ||
2733 | get_pcppage_migratetype(page)); | ||
2667 | 2734 | ||
2668 | __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); | 2735 | __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); |
2669 | zone_statistics(preferred_zone, zone); | 2736 | zone_statistics(preferred_zone, zone); |
2670 | local_irq_restore(flags); | 2737 | local_irq_restore(flags); |
2671 | 2738 | ||
2672 | VM_BUG_ON_PAGE(bad_range(zone, page), page); | 2739 | out: |
2740 | VM_BUG_ON_PAGE(page && bad_range(zone, page), page); | ||
2673 | return page; | 2741 | return page; |
2674 | 2742 | ||
2675 | failed: | 2743 | failed: |
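The hunk above splits the order-0 fast path out of the old buffered_rmqueue(): rmqueue_pcplist() serves pages from the per-CPU lists under preempt_disable() only, and rmqueue_bulk() refills those lists in batches under the zone spinlock. The sketch below is a simplified userspace analogue of that shape, a thread-local cache refilled in batches from a lock-protected pool; the names, sizes and counters are invented for illustration:

/*
 * Userspace sketch of the order-0 fast-path shape introduced above:
 * take from a local cache without the shared lock, refill in batches
 * under the lock only when the cache runs dry.
 */
#include <pthread.h>
#include <stdio.h>

#define BATCH 4

static pthread_mutex_t zone_lock = PTHREAD_MUTEX_INITIALIZER;
static int zone_free = 32;              /* pages left in the "zone" */

struct pcp_cache {
    int count;                          /* pages cached locally */
};

static int rmqueue_bulk(void)
{
    int got = 0;

    pthread_mutex_lock(&zone_lock);     /* slow path: shared pool */
    while (got < BATCH && zone_free > 0) {
        zone_free--;
        got++;
    }
    pthread_mutex_unlock(&zone_lock);
    return got;
}

static int rmqueue_pcplist(struct pcp_cache *pcp)
{
    if (pcp->count == 0) {              /* cache empty: batched refill */
        pcp->count += rmqueue_bulk();
        if (pcp->count == 0)
            return -1;                  /* pool exhausted */
    }
    pcp->count--;                       /* fast path: no shared lock taken */
    return 0;
}

int main(void)
{
    struct pcp_cache pcp = { .count = 0 };
    int i, ok = 0;

    for (i = 0; i < 10; i++)
        if (rmqueue_pcplist(&pcp) == 0)
            ok++;
    printf("allocated %d, cached %d, zone %d\n", ok, pcp.count, zone_free);
    return 0;
}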
@@ -2877,7 +2945,7 @@ bool zone_watermark_ok_safe(struct zone *z, unsigned int order, | |||
2877 | #ifdef CONFIG_NUMA | 2945 | #ifdef CONFIG_NUMA |
2878 | static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) | 2946 | static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) |
2879 | { | 2947 | { |
2880 | return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) < | 2948 | return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <= |
2881 | RECLAIM_DISTANCE; | 2949 | RECLAIM_DISTANCE; |
2882 | } | 2950 | } |
2883 | #else /* CONFIG_NUMA */ | 2951 | #else /* CONFIG_NUMA */ |
@@ -2974,7 +3042,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, | |||
2974 | } | 3042 | } |
2975 | 3043 | ||
2976 | try_this_zone: | 3044 | try_this_zone: |
2977 | page = buffered_rmqueue(ac->preferred_zoneref->zone, zone, order, | 3045 | page = rmqueue(ac->preferred_zoneref->zone, zone, order, |
2978 | gfp_mask, alloc_flags, ac->migratetype); | 3046 | gfp_mask, alloc_flags, ac->migratetype); |
2979 | if (page) { | 3047 | if (page) { |
2980 | prep_new_page(page, order, gfp_mask, alloc_flags); | 3048 | prep_new_page(page, order, gfp_mask, alloc_flags); |
@@ -3007,18 +3075,12 @@ static inline bool should_suppress_show_mem(void) | |||
3007 | return ret; | 3075 | return ret; |
3008 | } | 3076 | } |
3009 | 3077 | ||
3010 | static DEFINE_RATELIMIT_STATE(nopage_rs, | 3078 | static void warn_alloc_show_mem(gfp_t gfp_mask, nodemask_t *nodemask) |
3011 | DEFAULT_RATELIMIT_INTERVAL, | ||
3012 | DEFAULT_RATELIMIT_BURST); | ||
3013 | |||
3014 | void warn_alloc(gfp_t gfp_mask, const char *fmt, ...) | ||
3015 | { | 3079 | { |
3016 | unsigned int filter = SHOW_MEM_FILTER_NODES; | 3080 | unsigned int filter = SHOW_MEM_FILTER_NODES; |
3017 | struct va_format vaf; | 3081 | static DEFINE_RATELIMIT_STATE(show_mem_rs, HZ, 1); |
3018 | va_list args; | ||
3019 | 3082 | ||
3020 | if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) || | 3083 | if (should_suppress_show_mem() || !__ratelimit(&show_mem_rs)) |
3021 | debug_guardpage_minorder() > 0) | ||
3022 | return; | 3084 | return; |
3023 | 3085 | ||
3024 | /* | 3086 | /* |
@@ -3033,6 +3095,20 @@ void warn_alloc(gfp_t gfp_mask, const char *fmt, ...) | |||
3033 | if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM)) | 3095 | if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM)) |
3034 | filter &= ~SHOW_MEM_FILTER_NODES; | 3096 | filter &= ~SHOW_MEM_FILTER_NODES; |
3035 | 3097 | ||
3098 | show_mem(filter, nodemask); | ||
3099 | } | ||
3100 | |||
3101 | void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...) | ||
3102 | { | ||
3103 | struct va_format vaf; | ||
3104 | va_list args; | ||
3105 | static DEFINE_RATELIMIT_STATE(nopage_rs, DEFAULT_RATELIMIT_INTERVAL, | ||
3106 | DEFAULT_RATELIMIT_BURST); | ||
3107 | |||
3108 | if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) || | ||
3109 | debug_guardpage_minorder() > 0) | ||
3110 | return; | ||
3111 | |||
3036 | pr_warn("%s: ", current->comm); | 3112 | pr_warn("%s: ", current->comm); |
3037 | 3113 | ||
3038 | va_start(args, fmt); | 3114 | va_start(args, fmt); |
@@ -3041,11 +3117,36 @@ void warn_alloc(gfp_t gfp_mask, const char *fmt, ...) | |||
3041 | pr_cont("%pV", &vaf); | 3117 | pr_cont("%pV", &vaf); |
3042 | va_end(args); | 3118 | va_end(args); |
3043 | 3119 | ||
3044 | pr_cont(", mode:%#x(%pGg)\n", gfp_mask, &gfp_mask); | 3120 | pr_cont(", mode:%#x(%pGg), nodemask=", gfp_mask, &gfp_mask); |
3121 | if (nodemask) | ||
3122 | pr_cont("%*pbl\n", nodemask_pr_args(nodemask)); | ||
3123 | else | ||
3124 | pr_cont("(null)\n"); | ||
3125 | |||
3126 | cpuset_print_current_mems_allowed(); | ||
3045 | 3127 | ||
3046 | dump_stack(); | 3128 | dump_stack(); |
3047 | if (!should_suppress_show_mem()) | 3129 | warn_alloc_show_mem(gfp_mask, nodemask); |
3048 | show_mem(filter); | 3130 | } |
3131 | |||
3132 | static inline struct page * | ||
3133 | __alloc_pages_cpuset_fallback(gfp_t gfp_mask, unsigned int order, | ||
3134 | unsigned int alloc_flags, | ||
3135 | const struct alloc_context *ac) | ||
3136 | { | ||
3137 | struct page *page; | ||
3138 | |||
3139 | page = get_page_from_freelist(gfp_mask, order, | ||
3140 | alloc_flags|ALLOC_CPUSET, ac); | ||
3141 | /* | ||
3142 | * fallback to ignore cpuset restriction if our nodes | ||
3143 | * are depleted | ||
3144 | */ | ||
3145 | if (!page) | ||
3146 | page = get_page_from_freelist(gfp_mask, order, | ||
3147 | alloc_flags, ac); | ||
3148 | |||
3149 | return page; | ||
3049 | } | 3150 | } |
3050 | 3151 | ||
3051 | static inline struct page * | 3152 | static inline struct page * |
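The hunk above gives the allocation-failure warning and the show_mem() dump separate ratelimit states, so the noisy memory dump can be throttled independently of the one-line warning. The userspace sketch below approximates that two-level scheme with a simple burst-per-interval limiter; the limiter logic and the chosen interval/burst values are illustrative only and merely echo the shape of the kernel's __ratelimit():

/*
 * Userspace sketch of two-level ratelimiting: the short warning and the
 * verbose memory dump are throttled separately. Not the kernel code.
 */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct ratelimit {
    time_t interval;    /* seconds per window */
    int burst;          /* messages allowed per window */
    time_t begin;
    int printed;
};

static bool ratelimit_ok(struct ratelimit *rs)
{
    time_t now = time(NULL);

    if (now - rs->begin >= rs->interval) {  /* start a new window */
        rs->begin = now;
        rs->printed = 0;
    }
    if (rs->printed >= rs->burst)
        return false;
    rs->printed++;
    return true;
}

static struct ratelimit nopage_rs = { .interval = 5, .burst = 10 };
static struct ratelimit show_mem_rs = { .interval = 1, .burst = 1 };

static void warn_alloc(unsigned int order)
{
    if (!ratelimit_ok(&nopage_rs))
        return;
    printf("page allocation failure: order:%u\n", order);
    if (ratelimit_ok(&show_mem_rs))
        puts("(memory state dump would go here)");
}

int main(void)
{
    for (int i = 0; i < 3; i++)
        warn_alloc(0);
    return 0;
}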
@@ -3083,47 +3184,42 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, | |||
3083 | if (page) | 3184 | if (page) |
3084 | goto out; | 3185 | goto out; |
3085 | 3186 | ||
3086 | if (!(gfp_mask & __GFP_NOFAIL)) { | 3187 | /* Coredumps can quickly deplete all memory reserves */ |
3087 | /* Coredumps can quickly deplete all memory reserves */ | 3188 | if (current->flags & PF_DUMPCORE) |
3088 | if (current->flags & PF_DUMPCORE) | 3189 | goto out; |
3089 | goto out; | 3190 | /* The OOM killer will not help higher order allocs */ |
3090 | /* The OOM killer will not help higher order allocs */ | 3191 | if (order > PAGE_ALLOC_COSTLY_ORDER) |
3091 | if (order > PAGE_ALLOC_COSTLY_ORDER) | 3192 | goto out; |
3092 | goto out; | 3193 | /* The OOM killer does not needlessly kill tasks for lowmem */ |
3093 | /* The OOM killer does not needlessly kill tasks for lowmem */ | 3194 | if (ac->high_zoneidx < ZONE_NORMAL) |
3094 | if (ac->high_zoneidx < ZONE_NORMAL) | 3195 | goto out; |
3095 | goto out; | 3196 | if (pm_suspended_storage()) |
3096 | if (pm_suspended_storage()) | 3197 | goto out; |
3097 | goto out; | 3198 | /* |
3098 | /* | 3199 | * XXX: GFP_NOFS allocations should rather fail than rely on |
3099 | * XXX: GFP_NOFS allocations should rather fail than rely on | 3200 | * other request to make a forward progress. |
3100 | * other request to make a forward progress. | 3201 | * We are in an unfortunate situation where out_of_memory cannot |
3101 | * We are in an unfortunate situation where out_of_memory cannot | 3202 | * do much for this context but let's try it to at least get |
3102 | * do much for this context but let's try it to at least get | 3203 | * access to memory reserved if the current task is killed (see |
3103 | * access to memory reserved if the current task is killed (see | 3204 | * out_of_memory). Once filesystems are ready to handle allocation |
3104 | * out_of_memory). Once filesystems are ready to handle allocation | 3205 | * failures more gracefully we should just bail out here. |
3105 | * failures more gracefully we should just bail out here. | 3206 | */ |
3106 | */ | 3207 | |
3208 | /* The OOM killer may not free memory on a specific node */ | ||
3209 | if (gfp_mask & __GFP_THISNODE) | ||
3210 | goto out; | ||
3107 | 3211 | ||
3108 | /* The OOM killer may not free memory on a specific node */ | ||
3109 | if (gfp_mask & __GFP_THISNODE) | ||
3110 | goto out; | ||
3111 | } | ||
3112 | /* Exhausted what can be done so it's blamo time */ | 3212 | /* Exhausted what can be done so it's blamo time */ |
3113 | if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) { | 3213 | if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) { |
3114 | *did_some_progress = 1; | 3214 | *did_some_progress = 1; |
3115 | 3215 | ||
3116 | if (gfp_mask & __GFP_NOFAIL) { | 3216 | /* |
3117 | page = get_page_from_freelist(gfp_mask, order, | 3217 | * Help non-failing allocations by giving them access to memory |
3118 | ALLOC_NO_WATERMARKS|ALLOC_CPUSET, ac); | 3218 | * reserves |
3119 | /* | 3219 | */ |
3120 | * fallback to ignore cpuset restriction if our nodes | 3220 | if (gfp_mask & __GFP_NOFAIL) |
3121 | * are depleted | 3221 | page = __alloc_pages_cpuset_fallback(gfp_mask, order, |
3122 | */ | ||
3123 | if (!page) | ||
3124 | page = get_page_from_freelist(gfp_mask, order, | ||
3125 | ALLOC_NO_WATERMARKS, ac); | 3222 | ALLOC_NO_WATERMARKS, ac); |
3126 | } | ||
3127 | } | 3223 | } |
3128 | out: | 3224 | out: |
3129 | mutex_unlock(&oom_lock); | 3225 | mutex_unlock(&oom_lock); |
@@ -3192,6 +3288,9 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags, | |||
3192 | { | 3288 | { |
3193 | int max_retries = MAX_COMPACT_RETRIES; | 3289 | int max_retries = MAX_COMPACT_RETRIES; |
3194 | int min_priority; | 3290 | int min_priority; |
3291 | bool ret = false; | ||
3292 | int retries = *compaction_retries; | ||
3293 | enum compact_priority priority = *compact_priority; | ||
3195 | 3294 | ||
3196 | if (!order) | 3295 | if (!order) |
3197 | return false; | 3296 | return false; |
@@ -3213,8 +3312,10 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags, | |||
3213 | * But do not retry if the given zonelist is not suitable for | 3312 | * But do not retry if the given zonelist is not suitable for |
3214 | * compaction. | 3313 | * compaction. |
3215 | */ | 3314 | */ |
3216 | if (compaction_withdrawn(compact_result)) | 3315 | if (compaction_withdrawn(compact_result)) { |
3217 | return compaction_zonelist_suitable(ac, order, alloc_flags); | 3316 | ret = compaction_zonelist_suitable(ac, order, alloc_flags); |
3317 | goto out; | ||
3318 | } | ||
3218 | 3319 | ||
3219 | /* | 3320 | /* |
3220 | * !costly requests are much more important than __GFP_REPEAT | 3321 | * !costly requests are much more important than __GFP_REPEAT |
@@ -3226,8 +3327,10 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags, | |||
3226 | */ | 3327 | */ |
3227 | if (order > PAGE_ALLOC_COSTLY_ORDER) | 3328 | if (order > PAGE_ALLOC_COSTLY_ORDER) |
3228 | max_retries /= 4; | 3329 | max_retries /= 4; |
3229 | if (*compaction_retries <= max_retries) | 3330 | if (*compaction_retries <= max_retries) { |
3230 | return true; | 3331 | ret = true; |
3332 | goto out; | ||
3333 | } | ||
3231 | 3334 | ||
3232 | /* | 3335 | /* |
3233 | * Make sure there are attempts at the highest priority if we exhausted | 3336 | * Make sure there are attempts at the highest priority if we exhausted |
@@ -3236,12 +3339,15 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags, | |||
3236 | check_priority: | 3339 | check_priority: |
3237 | min_priority = (order > PAGE_ALLOC_COSTLY_ORDER) ? | 3340 | min_priority = (order > PAGE_ALLOC_COSTLY_ORDER) ? |
3238 | MIN_COMPACT_COSTLY_PRIORITY : MIN_COMPACT_PRIORITY; | 3341 | MIN_COMPACT_COSTLY_PRIORITY : MIN_COMPACT_PRIORITY; |
3342 | |||
3239 | if (*compact_priority > min_priority) { | 3343 | if (*compact_priority > min_priority) { |
3240 | (*compact_priority)--; | 3344 | (*compact_priority)--; |
3241 | *compaction_retries = 0; | 3345 | *compaction_retries = 0; |
3242 | return true; | 3346 | ret = true; |
3243 | } | 3347 | } |
3244 | return false; | 3348 | out: |
3349 | trace_compact_retry(order, priority, compact_result, retries, max_retries, ret); | ||
3350 | return ret; | ||
3245 | } | 3351 | } |
3246 | #else | 3352 | #else |
3247 | static inline struct page * | 3353 | static inline struct page * |
@@ -3464,6 +3570,8 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order, | |||
3464 | ac->nodemask) { | 3570 | ac->nodemask) { |
3465 | unsigned long available; | 3571 | unsigned long available; |
3466 | unsigned long reclaimable; | 3572 | unsigned long reclaimable; |
3573 | unsigned long min_wmark = min_wmark_pages(zone); | ||
3574 | bool wmark; | ||
3467 | 3575 | ||
3468 | available = reclaimable = zone_reclaimable_pages(zone); | 3576 | available = reclaimable = zone_reclaimable_pages(zone); |
3469 | available -= DIV_ROUND_UP((*no_progress_loops) * available, | 3577 | available -= DIV_ROUND_UP((*no_progress_loops) * available, |
@@ -3474,8 +3582,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order, | |||
3474 | * Would the allocation succeed if we reclaimed the whole | 3582 | * Would the allocation succeed if we reclaimed the whole |
3475 | * available? | 3583 | * available? |
3476 | */ | 3584 | */ |
3477 | if (__zone_watermark_ok(zone, order, min_wmark_pages(zone), | 3585 | wmark = __zone_watermark_ok(zone, order, min_wmark, |
3478 | ac_classzone_idx(ac), alloc_flags, available)) { | 3586 | ac_classzone_idx(ac), alloc_flags, available); |
3587 | trace_reclaim_retry_zone(z, order, reclaimable, | ||
3588 | available, min_wmark, *no_progress_loops, wmark); | ||
3589 | if (wmark) { | ||
3479 | /* | 3590 | /* |
3480 | * If we didn't make any progress and have a lot of | 3591 | * If we didn't make any progress and have a lot of |
3481 | * dirty + writeback pages then we should wait for | 3592 | * dirty + writeback pages then we should wait for |
@@ -3555,6 +3666,14 @@ retry_cpuset: | |||
3555 | no_progress_loops = 0; | 3666 | no_progress_loops = 0; |
3556 | compact_priority = DEF_COMPACT_PRIORITY; | 3667 | compact_priority = DEF_COMPACT_PRIORITY; |
3557 | cpuset_mems_cookie = read_mems_allowed_begin(); | 3668 | cpuset_mems_cookie = read_mems_allowed_begin(); |
3669 | |||
3670 | /* | ||
3671 | * The fast path uses conservative alloc_flags to succeed only until | ||
3672 | * kswapd needs to be woken up, and to avoid the cost of setting up | ||
3673 | * alloc_flags precisely. So we do that now. | ||
3674 | */ | ||
3675 | alloc_flags = gfp_to_alloc_flags(gfp_mask); | ||
3676 | |||
3558 | /* | 3677 | /* |
3559 | * We need to recalculate the starting point for the zonelist iterator | 3678 | * We need to recalculate the starting point for the zonelist iterator |
3560 | * because we might have used different nodemask in the fast path, or | 3679 | * because we might have used different nodemask in the fast path, or |
@@ -3566,14 +3685,6 @@ retry_cpuset: | |||
3566 | if (!ac->preferred_zoneref->zone) | 3685 | if (!ac->preferred_zoneref->zone) |
3567 | goto nopage; | 3686 | goto nopage; |
3568 | 3687 | ||
3569 | |||
3570 | /* | ||
3571 | * The fast path uses conservative alloc_flags to succeed only until | ||
3572 | * kswapd needs to be woken up, and to avoid the cost of setting up | ||
3573 | * alloc_flags precisely. So we do that now. | ||
3574 | */ | ||
3575 | alloc_flags = gfp_to_alloc_flags(gfp_mask); | ||
3576 | |||
3577 | if (gfp_mask & __GFP_KSWAPD_RECLAIM) | 3688 | if (gfp_mask & __GFP_KSWAPD_RECLAIM) |
3578 | wake_all_kswapds(order, ac); | 3689 | wake_all_kswapds(order, ac); |
3579 | 3690 | ||
@@ -3650,35 +3761,21 @@ retry: | |||
3650 | goto got_pg; | 3761 | goto got_pg; |
3651 | 3762 | ||
3652 | /* Caller is not willing to reclaim, we can't balance anything */ | 3763 | /* Caller is not willing to reclaim, we can't balance anything */ |
3653 | if (!can_direct_reclaim) { | 3764 | if (!can_direct_reclaim) |
3654 | /* | ||
3655 | * All existing users of the __GFP_NOFAIL are blockable, so warn | ||
3656 | * of any new users that actually allow this type of allocation | ||
3657 | * to fail. | ||
3658 | */ | ||
3659 | WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL); | ||
3660 | goto nopage; | 3765 | goto nopage; |
3661 | } | ||
3662 | 3766 | ||
3663 | /* Avoid recursion of direct reclaim */ | 3767 | /* Make sure we know about allocations which stall for too long */ |
3664 | if (current->flags & PF_MEMALLOC) { | 3768 | if (time_after(jiffies, alloc_start + stall_timeout)) { |
3665 | /* | 3769 | warn_alloc(gfp_mask, ac->nodemask, |
3666 | * __GFP_NOFAIL request from this context is rather bizarre | 3770 | "page allocation stalls for %ums, order:%u", |
3667 | * because we cannot reclaim anything and only can loop waiting | 3771 | jiffies_to_msecs(jiffies-alloc_start), order); |
3668 | * for somebody to do a work for us. | 3772 | stall_timeout += 10 * HZ; |
3669 | */ | ||
3670 | if (WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) { | ||
3671 | cond_resched(); | ||
3672 | goto retry; | ||
3673 | } | ||
3674 | goto nopage; | ||
3675 | } | 3773 | } |
3676 | 3774 | ||
3677 | /* Avoid allocations with no watermarks from looping endlessly */ | 3775 | /* Avoid recursion of direct reclaim */ |
3678 | if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL)) | 3776 | if (current->flags & PF_MEMALLOC) |
3679 | goto nopage; | 3777 | goto nopage; |
3680 | 3778 | ||
3681 | |||
3682 | /* Try direct reclaim and then allocating */ | 3779 | /* Try direct reclaim and then allocating */ |
3683 | page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac, | 3780 | page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac, |
3684 | &did_some_progress); | 3781 | &did_some_progress); |
@@ -3702,14 +3799,6 @@ retry: | |||
3702 | if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT)) | 3799 | if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT)) |
3703 | goto nopage; | 3800 | goto nopage; |
3704 | 3801 | ||
3705 | /* Make sure we know about allocations which stall for too long */ | ||
3706 | if (time_after(jiffies, alloc_start + stall_timeout)) { | ||
3707 | warn_alloc(gfp_mask, | ||
3708 | "page allocation stalls for %ums, order:%u", | ||
3709 | jiffies_to_msecs(jiffies-alloc_start), order); | ||
3710 | stall_timeout += 10 * HZ; | ||
3711 | } | ||
3712 | |||
3713 | if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags, | 3802 | if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags, |
3714 | did_some_progress > 0, &no_progress_loops)) | 3803 | did_some_progress > 0, &no_progress_loops)) |
3715 | goto retry; | 3804 | goto retry; |
@@ -3738,6 +3827,10 @@ retry: | |||
3738 | if (page) | 3827 | if (page) |
3739 | goto got_pg; | 3828 | goto got_pg; |
3740 | 3829 | ||
3830 | /* Avoid allocations with no watermarks from looping endlessly */ | ||
3831 | if (test_thread_flag(TIF_MEMDIE)) | ||
3832 | goto nopage; | ||
3833 | |||
3741 | /* Retry as long as the OOM killer is making progress */ | 3834 | /* Retry as long as the OOM killer is making progress */ |
3742 | if (did_some_progress) { | 3835 | if (did_some_progress) { |
3743 | no_progress_loops = 0; | 3836 | no_progress_loops = 0; |
@@ -3755,82 +3848,123 @@ nopage: | |||
3755 | if (read_mems_allowed_retry(cpuset_mems_cookie)) | 3848 | if (read_mems_allowed_retry(cpuset_mems_cookie)) |
3756 | goto retry_cpuset; | 3849 | goto retry_cpuset; |
3757 | 3850 | ||
3758 | warn_alloc(gfp_mask, | 3851 | /* |
3852 | * Make sure that __GFP_NOFAIL request doesn't leak out and make sure | ||
3853 | * we always retry | ||
3854 | */ | ||
3855 | if (gfp_mask & __GFP_NOFAIL) { | ||
3856 | /* | ||
3857 | * All existing users of the __GFP_NOFAIL are blockable, so warn | ||
3858 | * of any new users that actually require GFP_NOWAIT | ||
3859 | */ | ||
3860 | if (WARN_ON_ONCE(!can_direct_reclaim)) | ||
3861 | goto fail; | ||
3862 | |||
3863 | /* | ||
3864 | * PF_MEMALLOC request from this context is rather bizarre | ||
3865 | * because we cannot reclaim anything and only can loop waiting | ||
3866 | * for somebody to do a work for us | ||
3867 | */ | ||
3868 | WARN_ON_ONCE(current->flags & PF_MEMALLOC); | ||
3869 | |||
3870 | /* | ||
3871 | * non failing costly orders are a hard requirement which we | ||
3872 | * are not prepared for much so let's warn about these users | ||
3873 | * so that we can identify them and convert them to something | ||
3874 | * else. | ||
3875 | */ | ||
3876 | WARN_ON_ONCE(order > PAGE_ALLOC_COSTLY_ORDER); | ||
3877 | |||
3878 | /* | ||
3879 | * Help non-failing allocations by giving them access to memory | ||
3880 | * reserves but do not use ALLOC_NO_WATERMARKS because this | ||
3881 | * could deplete whole memory reserves which would just make | ||
3882 | * the situation worse | ||
3883 | */ | ||
3884 | page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_HARDER, ac); | ||
3885 | if (page) | ||
3886 | goto got_pg; | ||
3887 | |||
3888 | cond_resched(); | ||
3889 | goto retry; | ||
3890 | } | ||
3891 | fail: | ||
3892 | warn_alloc(gfp_mask, ac->nodemask, | ||
3759 | "page allocation failure: order:%u", order); | 3893 | "page allocation failure: order:%u", order); |
3760 | got_pg: | 3894 | got_pg: |
3761 | return page; | 3895 | return page; |
3762 | } | 3896 | } |
3763 | 3897 | ||
3764 | /* | 3898 | static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order, |
3765 | * This is the 'heart' of the zoned buddy allocator. | 3899 | struct zonelist *zonelist, nodemask_t *nodemask, |
3766 | */ | 3900 | struct alloc_context *ac, gfp_t *alloc_mask, |
3767 | struct page * | 3901 | unsigned int *alloc_flags) |
3768 | __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, | ||
3769 | struct zonelist *zonelist, nodemask_t *nodemask) | ||
3770 | { | 3902 | { |
3771 | struct page *page; | 3903 | ac->high_zoneidx = gfp_zone(gfp_mask); |
3772 | unsigned int alloc_flags = ALLOC_WMARK_LOW; | 3904 | ac->zonelist = zonelist; |
3773 | gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */ | 3905 | ac->nodemask = nodemask; |
3774 | struct alloc_context ac = { | 3906 | ac->migratetype = gfpflags_to_migratetype(gfp_mask); |
3775 | .high_zoneidx = gfp_zone(gfp_mask), | ||
3776 | .zonelist = zonelist, | ||
3777 | .nodemask = nodemask, | ||
3778 | .migratetype = gfpflags_to_migratetype(gfp_mask), | ||
3779 | }; | ||
3780 | 3907 | ||
3781 | if (cpusets_enabled()) { | 3908 | if (cpusets_enabled()) { |
3782 | alloc_mask |= __GFP_HARDWALL; | 3909 | *alloc_mask |= __GFP_HARDWALL; |
3783 | alloc_flags |= ALLOC_CPUSET; | 3910 | if (!ac->nodemask) |
3784 | if (!ac.nodemask) | 3911 | ac->nodemask = &cpuset_current_mems_allowed; |
3785 | ac.nodemask = &cpuset_current_mems_allowed; | 3912 | else |
3913 | *alloc_flags |= ALLOC_CPUSET; | ||
3786 | } | 3914 | } |
3787 | 3915 | ||
3788 | gfp_mask &= gfp_allowed_mask; | ||
3789 | |||
3790 | lockdep_trace_alloc(gfp_mask); | 3916 | lockdep_trace_alloc(gfp_mask); |
3791 | 3917 | ||
3792 | might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM); | 3918 | might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM); |
3793 | 3919 | ||
3794 | if (should_fail_alloc_page(gfp_mask, order)) | 3920 | if (should_fail_alloc_page(gfp_mask, order)) |
3795 | return NULL; | 3921 | return false; |
3796 | 3922 | ||
3797 | /* | 3923 | if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE) |
3798 | * Check the zones suitable for the gfp_mask contain at least one | 3924 | *alloc_flags |= ALLOC_CMA; |
3799 | * valid zone. It's possible to have an empty zonelist as a result | ||
3800 | * of __GFP_THISNODE and a memoryless node | ||
3801 | */ | ||
3802 | if (unlikely(!zonelist->_zonerefs->zone)) | ||
3803 | return NULL; | ||
3804 | 3925 | ||
3805 | if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE) | 3926 | return true; |
3806 | alloc_flags |= ALLOC_CMA; | 3927 | } |
3807 | 3928 | ||
3929 | /* Determine whether to spread dirty pages and what the first usable zone */ | ||
3930 | static inline void finalise_ac(gfp_t gfp_mask, | ||
3931 | unsigned int order, struct alloc_context *ac) | ||
3932 | { | ||
3808 | /* Dirty zone balancing only done in the fast path */ | 3933 | /* Dirty zone balancing only done in the fast path */ |
3809 | ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE); | 3934 | ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE); |
3810 | 3935 | ||
3811 | /* | 3936 | /* |
3812 | * The preferred zone is used for statistics but crucially it is | 3937 | * The preferred zone is used for statistics but crucially it is |
3813 | * also used as the starting point for the zonelist iterator. It | 3938 | * also used as the starting point for the zonelist iterator. It |
3814 | * may get reset for allocations that ignore memory policies. | 3939 | * may get reset for allocations that ignore memory policies. |
3815 | */ | 3940 | */ |
3816 | ac.preferred_zoneref = first_zones_zonelist(ac.zonelist, | 3941 | ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, |
3817 | ac.high_zoneidx, ac.nodemask); | 3942 | ac->high_zoneidx, ac->nodemask); |
3818 | if (!ac.preferred_zoneref->zone) { | 3943 | } |
3819 | page = NULL; | 3944 | |
3820 | /* | 3945 | /* |
3821 | * This might be due to race with cpuset_current_mems_allowed | 3946 | * This is the 'heart' of the zoned buddy allocator. |
3822 | * update, so make sure we retry with original nodemask in the | 3947 | */ |
3823 | * slow path. | 3948 | struct page * |
3824 | */ | 3949 | __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, |
3825 | goto no_zone; | 3950 | struct zonelist *zonelist, nodemask_t *nodemask) |
3826 | } | 3951 | { |
3952 | struct page *page; | ||
3953 | unsigned int alloc_flags = ALLOC_WMARK_LOW; | ||
3954 | gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */ | ||
3955 | struct alloc_context ac = { }; | ||
3956 | |||
3957 | gfp_mask &= gfp_allowed_mask; | ||
3958 | if (!prepare_alloc_pages(gfp_mask, order, zonelist, nodemask, &ac, &alloc_mask, &alloc_flags)) | ||
3959 | return NULL; | ||
3960 | |||
3961 | finalise_ac(gfp_mask, order, &ac); | ||
3827 | 3962 | ||
3828 | /* First allocation attempt */ | 3963 | /* First allocation attempt */ |
3829 | page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac); | 3964 | page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac); |
3830 | if (likely(page)) | 3965 | if (likely(page)) |
3831 | goto out; | 3966 | goto out; |
3832 | 3967 | ||
3833 | no_zone: | ||
3834 | /* | 3968 | /* |
3835 | * Runtime PM, block IO and its error handling path can deadlock | 3969 | * Runtime PM, block IO and its error handling path can deadlock |
3836 | * because I/O on the device might not complete. | 3970 | * because I/O on the device might not complete. |
@@ -4252,20 +4386,20 @@ void si_meminfo_node(struct sysinfo *val, int nid) | |||
4252 | * Determine whether the node should be displayed or not, depending on whether | 4386 | * Determine whether the node should be displayed or not, depending on whether |
4253 | * SHOW_MEM_FILTER_NODES was passed to show_free_areas(). | 4387 | * SHOW_MEM_FILTER_NODES was passed to show_free_areas(). |
4254 | */ | 4388 | */ |
4255 | bool skip_free_areas_node(unsigned int flags, int nid) | 4389 | static bool show_mem_node_skip(unsigned int flags, int nid, nodemask_t *nodemask) |
4256 | { | 4390 | { |
4257 | bool ret = false; | ||
4258 | unsigned int cpuset_mems_cookie; | ||
4259 | |||
4260 | if (!(flags & SHOW_MEM_FILTER_NODES)) | 4391 | if (!(flags & SHOW_MEM_FILTER_NODES)) |
4261 | goto out; | 4392 | return false; |
4262 | 4393 | ||
4263 | do { | 4394 | /* |
4264 | cpuset_mems_cookie = read_mems_allowed_begin(); | 4395 | * no node mask - aka implicit memory numa policy. Do not bother with |
4265 | ret = !node_isset(nid, cpuset_current_mems_allowed); | 4396 | * the synchronization - read_mems_allowed_begin - because we do not |
4266 | } while (read_mems_allowed_retry(cpuset_mems_cookie)); | 4397 | * have to be precise here. |
4267 | out: | 4398 | */ |
4268 | return ret; | 4399 | if (!nodemask) |
4400 | nodemask = &cpuset_current_mems_allowed; | ||
4401 | |||
4402 | return !node_isset(nid, *nodemask); | ||
4269 | } | 4403 | } |
4270 | 4404 | ||
4271 | #define K(x) ((x) << (PAGE_SHIFT-10)) | 4405 | #define K(x) ((x) << (PAGE_SHIFT-10)) |
@@ -4306,7 +4440,7 @@ static void show_migration_types(unsigned char type) | |||
4306 | * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's | 4440 | * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's |
4307 | * cpuset. | 4441 | * cpuset. |
4308 | */ | 4442 | */ |
4309 | void show_free_areas(unsigned int filter) | 4443 | void show_free_areas(unsigned int filter, nodemask_t *nodemask) |
4310 | { | 4444 | { |
4311 | unsigned long free_pcp = 0; | 4445 | unsigned long free_pcp = 0; |
4312 | int cpu; | 4446 | int cpu; |
@@ -4314,7 +4448,7 @@ void show_free_areas(unsigned int filter) | |||
4314 | pg_data_t *pgdat; | 4448 | pg_data_t *pgdat; |
4315 | 4449 | ||
4316 | for_each_populated_zone(zone) { | 4450 | for_each_populated_zone(zone) { |
4317 | if (skip_free_areas_node(filter, zone_to_nid(zone))) | 4451 | if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask)) |
4318 | continue; | 4452 | continue; |
4319 | 4453 | ||
4320 | for_each_online_cpu(cpu) | 4454 | for_each_online_cpu(cpu) |
@@ -4348,6 +4482,9 @@ void show_free_areas(unsigned int filter) | |||
4348 | global_page_state(NR_FREE_CMA_PAGES)); | 4482 | global_page_state(NR_FREE_CMA_PAGES)); |
4349 | 4483 | ||
4350 | for_each_online_pgdat(pgdat) { | 4484 | for_each_online_pgdat(pgdat) { |
4485 | if (show_mem_node_skip(filter, pgdat->node_id, nodemask)) | ||
4486 | continue; | ||
4487 | |||
4351 | printk("Node %d" | 4488 | printk("Node %d" |
4352 | " active_anon:%lukB" | 4489 | " active_anon:%lukB" |
4353 | " inactive_anon:%lukB" | 4490 | " inactive_anon:%lukB" |
@@ -4397,7 +4534,7 @@ void show_free_areas(unsigned int filter) | |||
4397 | for_each_populated_zone(zone) { | 4534 | for_each_populated_zone(zone) { |
4398 | int i; | 4535 | int i; |
4399 | 4536 | ||
4400 | if (skip_free_areas_node(filter, zone_to_nid(zone))) | 4537 | if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask)) |
4401 | continue; | 4538 | continue; |
4402 | 4539 | ||
4403 | free_pcp = 0; | 4540 | free_pcp = 0; |
@@ -4462,7 +4599,7 @@ void show_free_areas(unsigned int filter) | |||
4462 | unsigned long nr[MAX_ORDER], flags, total = 0; | 4599 | unsigned long nr[MAX_ORDER], flags, total = 0; |
4463 | unsigned char types[MAX_ORDER]; | 4600 | unsigned char types[MAX_ORDER]; |
4464 | 4601 | ||
4465 | if (skip_free_areas_node(filter, zone_to_nid(zone))) | 4602 | if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask)) |
4466 | continue; | 4603 | continue; |
4467 | show_node(zone); | 4604 | show_node(zone); |
4468 | printk(KERN_CONT "%s: ", zone->name); | 4605 | printk(KERN_CONT "%s: ", zone->name); |
@@ -5083,8 +5220,17 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
5083 | if (context != MEMMAP_EARLY) | 5220 | if (context != MEMMAP_EARLY) |
5084 | goto not_early; | 5221 | goto not_early; |
5085 | 5222 | ||
5086 | if (!early_pfn_valid(pfn)) | 5223 | if (!early_pfn_valid(pfn)) { |
5224 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | ||
5225 | /* | ||
5226 | * Skip to the pfn preceding the next valid one (or | ||
5227 | * end_pfn), such that we hit a valid pfn (or end_pfn) | ||
5228 | * on our next iteration of the loop. | ||
5229 | */ | ||
5230 | pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1; | ||
5231 | #endif | ||
5087 | continue; | 5232 | continue; |
5233 | } | ||
5088 | if (!early_pfn_in_nid(pfn, nid)) | 5234 | if (!early_pfn_in_nid(pfn, nid)) |
5089 | continue; | 5235 | continue; |
5090 | if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised)) | 5236 | if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised)) |
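The hunk above lets memmap_init_zone() jump over an invalid PFN range in one step instead of testing every PFN inside a hole. Because the enclosing for () loop increments pfn after the continue, the code sets pfn to one before the next valid PFN so the following iteration lands exactly on it. A small standalone sketch of that loop idiom, with an invented notion of which PFNs are "valid":

/*
 * Sketch of the skip-ahead idiom used above: jump to one before the next
 * valid pfn so the loop increment lands on it. The hole below is made up.
 */
#include <stdio.h>

static unsigned long next_valid_pfn(unsigned long pfn, unsigned long end_pfn)
{
    /* pretend pfns below 100 are a hole in the memory map */
    return pfn < 100 ? 100 : end_pfn;
}

int main(void)
{
    unsigned long pfn, end_pfn = 105;

    for (pfn = 90; pfn < end_pfn; pfn++) {
        if (pfn < 100) {                            /* "!early_pfn_valid(pfn)" */
            pfn = next_valid_pfn(pfn, end_pfn) - 1;
            continue;                               /* pfn++ lands on 100 */
        }
        printf("init pfn %lu\n", pfn);              /* prints 100..104 */
    }
    return 0;
}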
@@ -5780,7 +5926,7 @@ static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages, | |||
5780 | * the zone and SPARSEMEM is in use. If there are holes within the | 5926 | * the zone and SPARSEMEM is in use. If there are holes within the |
5781 | * zone, each populated memory region may cost us one or two extra | 5927 | * zone, each populated memory region may cost us one or two extra |
5782 | * memmap pages due to alignment because memmap pages for each | 5928 | * memmap pages due to alignment because memmap pages for each |
5783 | * populated regions may not naturally algined on page boundary. | 5929 | * populated regions may not be naturally aligned on page boundary. |
5784 | * So the (present_pages >> 4) heuristic is a tradeoff for that. | 5930 | * So the (present_pages >> 4) heuristic is a tradeoff for that. |
5785 | */ | 5931 | */ |
5786 | if (spanned_pages > present_pages + (present_pages >> 4) && | 5932 | if (spanned_pages > present_pages + (present_pages >> 4) && |
@@ -6344,8 +6490,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) | |||
6344 | 6490 | ||
6345 | start_pfn = end_pfn; | 6491 | start_pfn = end_pfn; |
6346 | } | 6492 | } |
6347 | arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0; | ||
6348 | arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0; | ||
6349 | 6493 | ||
6350 | /* Find the PFNs that ZONE_MOVABLE begins at in each node */ | 6494 | /* Find the PFNs that ZONE_MOVABLE begins at in each node */ |
6351 | memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn)); | 6495 | memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn)); |
@@ -7081,8 +7225,9 @@ void *__init alloc_large_system_hash(const char *tablename, | |||
7081 | * If @count is not zero, it is okay to include less @count unmovable pages | 7225 | * If @count is not zero, it is okay to include less @count unmovable pages |
7082 | * | 7226 | * |
7083 | * PageLRU check without isolation or lru_lock could race so that | 7227 | * PageLRU check without isolation or lru_lock could race so that |
7084 | * MIGRATE_MOVABLE block might include unmovable pages. It means you can't | 7228 | * MIGRATE_MOVABLE block might include unmovable pages. And __PageMovable |
7085 | * expect this function should be exact. | 7229 | * check without lock_page also may miss some movable non-lru pages at |
7230 | * race condition. So you can't expect this function should be exact. | ||
7086 | */ | 7231 | */ |
7087 | bool has_unmovable_pages(struct zone *zone, struct page *page, int count, | 7232 | bool has_unmovable_pages(struct zone *zone, struct page *page, int count, |
7088 | bool skip_hwpoisoned_pages) | 7233 | bool skip_hwpoisoned_pages) |
@@ -7138,6 +7283,9 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, | |||
7138 | if (skip_hwpoisoned_pages && PageHWPoison(page)) | 7283 | if (skip_hwpoisoned_pages && PageHWPoison(page)) |
7139 | continue; | 7284 | continue; |
7140 | 7285 | ||
7286 | if (__PageMovable(page)) | ||
7287 | continue; | ||
7288 | |||
7141 | if (!PageLRU(page)) | 7289 | if (!PageLRU(page)) |
7142 | found++; | 7290 | found++; |
7143 | /* | 7291 | /* |
@@ -7249,6 +7397,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, | |||
7249 | * #MIGRATE_MOVABLE or #MIGRATE_CMA). All pageblocks | 7397 | * #MIGRATE_MOVABLE or #MIGRATE_CMA). All pageblocks |
7250 | * in range must have the same migratetype and it must | 7398 | * in range must have the same migratetype and it must |
7251 | * be either of the two. | 7399 | * be either of the two. |
7400 | * @gfp_mask: GFP mask to use during compaction | ||
7252 | * | 7401 | * |
7253 | * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES | 7402 | * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES |
7254 | * aligned, however it's the caller's responsibility to guarantee that | 7403 | * aligned, however it's the caller's responsibility to guarantee that |
@@ -7262,7 +7411,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, | |||
7262 | * need to be freed with free_contig_range(). | 7411 | * need to be freed with free_contig_range(). |
7263 | */ | 7412 | */ |
7264 | int alloc_contig_range(unsigned long start, unsigned long end, | 7413 | int alloc_contig_range(unsigned long start, unsigned long end, |
7265 | unsigned migratetype) | 7414 | unsigned migratetype, gfp_t gfp_mask) |
7266 | { | 7415 | { |
7267 | unsigned long outer_start, outer_end; | 7416 | unsigned long outer_start, outer_end; |
7268 | unsigned int order; | 7417 | unsigned int order; |
@@ -7274,7 +7423,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, | |||
7274 | .zone = page_zone(pfn_to_page(start)), | 7423 | .zone = page_zone(pfn_to_page(start)), |
7275 | .mode = MIGRATE_SYNC, | 7424 | .mode = MIGRATE_SYNC, |
7276 | .ignore_skip_hint = true, | 7425 | .ignore_skip_hint = true, |
7277 | .gfp_mask = GFP_KERNEL, | 7426 | .gfp_mask = memalloc_noio_flags(gfp_mask), |
7278 | }; | 7427 | }; |
7279 | INIT_LIST_HEAD(&cc.migratepages); | 7428 | INIT_LIST_HEAD(&cc.migratepages); |
7280 | 7429 | ||