Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c | 102
1 file changed, 73 insertions, 29 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a13ded1938f0..a712fb9e04ce 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1161,11 +1161,47 @@ void drain_local_pages(void *arg)
 }
 
 /*
- * Spill all the per-cpu pages from all CPUs back into the buddy allocator
+ * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
+ *
+ * Note that this code is protected against sending an IPI to an offline
+ * CPU but does not guarantee sending an IPI to newly hotplugged CPUs:
+ * on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but
+ * nothing keeps CPUs from showing up after we populated the cpumask and
+ * before the call to on_each_cpu_mask().
  */
 void drain_all_pages(void)
 {
-	on_each_cpu(drain_local_pages, NULL, 1);
+	int cpu;
+	struct per_cpu_pageset *pcp;
+	struct zone *zone;
+
+	/*
+	 * Allocate in the BSS so we wont require allocation in
+	 * direct reclaim path for CONFIG_CPUMASK_OFFSTACK=y
+	 */
+	static cpumask_t cpus_with_pcps;
+
+	/*
+	 * We don't care about racing with CPU hotplug event
+	 * as offline notification will cause the notified
+	 * cpu to drain that CPU pcps and on_each_cpu_mask
+	 * disables preemption as part of its processing
+	 */
+	for_each_online_cpu(cpu) {
+		bool has_pcps = false;
+		for_each_populated_zone(zone) {
+			pcp = per_cpu_ptr(zone->pageset, cpu);
+			if (pcp->pcp.count) {
+				has_pcps = true;
+				break;
+			}
+		}
+		if (has_pcps)
+			cpumask_set_cpu(cpu, &cpus_with_pcps);
+		else
+			cpumask_clear_cpu(cpu, &cpus_with_pcps);
+	}
+	on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1);
 }
 
 #ifdef CONFIG_HIBERNATION
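The hunk above narrows drain_all_pages() from an unconditional on_each_cpu() broadcast to an IPI sent only to CPUs whose per-cpu page lists are non-empty. As a rough illustration of the two-pass idea (scan for pending work, then signal only the CPUs that have any), here is a minimal user-space C sketch; pcp_count, drain_cpu() and the boolean mask are made-up stand-ins, not kernel interfaces:

/*
 * Illustrative user-space sketch (not kernel code) of the drain_all_pages()
 * change: record which "CPUs" actually have per-cpu pages, then only
 * signal those instead of interrupting everyone.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

/* Hypothetical stand-ins for per-cpu page counts, one per CPU. */
static int pcp_count[NR_CPUS] = { 0, 3, 0, 0, 7, 0, 1, 0 };
static bool cpus_with_pcps[NR_CPUS];   /* poor man's cpumask */

static void drain_cpu(int cpu)
{
	printf("draining cpu %d (%d pages)\n", cpu, pcp_count[cpu]);
	pcp_count[cpu] = 0;
}

int main(void)
{
	int cpu;

	/* Pass 1: mark only the CPUs that have something to drain. */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		cpus_with_pcps[cpu] = pcp_count[cpu] != 0;

	/* Pass 2: "IPI" only the marked CPUs, as on_each_cpu_mask() would. */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpus_with_pcps[cpu])
			drain_cpu(cpu);

	return 0;
}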
@@ -1968,7 +2004,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 		goto out;
 	}
 	/* Exhausted what can be done so it's blamo time */
-	out_of_memory(zonelist, gfp_mask, order, nodemask);
+	out_of_memory(zonelist, gfp_mask, order, nodemask, false);
 
 out:
 	clear_zonelist_oom(zonelist, gfp_mask);
@@ -1990,7 +2026,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	if (!order)
 		return NULL;
 
-	if (compaction_deferred(preferred_zone)) {
+	if (compaction_deferred(preferred_zone, order)) {
 		*deferred_compaction = true;
 		return NULL;
 	}
@@ -2012,6 +2048,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	if (page) {
 		preferred_zone->compact_considered = 0;
 		preferred_zone->compact_defer_shift = 0;
+		if (order >= preferred_zone->compact_order_failed)
+			preferred_zone->compact_order_failed = order + 1;
 		count_vm_event(COMPACTSUCCESS);
 		return page;
 	}
@@ -2028,7 +2066,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	 * defer if the failure was a sync compaction failure.
 	 */
 	if (sync_migration)
-		defer_compaction(preferred_zone);
+		defer_compaction(preferred_zone, order);
 
 	cond_resched();
 }
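The three compaction hunks above thread the allocation order through compaction_deferred()/defer_compaction() and track compact_order_failed, so that a failure at a large order does not keep deferring smaller-order requests. A minimal sketch of that bookkeeping, under the simplifying assumption that deferral means "skip the next 2^compact_defer_shift attempts at or above the failed order"; the struct and the *_sketch helpers below are illustrative, not the kernel implementation:

/*
 * Illustrative sketch (not the kernel code) of per-order compaction
 * deferral: a failure at order N should not block requests below N.
 */
#include <stdbool.h>
#include <stdio.h>

struct zone_compact_state {
	unsigned int compact_considered;
	unsigned int compact_defer_shift;
	int compact_order_failed;
};

/* Record a failure: only orders >= the failed order get deferred later. */
static void defer_compaction_sketch(struct zone_compact_state *z, int order)
{
	z->compact_considered = 0;
	if (z->compact_defer_shift < 6)		/* arbitrary cap for the sketch */
		z->compact_defer_shift++;
	if (order < z->compact_order_failed)
		z->compact_order_failed = order;
}

/* Should this request skip compaction for now? */
static bool compaction_deferred_sketch(struct zone_compact_state *z, int order)
{
	unsigned int limit = 1U << z->compact_defer_shift;

	if (order < z->compact_order_failed)
		return false;			/* smaller orders still try */
	return ++z->compact_considered <= limit;
}

int main(void)
{
	struct zone_compact_state z = { 0, 0, 3 };	/* last failure at order 3 */

	defer_compaction_sketch(&z, 3);
	printf("order 2 deferred? %d\n", compaction_deferred_sketch(&z, 2)); /* 0 */
	printf("order 4 deferred? %d\n", compaction_deferred_sketch(&z, 4)); /* 1 */
	return 0;
}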
@@ -2306,6 +2344,10 @@ rebalance:
 	if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
 		if (oom_killer_disabled)
 			goto nopage;
+		/* Coredumps can quickly deplete all memory reserves */
+		if ((current->flags & PF_DUMPCORE) &&
+		    !(gfp_mask & __GFP_NOFAIL))
+			goto nopage;
 		page = __alloc_pages_may_oom(gfp_mask, order,
 				zonelist, high_zoneidx,
 				nodemask, preferred_zone,
@@ -2378,8 +2420,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	struct zone *preferred_zone;
-	struct page *page;
+	struct page *page = NULL;
 	int migratetype = allocflags_to_migratetype(gfp_mask);
+	unsigned int cpuset_mems_cookie;
 
 	gfp_mask &= gfp_allowed_mask;
 
@@ -2398,15 +2441,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (unlikely(!zonelist->_zonerefs->zone))
 		return NULL;
 
-	get_mems_allowed();
+retry_cpuset:
+	cpuset_mems_cookie = get_mems_allowed();
+
 	/* The preferred zone is used for statistics later */
 	first_zones_zonelist(zonelist, high_zoneidx,
 				nodemask ? : &cpuset_current_mems_allowed,
 				&preferred_zone);
-	if (!preferred_zone) {
-		put_mems_allowed();
-		return NULL;
-	}
+	if (!preferred_zone)
+		goto out;
 
 	/* First allocation attempt */
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
@@ -2416,9 +2459,19 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 		page = __alloc_pages_slowpath(gfp_mask, order,
 				zonelist, high_zoneidx, nodemask,
 				preferred_zone, migratetype);
-	put_mems_allowed();
 
 	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
+
+out:
+	/*
+	 * When updating a task's mems_allowed, it is possible to race with
+	 * parallel threads in such a way that an allocation can fail while
+	 * the mask is being updated. If a page allocation is about to fail,
+	 * check if the cpuset changed during allocation and if so, retry.
+	 */
+	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+		goto retry_cpuset;
+
 	return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);
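The hunk above turns the get_mems_allowed()/put_mems_allowed() protection into a sequence-counter style cookie: take a cookie before the allocation, and if the cpuset mask changed while a failing allocation ran, retry rather than return a spurious failure (the do/while loop in the skip_free_areas_node() hunk below is the same pattern). A self-contained, single-threaded user-space sketch of that retry shape, with get_cookie(), put_cookie() and try_alloc() as hypothetical stand-ins rather than the real kernel API:

/*
 * Illustrative user-space sketch of the cookie/retry pattern: read a
 * sequence count before the operation; if the protected state changed
 * while a failing operation ran, retry instead of failing.
 */
#include <stdbool.h>
#include <stdio.h>

static unsigned int mems_seq;		/* bumped whenever the mask changes */
static unsigned long mems_allowed = 0x1;

static unsigned int get_cookie(void)
{
	return mems_seq;
}

/* Returns true if nothing changed since the cookie was taken. */
static bool put_cookie(unsigned int cookie)
{
	return cookie == mems_seq;
}

static void *try_alloc(void)
{
	/* Pretend the allocation fails when node 0 is not allowed. */
	return (mems_allowed & 0x1) ? (void *)1 : NULL;
}

int main(void)
{
	void *page;
	unsigned int cookie;

retry:
	cookie = get_cookie();
	page = try_alloc();
	if (!put_cookie(cookie) && !page)
		goto retry;	/* the mask moved under us: retry, don't fail */

	printf("page %p\n", page);
	return 0;
}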
@@ -2632,13 +2685,15 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 bool skip_free_areas_node(unsigned int flags, int nid)
 {
 	bool ret = false;
+	unsigned int cpuset_mems_cookie;
 
 	if (!(flags & SHOW_MEM_FILTER_NODES))
 		goto out;
 
-	get_mems_allowed();
-	ret = !node_isset(nid, cpuset_current_mems_allowed);
-	put_mems_allowed();
+	do {
+		cpuset_mems_cookie = get_mems_allowed();
+		ret = !node_isset(nid, cpuset_current_mems_allowed);
+	} while (!put_mems_allowed(cpuset_mems_cookie));
 out:
 	return ret;
 }
@@ -3925,18 +3980,6 @@ void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
 	}
 }
 
-int __init add_from_early_node_map(struct range *range, int az,
-				   int nr_range, int nid)
-{
-	unsigned long start_pfn, end_pfn;
-	int i;
-
-	/* need to go over early_node_map to find out good range for node */
-	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL)
-		nr_range = add_range(range, az, nr_range, start_pfn, end_pfn);
-	return nr_range;
-}
-
 /**
  * sparse_memory_present_with_active_regions - Call memory_present for each active range
  * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
@@ -4521,7 +4564,7 @@ static unsigned long __init early_calculate_totalpages(void)
 * memory. When they don't, some nodes will have more kernelcore than
 * others
 */
-static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn)
+static void __init find_zone_movable_pfns_for_nodes(void)
 {
 	int i, nid;
 	unsigned long usable_startpfn;
@@ -4713,7 +4756,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 
 	/* Find the PFNs that ZONE_MOVABLE begins at in each node */
 	memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
-	find_zone_movable_pfns_for_nodes(zone_movable_pfn);
+	find_zone_movable_pfns_for_nodes();
 
 	/* Print out the zone ranges */
 	printk("Zone PFN ranges:\n");
@@ -4823,6 +4866,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
 	int cpu = (unsigned long)hcpu;
 
 	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+		lru_add_drain_cpu(cpu);
 		drain_pages(cpu);
 
 		/*