Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  102
1 files changed, 73 insertions, 29 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a13ded1938f0..a712fb9e04ce 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1161,11 +1161,47 @@ void drain_local_pages(void *arg)
 }
 
 /*
- * Spill all the per-cpu pages from all CPUs back into the buddy allocator
+ * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
+ *
+ * Note that this code is protected against sending an IPI to an offline
+ * CPU but does not guarantee sending an IPI to newly hotplugged CPUs:
+ * on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but
+ * nothing keeps CPUs from showing up after we populated the cpumask and
+ * before the call to on_each_cpu_mask().
  */
 void drain_all_pages(void)
 {
-	on_each_cpu(drain_local_pages, NULL, 1);
+	int cpu;
+	struct per_cpu_pageset *pcp;
+	struct zone *zone;
+
+	/*
+	 * Allocate in the BSS so we wont require allocation in
+	 * direct reclaim path for CONFIG_CPUMASK_OFFSTACK=y
+	 */
+	static cpumask_t cpus_with_pcps;
+
+	/*
+	 * We don't care about racing with CPU hotplug event
+	 * as offline notification will cause the notified
+	 * cpu to drain that CPU pcps and on_each_cpu_mask
+	 * disables preemption as part of its processing
+	 */
+	for_each_online_cpu(cpu) {
+		bool has_pcps = false;
+		for_each_populated_zone(zone) {
+			pcp = per_cpu_ptr(zone->pageset, cpu);
+			if (pcp->pcp.count) {
+				has_pcps = true;
+				break;
+			}
+		}
+		if (has_pcps)
+			cpumask_set_cpu(cpu, &cpus_with_pcps);
+		else
+			cpumask_clear_cpu(cpu, &cpus_with_pcps);
+	}
+	on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1);
 }
 
 #ifdef CONFIG_HIBERNATION
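
The drain_all_pages() rework above follows a two-pass pattern: first scan for CPUs that actually hold per-cpu pages and record them in a static cpumask, then IPI only those CPUs through on_each_cpu_mask(). Below is a minimal, self-contained userspace sketch of that same idea; the worker array, bitmask, and function names are invented for illustration and none of this is kernel API.

/*
 * Userspace analogue of the new drain_all_pages() logic: instead of
 * signalling every worker, first record which workers actually have
 * cached items, then "IPI" (here: call) only those.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_WORKERS 8

static int cached_items[NR_WORKERS] = { 0, 3, 0, 7, 0, 0, 1, 0 };
static unsigned long workers_with_cache;	/* one bit per worker */

static void drain_worker(int w)
{
	printf("draining worker %d (%d items)\n", w, cached_items[w]);
	cached_items[w] = 0;
}

int main(void)
{
	int w;

	/* Pass 1: mark only the workers that have something to drain. */
	for (w = 0; w < NR_WORKERS; w++) {
		bool has_cache = cached_items[w] != 0;

		if (has_cache)
			workers_with_cache |= 1UL << w;
		else
			workers_with_cache &= ~(1UL << w);
	}

	/* Pass 2: notify only the marked workers. */
	for (w = 0; w < NR_WORKERS; w++)
		if (workers_with_cache & (1UL << w))
			drain_worker(w);

	return 0;
}

Keeping the mask in static storage mirrors the patch's choice of a BSS cpumask, which avoids having to allocate one in the direct-reclaim path when CONFIG_CPUMASK_OFFSTACK=y.
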
@@ -1968,7 +2004,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 			goto out;
 	}
 	/* Exhausted what can be done so it's blamo time */
-	out_of_memory(zonelist, gfp_mask, order, nodemask);
+	out_of_memory(zonelist, gfp_mask, order, nodemask, false);
 
 out:
 	clear_zonelist_oom(zonelist, gfp_mask);
@@ -1990,7 +2026,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	if (!order)
 		return NULL;
 
-	if (compaction_deferred(preferred_zone)) {
+	if (compaction_deferred(preferred_zone, order)) {
 		*deferred_compaction = true;
 		return NULL;
 	}
@@ -2012,6 +2048,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	if (page) {
 		preferred_zone->compact_considered = 0;
 		preferred_zone->compact_defer_shift = 0;
+		if (order >= preferred_zone->compact_order_failed)
+			preferred_zone->compact_order_failed = order + 1;
 		count_vm_event(COMPACTSUCCESS);
 		return page;
 	}
@@ -2028,7 +2066,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 		 * defer if the failure was a sync compaction failure.
 		 */
 		if (sync_migration)
-			defer_compaction(preferred_zone);
+			defer_compaction(preferred_zone, order);
 
 		cond_resched();
 	}
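
The two compaction hunks above make defer_compaction()/compaction_deferred() order-aware: a high-order failure no longer throttles cheaper requests, and a success pushes compact_order_failed past the satisfied order. The following is a standalone sketch of that backoff scheme, loosely modelled on the kernel helpers; the field names, the backoff cap, and the driver in main() are illustrative, not the exact kernel definitions.

/*
 * Rough sketch of order-aware compaction deferral: after a failure, skip
 * the expensive operation for an exponentially growing number of attempts,
 * remembering the smallest order that failed so cheaper requests are not
 * penalised.
 */
#include <stdbool.h>
#include <stdio.h>

#define DEFER_SHIFT_MAX 6

struct zone_stub {
	unsigned int compact_considered;
	unsigned int compact_defer_shift;
	int compact_order_failed;
};

static void defer_compaction(struct zone_stub *z, int order)
{
	z->compact_considered = 0;
	if (z->compact_defer_shift < DEFER_SHIFT_MAX)
		z->compact_defer_shift++;
	if (order < z->compact_order_failed)
		z->compact_order_failed = order;
}

static bool compaction_deferred(struct zone_stub *z, int order)
{
	unsigned int limit = 1U << z->compact_defer_shift;

	if (order < z->compact_order_failed)
		return false;		/* smaller requests may still succeed */
	if (++z->compact_considered >= limit)
		return false;		/* backoff window expired, try again */
	return true;
}

int main(void)
{
	struct zone_stub zone = { 0, 0, 10 };
	int i;

	defer_compaction(&zone, 4);	/* pretend an order-4 compaction failed */

	for (i = 0; i < 4; i++)
		printf("attempt %d: order-4 deferred=%d, order-2 deferred=%d\n",
		       i, compaction_deferred(&zone, 4),
		       compaction_deferred(&zone, 2));
	return 0;
}

The success path in the __alloc_pages_direct_compact() hunk then resets the counters and raises compact_order_failed, which is what re-enables compaction for that order after a prior deferral.
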
@@ -2306,6 +2344,10 @@ rebalance:
 	if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
 		if (oom_killer_disabled)
 			goto nopage;
+		/* Coredumps can quickly deplete all memory reserves */
+		if ((current->flags & PF_DUMPCORE) &&
+		    !(gfp_mask & __GFP_NOFAIL))
+			goto nopage;
 		page = __alloc_pages_may_oom(gfp_mask, order,
 				zonelist, high_zoneidx,
 				nodemask, preferred_zone,
@@ -2378,8 +2420,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	struct zone *preferred_zone;
-	struct page *page;
+	struct page *page = NULL;
 	int migratetype = allocflags_to_migratetype(gfp_mask);
+	unsigned int cpuset_mems_cookie;
 
 	gfp_mask &= gfp_allowed_mask;
 
@@ -2398,15 +2441,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (unlikely(!zonelist->_zonerefs->zone))
 		return NULL;
 
-	get_mems_allowed();
+retry_cpuset:
+	cpuset_mems_cookie = get_mems_allowed();
+
 	/* The preferred zone is used for statistics later */
 	first_zones_zonelist(zonelist, high_zoneidx,
 				nodemask ? : &cpuset_current_mems_allowed,
 				&preferred_zone);
-	if (!preferred_zone) {
-		put_mems_allowed();
-		return NULL;
-	}
+	if (!preferred_zone)
+		goto out;
 
 	/* First allocation attempt */
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
@@ -2416,9 +2459,19 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 		page = __alloc_pages_slowpath(gfp_mask, order,
 				zonelist, high_zoneidx, nodemask,
 				preferred_zone, migratetype);
-	put_mems_allowed();
 
 	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
+
+out:
+	/*
+	 * When updating a task's mems_allowed, it is possible to race with
+	 * parallel threads in such a way that an allocation can fail while
+	 * the mask is being updated. If a page allocation is about to fail,
+	 * check if the cpuset changed during allocation and if so, retry.
+	 */
+	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+		goto retry_cpuset;
+
 	return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);
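
The retry_cpuset/cpuset_mems_cookie change above replaces the old get_mems_allowed()/put_mems_allowed() critical section with a seqcount-style cookie: if the allocation fails and put_mems_allowed() reports that mems_allowed changed underneath it, the allocation is retried instead of failing spuriously. Here is a self-contained userspace sketch of the same idea; the names and the single-threaded "simulated race" are invented for the example and do not reflect the real cpuset API.

/*
 * Cookie-based retry: sample a generation count before the attempt and,
 * if the attempt failed while the allowed set was being rewritten, retry
 * rather than report a spurious failure.
 */
#include <stdbool.h>
#include <stdio.h>

static unsigned int mems_generation;		/* bumped by the "updater" */
static unsigned long allowed_nodes = 0x1;	/* bitmask of allowed nodes */
static int fake_page;

static unsigned int get_allowed_cookie(void)
{
	return mems_generation;
}

/* Returns true if the allowed set stayed stable across the section. */
static bool put_allowed_cookie(unsigned int cookie)
{
	return cookie == mems_generation;
}

static void *try_alloc(unsigned long mask)
{
	/* Pretend the allocation can only be satisfied from node 1. */
	return (mask & 0x2) ? &fake_page : NULL;
}

int main(void)
{
	unsigned int cookie;
	void *page;

retry:
	cookie = get_allowed_cookie();
	page = try_alloc(allowed_nodes);

	/* Simulate a concurrent mems_allowed update racing with attempt 1. */
	if (!page && mems_generation == 0) {
		allowed_nodes = 0x3;
		mems_generation++;
	}

	if (!page && !put_allowed_cookie(cookie))
		goto retry;

	printf("allocation %s\n", page ? "succeeded" : "failed");
	return 0;
}

The skip_free_areas_node() hunk that follows applies the same cookie in a do/while loop, since a read-only check just needs to be repeated until it observes a stable mask.
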
@@ -2632,13 +2685,15 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 bool skip_free_areas_node(unsigned int flags, int nid)
 {
 	bool ret = false;
+	unsigned int cpuset_mems_cookie;
 
 	if (!(flags & SHOW_MEM_FILTER_NODES))
 		goto out;
 
-	get_mems_allowed();
-	ret = !node_isset(nid, cpuset_current_mems_allowed);
-	put_mems_allowed();
+	do {
+		cpuset_mems_cookie = get_mems_allowed();
+		ret = !node_isset(nid, cpuset_current_mems_allowed);
+	} while (!put_mems_allowed(cpuset_mems_cookie));
 out:
 	return ret;
 }
@@ -3925,18 +3980,6 @@ void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
 	}
 }
 
-int __init add_from_early_node_map(struct range *range, int az,
-				   int nr_range, int nid)
-{
-	unsigned long start_pfn, end_pfn;
-	int i;
-
-	/* need to go over early_node_map to find out good range for node */
-	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL)
-		nr_range = add_range(range, az, nr_range, start_pfn, end_pfn);
-	return nr_range;
-}
-
 /**
  * sparse_memory_present_with_active_regions - Call memory_present for each active range
  * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
@@ -4521,7 +4564,7 @@ static unsigned long __init early_calculate_totalpages(void)
  * memory. When they don't, some nodes will have more kernelcore than
  * others
  */
-static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn)
+static void __init find_zone_movable_pfns_for_nodes(void)
 {
 	int i, nid;
 	unsigned long usable_startpfn;
@@ -4713,7 +4756,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 
 	/* Find the PFNs that ZONE_MOVABLE begins at in each node */
 	memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
-	find_zone_movable_pfns_for_nodes(zone_movable_pfn);
+	find_zone_movable_pfns_for_nodes();
 
 	/* Print out the zone ranges */
 	printk("Zone PFN ranges:\n");
@@ -4823,6 +4866,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
 	int cpu = (unsigned long)hcpu;
 
 	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+		lru_add_drain_cpu(cpu);
 		drain_pages(cpu);
 
 		/*