author		Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>	2012-04-18 15:52:50 -0400
committer	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>	2012-04-18 15:52:50 -0400
commit		681e4a5e13c1c8315694eb4f44e0cdd84c9082d2 (patch)
tree		699f14527c118859026e8ce0214e689d0b9c88cb /mm/page_alloc.c
parent		b960d6c43a63ebd2d8518b328da3816b833ee8cc (diff)
parent		c104f1fa1ecf4ee0fc06e31b1f77630b2551be81 (diff)
Merge commit 'c104f1fa1ecf4ee0fc06e31b1f77630b2551be81' into stable/for-linus-3.4
* commit 'c104f1fa1ecf4ee0fc06e31b1f77630b2551be81': (14566 commits)
  cpufreq: OMAP: fix build errors: depends on ARCH_OMAP2PLUS
  sparc64: Eliminate obsolete __handle_softirq() function
  sparc64: Fix bootup crash on sun4v.
  kconfig: delete last traces of __enabled_ from autoconf.h
  Revert "kconfig: fix __enabled_ macros definition for invisible and un-selected symbols"
  kconfig: fix IS_ENABLED to not require all options to be defined
  irq_domain: fix type mismatch in debugfs output format
  staging: android: fix mem leaks in __persistent_ram_init()
  staging: vt6656: Don't leak memory in drivers/staging/vt6656/ioctl.c::private_ioctl()
  staging: iio: hmc5843: Fix crash in probe function.
  panic: fix stack dump print on direct call to panic()
  drivers/rtc/rtc-pl031.c: enable clock on all ST variants
  Revert "mm: vmscan: fix misused nr_reclaimed in shrink_mem_cgroup_zone()"
  hugetlb: fix race condition in hugetlb_fault()
  drivers/rtc/rtc-twl.c: use static register while reading time
  drivers/rtc/rtc-s3c.c: add placeholder for driver private data
  drivers/rtc/rtc-s3c.c: fix compilation error
  MAINTAINERS: add PCDP console maintainer
  memcg: do not open code accesses to res_counter members
  drivers/rtc/rtc-efi.c: fix section mismatch warning
  ...
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	174
1 file changed, 132 insertions(+), 42 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 794e6715c226..a712fb9e04ce 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1161,11 +1161,47 @@ void drain_local_pages(void *arg)
 }
 
 /*
- * Spill all the per-cpu pages from all CPUs back into the buddy allocator
+ * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
+ *
+ * Note that this code is protected against sending an IPI to an offline
+ * CPU but does not guarantee sending an IPI to newly hotplugged CPUs:
+ * on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but
+ * nothing keeps CPUs from showing up after we populated the cpumask and
+ * before the call to on_each_cpu_mask().
  */
 void drain_all_pages(void)
 {
-	on_each_cpu(drain_local_pages, NULL, 1);
+	int cpu;
+	struct per_cpu_pageset *pcp;
+	struct zone *zone;
+
+	/*
+	 * Allocate in the BSS so we wont require allocation in
+	 * direct reclaim path for CONFIG_CPUMASK_OFFSTACK=y
+	 */
+	static cpumask_t cpus_with_pcps;
+
+	/*
+	 * We don't care about racing with CPU hotplug event
+	 * as offline notification will cause the notified
+	 * cpu to drain that CPU pcps and on_each_cpu_mask
+	 * disables preemption as part of its processing
+	 */
+	for_each_online_cpu(cpu) {
+		bool has_pcps = false;
+		for_each_populated_zone(zone) {
+			pcp = per_cpu_ptr(zone->pageset, cpu);
+			if (pcp->pcp.count) {
+				has_pcps = true;
+				break;
+			}
+		}
+		if (has_pcps)
+			cpumask_set_cpu(cpu, &cpus_with_pcps);
+		else
+			cpumask_clear_cpu(cpu, &cpus_with_pcps);
+	}
+	on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1);
 }
 
 #ifdef CONFIG_HIBERNATION
@@ -1968,7 +2004,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 			goto out;
 	}
 	/* Exhausted what can be done so it's blamo time */
-	out_of_memory(zonelist, gfp_mask, order, nodemask);
+	out_of_memory(zonelist, gfp_mask, order, nodemask, false);
 
 out:
 	clear_zonelist_oom(zonelist, gfp_mask);
@@ -1981,14 +2017,20 @@ static struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-	int migratetype, unsigned long *did_some_progress,
-	bool sync_migration)
+	int migratetype, bool sync_migration,
+	bool *deferred_compaction,
+	unsigned long *did_some_progress)
 {
 	struct page *page;
 
-	if (!order || compaction_deferred(preferred_zone))
+	if (!order)
 		return NULL;
 
+	if (compaction_deferred(preferred_zone, order)) {
+		*deferred_compaction = true;
+		return NULL;
+	}
+
 	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
 						nodemask, sync_migration);
@@ -2006,6 +2048,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 		if (page) {
 			preferred_zone->compact_considered = 0;
 			preferred_zone->compact_defer_shift = 0;
+			if (order >= preferred_zone->compact_order_failed)
+				preferred_zone->compact_order_failed = order + 1;
 			count_vm_event(COMPACTSUCCESS);
 			return page;
 		}
@@ -2016,7 +2060,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 		 * but not enough to satisfy watermarks.
 		 */
 		count_vm_event(COMPACTFAIL);
-		defer_compaction(preferred_zone);
+
+		/*
+		 * As async compaction considers a subset of pageblocks, only
+		 * defer if the failure was a sync compaction failure.
+		 */
+		if (sync_migration)
+			defer_compaction(preferred_zone, order);
 
 		cond_resched();
 	}
@@ -2028,8 +2078,9 @@ static inline struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-	int migratetype, unsigned long *did_some_progress,
-	bool sync_migration)
+	int migratetype, bool sync_migration,
+	bool *deferred_compaction,
+	unsigned long *did_some_progress)
 {
 	return NULL;
 }
@@ -2179,6 +2230,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	unsigned long pages_reclaimed = 0;
 	unsigned long did_some_progress;
 	bool sync_migration = false;
+	bool deferred_compaction = false;
 
 	/*
 	 * In the slowpath, we sanity check order to avoid ever trying to
@@ -2259,12 +2311,22 @@ rebalance:
 					zonelist, high_zoneidx,
 					nodemask,
 					alloc_flags, preferred_zone,
-					migratetype, &did_some_progress,
-					sync_migration);
+					migratetype, sync_migration,
+					&deferred_compaction,
+					&did_some_progress);
 	if (page)
 		goto got_pg;
 	sync_migration = true;
 
+	/*
+	 * If compaction is deferred for high-order allocations, it is because
+	 * sync compaction recently failed. In this is the case and the caller
+	 * has requested the system not be heavily disrupted, fail the
+	 * allocation now instead of entering direct reclaim
+	 */
+	if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD))
+		goto nopage;
+
 	/* Try direct reclaim and then allocating */
 	page = __alloc_pages_direct_reclaim(gfp_mask, order,
 					zonelist, high_zoneidx,
@@ -2282,6 +2344,10 @@ rebalance:
 	if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
 		if (oom_killer_disabled)
 			goto nopage;
+		/* Coredumps can quickly deplete all memory reserves */
+		if ((current->flags & PF_DUMPCORE) &&
+		    !(gfp_mask & __GFP_NOFAIL))
+			goto nopage;
 		page = __alloc_pages_may_oom(gfp_mask, order,
 				zonelist, high_zoneidx,
 				nodemask, preferred_zone,
@@ -2328,8 +2394,9 @@ rebalance:
 					zonelist, high_zoneidx,
 					nodemask,
 					alloc_flags, preferred_zone,
-					migratetype, &did_some_progress,
-					sync_migration);
+					migratetype, sync_migration,
+					&deferred_compaction,
+					&did_some_progress);
 		if (page)
 			goto got_pg;
 	}
@@ -2353,8 +2420,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	struct zone *preferred_zone;
-	struct page *page;
+	struct page *page = NULL;
 	int migratetype = allocflags_to_migratetype(gfp_mask);
+	unsigned int cpuset_mems_cookie;
 
 	gfp_mask &= gfp_allowed_mask;
 
@@ -2373,15 +2441,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (unlikely(!zonelist->_zonerefs->zone))
 		return NULL;
 
-	get_mems_allowed();
+retry_cpuset:
+	cpuset_mems_cookie = get_mems_allowed();
+
 	/* The preferred zone is used for statistics later */
 	first_zones_zonelist(zonelist, high_zoneidx,
 				nodemask ? : &cpuset_current_mems_allowed,
 				&preferred_zone);
-	if (!preferred_zone) {
-		put_mems_allowed();
-		return NULL;
-	}
+	if (!preferred_zone)
+		goto out;
 
 	/* First allocation attempt */
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
@@ -2391,9 +2459,19 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 		page = __alloc_pages_slowpath(gfp_mask, order,
 				zonelist, high_zoneidx, nodemask,
 				preferred_zone, migratetype);
-	put_mems_allowed();
 
 	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
+
+out:
+	/*
+	 * When updating a task's mems_allowed, it is possible to race with
+	 * parallel threads in such a way that an allocation can fail while
+	 * the mask is being updated. If a page allocation is about to fail,
+	 * check if the cpuset changed during allocation and if so, retry.
+	 */
+	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+		goto retry_cpuset;
+
 	return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -2607,13 +2685,15 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 bool skip_free_areas_node(unsigned int flags, int nid)
 {
 	bool ret = false;
+	unsigned int cpuset_mems_cookie;
 
 	if (!(flags & SHOW_MEM_FILTER_NODES))
 		goto out;
 
-	get_mems_allowed();
-	ret = !node_isset(nid, cpuset_current_mems_allowed);
-	put_mems_allowed();
+	do {
+		cpuset_mems_cookie = get_mems_allowed();
+		ret = !node_isset(nid, cpuset_current_mems_allowed);
+	} while (!put_mems_allowed(cpuset_mems_cookie));
 out:
 	return ret;
 }
@@ -3900,18 +3980,6 @@ void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
 	}
 }
 
-int __init add_from_early_node_map(struct range *range, int az,
-				   int nr_range, int nid)
-{
-	unsigned long start_pfn, end_pfn;
-	int i;
-
-	/* need to go over early_node_map to find out good range for node */
-	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL)
-		nr_range = add_range(range, az, nr_range, start_pfn, end_pfn);
-	return nr_range;
-}
-
 /**
  * sparse_memory_present_with_active_regions - Call memory_present for each active range
  * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
@@ -4237,7 +4305,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
 		unsigned long size, realsize, memmap_pages;
-		enum lru_list l;
+		enum lru_list lru;
 
 		size = zone_spanned_pages_in_node(nid, j, zones_size);
 		realsize = size - zone_absent_pages_in_node(nid, j,
@@ -4287,8 +4355,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		zone->zone_pgdat = pgdat;
 
 		zone_pcp_init(zone);
-		for_each_lru(l)
-			INIT_LIST_HEAD(&zone->lru[l].list);
+		for_each_lru(lru)
+			INIT_LIST_HEAD(&zone->lruvec.lists[lru]);
 		zone->reclaim_stat.recent_rotated[0] = 0;
 		zone->reclaim_stat.recent_rotated[1] = 0;
 		zone->reclaim_stat.recent_scanned[0] = 0;
@@ -4496,7 +4564,7 @@ static unsigned long __init early_calculate_totalpages(void)
 * memory. When they don't, some nodes will have more kernelcore than
 * others
 */
-static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn)
+static void __init find_zone_movable_pfns_for_nodes(void)
 {
 	int i, nid;
 	unsigned long usable_startpfn;
@@ -4642,8 +4710,10 @@ static void check_for_regular_memory(pg_data_t *pgdat)
 
 	for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) {
 		struct zone *zone = &pgdat->node_zones[zone_type];
-		if (zone->present_pages)
+		if (zone->present_pages) {
 			node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY);
+			break;
+		}
 	}
 #endif
 }
@@ -4686,7 +4756,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 
 	/* Find the PFNs that ZONE_MOVABLE begins at in each node */
 	memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
-	find_zone_movable_pfns_for_nodes(zone_movable_pfn);
+	find_zone_movable_pfns_for_nodes();
 
 	/* Print out the zone ranges */
 	printk("Zone PFN ranges:\n");
@@ -4796,6 +4866,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
 	int cpu = (unsigned long)hcpu;
 
 	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+		lru_add_drain_cpu(cpu);
 		drain_pages(cpu);
 
 		/*
@@ -5209,6 +5280,7 @@ void *__init alloc_large_system_hash(const char *tablename,
 		max = ((unsigned long long)nr_all_pages << PAGE_SHIFT) >> 4;
 		do_div(max, bucketsize);
 	}
+	max = min(max, 0x80000000ULL);
 
 	if (numentries > max)
 		numentries = max;
@@ -5386,7 +5458,25 @@ __count_immobile_pages(struct zone *zone, struct page *page, int count)
 
 bool is_pageblock_removable_nolock(struct page *page)
 {
-	struct zone *zone = page_zone(page);
+	struct zone *zone;
+	unsigned long pfn;
+
+	/*
+	 * We have to be careful here because we are iterating over memory
+	 * sections which are not zone aware so we might end up outside of
+	 * the zone but still within the section.
+	 * We have to take care about the node as well. If the node is offline
+	 * its NODE_DATA will be NULL - see page_zone.
+	 */
+	if (!node_online(page_to_nid(page)))
+		return false;
+
+	zone = page_zone(page);
+	pfn = page_to_pfn(page);
+	if (zone->zone_start_pfn > pfn ||
+			zone->zone_start_pfn + zone->spanned_pages <= pfn)
+		return false;
+
 	return __count_immobile_pages(zone, page, 0);
 }
 