Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  137
1 file changed, 80 insertions(+), 57 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 616a2c956b4b..a7198c065999 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,7 +48,6 @@
 #include <linux/backing-dev.h>
 #include <linux/fault-inject.h>
 #include <linux/page-isolation.h>
-#include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
 #include <linux/compaction.h>
@@ -641,8 +640,10 @@ static inline int free_pages_check(struct page *page)
 		bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set";
 		bad_flags = PAGE_FLAGS_CHECK_AT_FREE;
 	}
-	if (unlikely(mem_cgroup_bad_page_check(page)))
-		bad_reason = "cgroup check failed";
+#ifdef CONFIG_MEMCG
+	if (unlikely(page->mem_cgroup))
+		bad_reason = "page still charged to cgroup";
+#endif
 	if (unlikely(bad_reason)) {
 		bad_page(page, bad_reason, bad_flags);
 		return 1;
@@ -741,6 +742,9 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
 	int i;
 	int bad = 0;
 
+	VM_BUG_ON_PAGE(PageTail(page), page);
+	VM_BUG_ON_PAGE(PageHead(page) && compound_order(page) != order, page);
+
 	trace_mm_page_free(page, order);
 	kmemcheck_free_shadow(page, order);
 
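The two VM_BUG_ON_PAGE() assertions added above only fire with CONFIG_DEBUG_VM; they catch freeing a tail page directly, or freeing a compound head with an order that does not match its compound_order(). A minimal illustrative sketch of the misuse the second assertion traps (hypothetical caller, not part of this patch):

/*
 * Hypothetical example: allocate an order-2 compound page, then free it
 * as order-0. free_pages_prepare() now trips the second VM_BUG_ON_PAGE()
 * because PageHead(page) is true but compound_order(page) != 0.
 */
static void order_mismatch_example(void)
{
	struct page *page = alloc_pages(GFP_KERNEL | __GFP_COMP, 2);

	if (!page)
		return;
	__free_pages(page, 0);	/* wrong order: the page was allocated with order 2 */
}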
@@ -898,8 +902,10 @@ static inline int check_new_page(struct page *page)
 		bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set";
 		bad_flags = PAGE_FLAGS_CHECK_AT_PREP;
 	}
-	if (unlikely(mem_cgroup_bad_page_check(page)))
-		bad_reason = "cgroup check failed";
+#ifdef CONFIG_MEMCG
+	if (unlikely(page->mem_cgroup))
+		bad_reason = "page still charged to cgroup";
+#endif
 	if (unlikely(bad_reason)) {
 		bad_page(page, bad_reason, bad_flags);
 		return 1;
@@ -1267,55 +1273,75 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 #endif
 
 /*
- * Drain pages of the indicated processor.
+ * Drain pcplists of the indicated processor and zone.
  *
  * The processor must either be the current processor and the
  * thread pinned to the current processor or a processor that
  * is not online.
  */
-static void drain_pages(unsigned int cpu)
+static void drain_pages_zone(unsigned int cpu, struct zone *zone)
 {
 	unsigned long flags;
-	struct zone *zone;
+	struct per_cpu_pageset *pset;
+	struct per_cpu_pages *pcp;
 
-	for_each_populated_zone(zone) {
-		struct per_cpu_pageset *pset;
-		struct per_cpu_pages *pcp;
+	local_irq_save(flags);
+	pset = per_cpu_ptr(zone->pageset, cpu);
 
-		local_irq_save(flags);
-		pset = per_cpu_ptr(zone->pageset, cpu);
+	pcp = &pset->pcp;
+	if (pcp->count) {
+		free_pcppages_bulk(zone, pcp->count, pcp);
+		pcp->count = 0;
+	}
+	local_irq_restore(flags);
+}
 
-		pcp = &pset->pcp;
-		if (pcp->count) {
-			free_pcppages_bulk(zone, pcp->count, pcp);
-			pcp->count = 0;
-		}
-		local_irq_restore(flags);
+/*
+ * Drain pcplists of all zones on the indicated processor.
+ *
+ * The processor must either be the current processor and the
+ * thread pinned to the current processor or a processor that
+ * is not online.
+ */
+static void drain_pages(unsigned int cpu)
+{
+	struct zone *zone;
+
+	for_each_populated_zone(zone) {
+		drain_pages_zone(cpu, zone);
 	}
 }
 
 /*
  * Spill all of this CPU's per-cpu pages back into the buddy allocator.
+ *
+ * The CPU has to be pinned. When zone parameter is non-NULL, spill just
+ * the single zone's pages.
  */
-void drain_local_pages(void *arg)
+void drain_local_pages(struct zone *zone)
 {
-	drain_pages(smp_processor_id());
+	int cpu = smp_processor_id();
+
+	if (zone)
+		drain_pages_zone(cpu, zone);
+	else
+		drain_pages(cpu);
 }
 
 /*
  * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
  *
+ * When zone parameter is non-NULL, spill just the single zone's pages.
+ *
  * Note that this code is protected against sending an IPI to an offline
  * CPU but does not guarantee sending an IPI to newly hotplugged CPUs:
  * on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but
  * nothing keeps CPUs from showing up after we populated the cpumask and
  * before the call to on_each_cpu_mask().
  */
-void drain_all_pages(void)
+void drain_all_pages(struct zone *zone)
 {
 	int cpu;
-	struct per_cpu_pageset *pcp;
-	struct zone *zone;
 
 	/*
 	 * Allocate in the BSS so we wont require allocation in
@@ -1330,20 +1356,31 @@ void drain_all_pages(void)
 	 * disables preemption as part of its processing
 	 */
 	for_each_online_cpu(cpu) {
+		struct per_cpu_pageset *pcp;
+		struct zone *z;
 		bool has_pcps = false;
-		for_each_populated_zone(zone) {
+
+		if (zone) {
 			pcp = per_cpu_ptr(zone->pageset, cpu);
-			if (pcp->pcp.count) {
+			if (pcp->pcp.count)
 				has_pcps = true;
-				break;
+		} else {
+			for_each_populated_zone(z) {
+				pcp = per_cpu_ptr(z->pageset, cpu);
+				if (pcp->pcp.count) {
+					has_pcps = true;
+					break;
+				}
 			}
 		}
+
 		if (has_pcps)
 			cpumask_set_cpu(cpu, &cpus_with_pcps);
 		else
 			cpumask_clear_cpu(cpu, &cpus_with_pcps);
 	}
-	on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1);
+	on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages,
+								zone, 1);
 }
 
 #ifdef CONFIG_HIBERNATION
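Taken together, the two hunks above let callers target a single zone's pcplists instead of unconditionally draining every populated zone. A minimal usage sketch under the new signatures (the helper names below are hypothetical; only drain_all_pages() and drain_local_pages() come from this patch):

/* Hypothetical caller: IPI every CPU holding pages of @zone on its pcplists. */
static void flush_zone_pcplists(struct zone *zone)
{
	drain_all_pages(zone);		/* passing NULL would drain all zones */
}

/* Hypothetical caller: must already be pinned to the current CPU. */
static void flush_local_pcplists(void)
{
	drain_local_pages(NULL);	/* NULL means every populated zone on this CPU */
}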
@@ -1705,7 +1742,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
 			unsigned long mark, int classzone_idx, int alloc_flags,
 			long free_pages)
 {
-	/* free_pages my go negative - that's OK */
+	/* free_pages may go negative - that's OK */
 	long min = mark;
 	int o;
 	long free_cma = 0;
@@ -2296,7 +2333,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 		int classzone_idx, int migratetype, enum migrate_mode mode,
 		int *contended_compaction, bool *deferred_compaction)
 {
-	struct zone *last_compact_zone = NULL;
 	unsigned long compact_result;
 	struct page *page;
 
@@ -2307,7 +2343,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	compact_result = try_to_compact_pages(zonelist, order, gfp_mask,
 						nodemask, mode,
 						contended_compaction,
-						&last_compact_zone);
+						alloc_flags, classzone_idx);
 	current->flags &= ~PF_MEMALLOC;
 
 	switch (compact_result) {
@@ -2326,10 +2362,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	 */
 	count_vm_event(COMPACTSTALL);
 
-	/* Page migration frees to the PCP lists but we want merging */
-	drain_pages(get_cpu());
-	put_cpu();
-
 	page = get_page_from_freelist(gfp_mask, nodemask,
 			order, zonelist, high_zoneidx,
 			alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -2345,14 +2377,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	}
 
 	/*
-	 * last_compact_zone is where try_to_compact_pages thought allocation
-	 * should succeed, so it did not defer compaction. But here we know
-	 * that it didn't succeed, so we do the defer.
-	 */
-	if (last_compact_zone && mode != MIGRATE_ASYNC)
-		defer_compaction(last_compact_zone, order);
-
-	/*
 	 * It's bad if compaction run occurs and fails. The most likely reason
 	 * is that pages exist, but not enough to satisfy watermarks.
 	 */
@@ -2433,7 +2457,7 @@ retry:
 	 * pages are pinned on the per-cpu lists. Drain them and try again
 	 */
 	if (!page && !drained) {
-		drain_all_pages();
+		drain_all_pages(NULL);
 		drained = true;
 		goto retry;
 	}
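The hunk above is the direct-reclaim retry path: freshly reclaimed pages may still sit on per-cpu lists, so the allocator drains them once and retries before failing. A condensed sketch of that idiom (hypothetical helper; alloc_pages() stands in for the real get_page_from_freelist() call):

/* Hypothetical condensation of the retry-after-drain pattern shown above. */
static struct page *alloc_retry_after_drain(gfp_t gfp_mask, unsigned int order)
{
	struct page *page;
	bool drained = false;

retry:
	page = alloc_pages(gfp_mask, order);
	if (!page && !drained) {
		drain_all_pages(NULL);	/* spill all pcplists so freed pages can merge */
		drained = true;
		goto retry;
	}
	return page;
}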
@@ -3893,14 +3917,14 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
 	else
 		page_group_by_mobility_disabled = 0;
 
-	printk("Built %i zonelists in %s order, mobility grouping %s. "
+	pr_info("Built %i zonelists in %s order, mobility grouping %s. "
 		"Total pages: %ld\n",
 			nr_online_nodes,
 			zonelist_order_name[current_zonelist_order],
 			page_group_by_mobility_disabled ? "off" : "on",
 			vm_total_pages);
 #ifdef CONFIG_NUMA
-	printk("Policy zone: %s\n", zone_names[policy_zone]);
+	pr_info("Policy zone: %s\n", zone_names[policy_zone]);
 #endif
 }
 
@@ -4832,7 +4856,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 #endif
 	init_waitqueue_head(&pgdat->kswapd_wait);
 	init_waitqueue_head(&pgdat->pfmemalloc_wait);
-	pgdat_page_cgroup_init(pgdat);
 
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
@@ -5334,33 +5357,33 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 	find_zone_movable_pfns_for_nodes();
 
 	/* Print out the zone ranges */
-	printk("Zone ranges:\n");
+	pr_info("Zone ranges:\n");
 	for (i = 0; i < MAX_NR_ZONES; i++) {
 		if (i == ZONE_MOVABLE)
 			continue;
-		printk(KERN_CONT " %-8s ", zone_names[i]);
+		pr_info(" %-8s ", zone_names[i]);
 		if (arch_zone_lowest_possible_pfn[i] ==
 				arch_zone_highest_possible_pfn[i])
-			printk(KERN_CONT "empty\n");
+			pr_cont("empty\n");
 		else
-			printk(KERN_CONT "[mem %0#10lx-%0#10lx]\n",
+			pr_cont("[mem %0#10lx-%0#10lx]\n",
 				arch_zone_lowest_possible_pfn[i] << PAGE_SHIFT,
 				(arch_zone_highest_possible_pfn[i]
 					<< PAGE_SHIFT) - 1);
 	}
 
 	/* Print out the PFNs ZONE_MOVABLE begins at in each node */
-	printk("Movable zone start for each node\n");
+	pr_info("Movable zone start for each node\n");
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		if (zone_movable_pfn[i])
-			printk(" Node %d: %#010lx\n", i,
+			pr_info(" Node %d: %#010lx\n", i,
 				zone_movable_pfn[i] << PAGE_SHIFT);
 	}
 
 	/* Print out the early node map */
-	printk("Early memory node ranges\n");
+	pr_info("Early memory node ranges\n");
 	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
-		printk(" node %3d: [mem %#010lx-%#010lx]\n", nid,
+		pr_info(" node %3d: [mem %#010lx-%#010lx]\n", nid,
 			start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1);
 
 	/* Initialise every node */
@@ -5496,7 +5519,7 @@ void __init mem_init_print_info(const char *str)
 
 #undef adj_init_size
 
-	printk("Memory: %luK/%luK available "
+	pr_info("Memory: %luK/%luK available "
 	       "(%luK kernel code, %luK rwdata, %luK rodata, "
 	       "%luK init, %luK bss, %luK reserved"
 #ifdef CONFIG_HIGHMEM
@@ -6385,7 +6408,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	 */
 
 	lru_add_drain_all();
-	drain_all_pages();
+	drain_all_pages(cc.zone);
 
 	order = 0;
 	outer_start = start;