Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	| 137
1 file changed, 80 insertions(+), 57 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 616a2c956b4b..a7198c065999 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,7 +48,6 @@
 #include <linux/backing-dev.h>
 #include <linux/fault-inject.h>
 #include <linux/page-isolation.h>
-#include <linux/page_cgroup.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemleak.h>
 #include <linux/compaction.h>
@@ -641,8 +640,10 @@ static inline int free_pages_check(struct page *page)
 		bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set";
 		bad_flags = PAGE_FLAGS_CHECK_AT_FREE;
 	}
-	if (unlikely(mem_cgroup_bad_page_check(page)))
-		bad_reason = "cgroup check failed";
+#ifdef CONFIG_MEMCG
+	if (unlikely(page->mem_cgroup))
+		bad_reason = "page still charged to cgroup";
+#endif
 	if (unlikely(bad_reason)) {
 		bad_page(page, bad_reason, bad_flags);
 		return 1;
@@ -741,6 +742,9 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
 	int i;
 	int bad = 0;
 
+	VM_BUG_ON_PAGE(PageTail(page), page);
+	VM_BUG_ON_PAGE(PageHead(page) && compound_order(page) != order, page);
+
 	trace_mm_page_free(page, order);
 	kmemcheck_free_shadow(page, order);
 
@@ -898,8 +902,10 @@ static inline int check_new_page(struct page *page)
 		bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set";
 		bad_flags = PAGE_FLAGS_CHECK_AT_PREP;
 	}
-	if (unlikely(mem_cgroup_bad_page_check(page)))
-		bad_reason = "cgroup check failed";
+#ifdef CONFIG_MEMCG
+	if (unlikely(page->mem_cgroup))
+		bad_reason = "page still charged to cgroup";
+#endif
 	if (unlikely(bad_reason)) {
 		bad_page(page, bad_reason, bad_flags);
 		return 1;
@@ -1267,55 +1273,75 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 #endif
 
 /*
- * Drain pages of the indicated processor.
+ * Drain pcplists of the indicated processor and zone.
  *
  * The processor must either be the current processor and the
  * thread pinned to the current processor or a processor that
  * is not online.
  */
-static void drain_pages(unsigned int cpu)
+static void drain_pages_zone(unsigned int cpu, struct zone *zone)
 {
 	unsigned long flags;
-	struct zone *zone;
+	struct per_cpu_pageset *pset;
+	struct per_cpu_pages *pcp;
 
-	for_each_populated_zone(zone) {
-		struct per_cpu_pageset *pset;
-		struct per_cpu_pages *pcp;
+	local_irq_save(flags);
+	pset = per_cpu_ptr(zone->pageset, cpu);
 
-		local_irq_save(flags);
-		pset = per_cpu_ptr(zone->pageset, cpu);
+	pcp = &pset->pcp;
+	if (pcp->count) {
+		free_pcppages_bulk(zone, pcp->count, pcp);
+		pcp->count = 0;
+	}
+	local_irq_restore(flags);
+}
 
-		pcp = &pset->pcp;
-		if (pcp->count) {
-			free_pcppages_bulk(zone, pcp->count, pcp);
-			pcp->count = 0;
-		}
-		local_irq_restore(flags);
+/*
+ * Drain pcplists of all zones on the indicated processor.
+ *
+ * The processor must either be the current processor and the
+ * thread pinned to the current processor or a processor that
+ * is not online.
+ */
+static void drain_pages(unsigned int cpu)
+{
+	struct zone *zone;
+
+	for_each_populated_zone(zone) {
+		drain_pages_zone(cpu, zone);
 	}
 }
 
 /*
  * Spill all of this CPU's per-cpu pages back into the buddy allocator.
+ *
+ * The CPU has to be pinned. When zone parameter is non-NULL, spill just
+ * the single zone's pages.
  */
-void drain_local_pages(void *arg)
+void drain_local_pages(struct zone *zone)
 {
-	drain_pages(smp_processor_id());
+	int cpu = smp_processor_id();
+
+	if (zone)
+		drain_pages_zone(cpu, zone);
+	else
+		drain_pages(cpu);
 }
 
 /*
  * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
  *
+ * When zone parameter is non-NULL, spill just the single zone's pages.
+ *
  * Note that this code is protected against sending an IPI to an offline
  * CPU but does not guarantee sending an IPI to newly hotplugged CPUs:
  * on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but
  * nothing keeps CPUs from showing up after we populated the cpumask and
  * before the call to on_each_cpu_mask().
  */
-void drain_all_pages(void)
+void drain_all_pages(struct zone *zone)
 {
 	int cpu;
-	struct per_cpu_pageset *pcp;
-	struct zone *zone;
 
 	/*
 	 * Allocate in the BSS so we wont require allocation in
@@ -1330,20 +1356,31 @@ void drain_all_pages(void)
 	 * disables preemption as part of its processing
 	 */
 	for_each_online_cpu(cpu) {
+		struct per_cpu_pageset *pcp;
+		struct zone *z;
 		bool has_pcps = false;
-		for_each_populated_zone(zone) {
+
+		if (zone) {
 			pcp = per_cpu_ptr(zone->pageset, cpu);
-			if (pcp->pcp.count) {
+			if (pcp->pcp.count)
 				has_pcps = true;
-				break;
+		} else {
+			for_each_populated_zone(z) {
+				pcp = per_cpu_ptr(z->pageset, cpu);
+				if (pcp->pcp.count) {
+					has_pcps = true;
+					break;
+				}
 			}
 		}
+
 		if (has_pcps)
 			cpumask_set_cpu(cpu, &cpus_with_pcps);
 		else
 			cpumask_clear_cpu(cpu, &cpus_with_pcps);
 	}
-	on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1);
+	on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages,
+								zone, 1);
 }
 
 #ifdef CONFIG_HIBERNATION
@@ -1705,7 +1742,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
 			unsigned long mark, int classzone_idx, int alloc_flags,
 			long free_pages)
 {
-	/* free_pages my go negative - that's OK */
+	/* free_pages may go negative - that's OK */
 	long min = mark;
 	int o;
 	long free_cma = 0;
@@ -2296,7 +2333,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	int classzone_idx, int migratetype, enum migrate_mode mode,
 	int *contended_compaction, bool *deferred_compaction)
 {
-	struct zone *last_compact_zone = NULL;
 	unsigned long compact_result;
 	struct page *page;
 
@@ -2307,7 +2343,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	compact_result = try_to_compact_pages(zonelist, order, gfp_mask,
 						nodemask, mode,
 						contended_compaction,
-						&last_compact_zone);
+						alloc_flags, classzone_idx);
 	current->flags &= ~PF_MEMALLOC;
 
 	switch (compact_result) {
@@ -2326,10 +2362,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	 */
 	count_vm_event(COMPACTSTALL);
 
-	/* Page migration frees to the PCP lists but we want merging */
-	drain_pages(get_cpu());
-	put_cpu();
-
 	page = get_page_from_freelist(gfp_mask, nodemask,
 			order, zonelist, high_zoneidx,
 			alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -2345,14 +2377,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	}
 
 	/*
-	 * last_compact_zone is where try_to_compact_pages thought allocation
-	 * should succeed, so it did not defer compaction. But here we know
-	 * that it didn't succeed, so we do the defer.
-	 */
-	if (last_compact_zone && mode != MIGRATE_ASYNC)
-		defer_compaction(last_compact_zone, order);
-
-	/*
 	 * It's bad if compaction run occurs and fails. The most likely reason
 	 * is that pages exist, but not enough to satisfy watermarks.
 	 */
@@ -2433,7 +2457,7 @@ retry:
 	 * pages are pinned on the per-cpu lists. Drain them and try again
 	 */
 	if (!page && !drained) {
-		drain_all_pages();
+		drain_all_pages(NULL);
 		drained = true;
 		goto retry;
 	}
@@ -3893,14 +3917,14 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
 	else
 		page_group_by_mobility_disabled = 0;
 
-	printk("Built %i zonelists in %s order, mobility grouping %s. "
+	pr_info("Built %i zonelists in %s order, mobility grouping %s. "
 		"Total pages: %ld\n",
 			nr_online_nodes,
 			zonelist_order_name[current_zonelist_order],
 			page_group_by_mobility_disabled ? "off" : "on",
 			vm_total_pages);
 #ifdef CONFIG_NUMA
-	printk("Policy zone: %s\n", zone_names[policy_zone]);
+	pr_info("Policy zone: %s\n", zone_names[policy_zone]);
 #endif
 }
 
@@ -4832,7 +4856,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 #endif
 	init_waitqueue_head(&pgdat->kswapd_wait);
 	init_waitqueue_head(&pgdat->pfmemalloc_wait);
-	pgdat_page_cgroup_init(pgdat);
 
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
@@ -5334,33 +5357,33 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 	find_zone_movable_pfns_for_nodes();
 
 	/* Print out the zone ranges */
-	printk("Zone ranges:\n");
+	pr_info("Zone ranges:\n");
 	for (i = 0; i < MAX_NR_ZONES; i++) {
 		if (i == ZONE_MOVABLE)
 			continue;
-		printk(KERN_CONT " %-8s ", zone_names[i]);
+		pr_info(" %-8s ", zone_names[i]);
 		if (arch_zone_lowest_possible_pfn[i] ==
 				arch_zone_highest_possible_pfn[i])
-			printk(KERN_CONT "empty\n");
+			pr_cont("empty\n");
 		else
-			printk(KERN_CONT "[mem %0#10lx-%0#10lx]\n",
+			pr_cont("[mem %0#10lx-%0#10lx]\n",
 				arch_zone_lowest_possible_pfn[i] << PAGE_SHIFT,
 				(arch_zone_highest_possible_pfn[i]
 					<< PAGE_SHIFT) - 1);
 	}
 
 	/* Print out the PFNs ZONE_MOVABLE begins at in each node */
-	printk("Movable zone start for each node\n");
+	pr_info("Movable zone start for each node\n");
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		if (zone_movable_pfn[i])
-			printk(" Node %d: %#010lx\n", i,
+			pr_info(" Node %d: %#010lx\n", i,
 				zone_movable_pfn[i] << PAGE_SHIFT);
 	}
 
 	/* Print out the early node map */
-	printk("Early memory node ranges\n");
+	pr_info("Early memory node ranges\n");
 	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
-		printk(" node %3d: [mem %#010lx-%#010lx]\n", nid,
+		pr_info(" node %3d: [mem %#010lx-%#010lx]\n", nid,
 			start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1);
 
 	/* Initialise every node */
@@ -5496,7 +5519,7 @@ void __init mem_init_print_info(const char *str)
 
 #undef adj_init_size
 
-	printk("Memory: %luK/%luK available "
+	pr_info("Memory: %luK/%luK available "
 	       "(%luK kernel code, %luK rwdata, %luK rodata, "
 	       "%luK init, %luK bss, %luK reserved"
 #ifdef CONFIG_HIGHMEM
@@ -6385,7 +6408,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	 */
 
 	lru_add_drain_all();
-	drain_all_pages();
+	drain_all_pages(cc.zone);
 
 	order = 0;
 	outer_start = start;