Diffstat (limited to 'mm/page_alloc.c'):
 mm/page_alloc.c | 317 +++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 201 insertions(+), 116 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c13ea7538891..bb90971182bd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -558,7 +558,8 @@ static inline void __free_one_page(struct page *page,
 	if (page_is_guard(buddy)) {
 		clear_page_guard_flag(buddy);
 		set_page_private(page, 0);
-		__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
+		__mod_zone_freepage_state(zone, 1 << order,
+					  migratetype);
 	} else {
 		list_del(&buddy->lru);
 		zone->free_area[order].nr_free--;
@@ -597,17 +598,6 @@ out:
 	zone->free_area[order].nr_free++;
 }
 
-/*
- * free_page_mlock() -- clean up attempts to free and mlocked() page.
- * Page should not be on lru, so no need to fix that up.
- * free_pages_check() will verify...
- */
-static inline void free_page_mlock(struct page *page)
-{
-	__dec_zone_page_state(page, NR_MLOCK);
-	__count_vm_event(UNEVICTABLE_MLOCKFREED);
-}
-
 static inline int free_pages_check(struct page *page)
 {
 	if (unlikely(page_mapcount(page) |
@@ -668,12 +658,17 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			batch_free = to_free;
 
 		do {
+			int mt;	/* migratetype of the to-be-freed page */
+
 			page = list_entry(list->prev, struct page, lru);
 			/* must delete as __free_one_page list manipulates */
 			list_del(&page->lru);
+			mt = get_freepage_migratetype(page);
 			/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
-			__free_one_page(page, zone, 0, page_private(page));
-			trace_mm_page_pcpu_drain(page, 0, page_private(page));
+			__free_one_page(page, zone, 0, mt);
+			trace_mm_page_pcpu_drain(page, 0, mt);
+			if (is_migrate_cma(mt))
+				__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
 		} while (--to_free && --batch_free && !list_empty(list));
 	}
 	__mod_zone_page_state(zone, NR_FREE_PAGES, count);
@@ -688,7 +683,8 @@ static void free_one_page(struct zone *zone, struct page *page, int order,
 	zone->pages_scanned = 0;
 
 	__free_one_page(page, zone, order, migratetype);
-	__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
+	if (unlikely(migratetype != MIGRATE_ISOLATE))
+		__mod_zone_freepage_state(zone, 1 << order, migratetype);
 	spin_unlock(&zone->lock);
 }
 
694 690
@@ -721,17 +717,16 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
 static void __free_pages_ok(struct page *page, unsigned int order)
 {
 	unsigned long flags;
-	int wasMlocked = __TestClearPageMlocked(page);
+	int migratetype;
 
 	if (!free_pages_prepare(page, order))
 		return;
 
 	local_irq_save(flags);
-	if (unlikely(wasMlocked))
-		free_page_mlock(page);
 	__count_vm_events(PGFREE, 1 << order);
-	free_one_page(page_zone(page), page, order,
-					get_pageblock_migratetype(page));
+	migratetype = get_pageblock_migratetype(page);
+	set_freepage_migratetype(page, migratetype);
+	free_one_page(page_zone(page), page, order, migratetype);
 	local_irq_restore(flags);
 }
 
@@ -811,7 +806,8 @@ static inline void expand(struct zone *zone, struct page *page,
 			set_page_guard_flag(&page[size]);
 			set_page_private(&page[size], high);
 			/* Guard pages are not available for any usage */
-			__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << high));
+			__mod_zone_freepage_state(zone, -(1 << high),
+						  migratetype);
 			continue;
 		}
 #endif
@@ -915,7 +911,7 @@ static int fallbacks[MIGRATE_TYPES][4] = {
  * Note that start_page and end_pages are not aligned on a pageblock
  * boundary. If alignment is required, use move_freepages_block()
  */
-static int move_freepages(struct zone *zone,
+int move_freepages(struct zone *zone,
 			  struct page *start_page, struct page *end_page,
 			  int migratetype)
 {
@@ -951,6 +947,7 @@ static int move_freepages(struct zone *zone,
 		order = page_order(page);
 		list_move(&page->lru,
 			  &zone->free_area[order].free_list[migratetype]);
+		set_freepage_migratetype(page, migratetype);
 		page += 1 << order;
 		pages_moved += 1 << order;
 	}
@@ -1135,8 +1132,11 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 			if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE)
 				mt = migratetype;
 		}
-		set_page_private(page, mt);
+		set_freepage_migratetype(page, mt);
 		list = &page->lru;
+		if (is_migrate_cma(mt))
+			__mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
+					      -(1 << order));
 	}
 	__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
 	spin_unlock(&zone->lock);
@@ -1296,16 +1296,13 @@ void free_hot_cold_page(struct page *page, int cold)
 	struct per_cpu_pages *pcp;
 	unsigned long flags;
 	int migratetype;
-	int wasMlocked = __TestClearPageMlocked(page);
 
 	if (!free_pages_prepare(page, 0))
 		return;
 
 	migratetype = get_pageblock_migratetype(page);
-	set_page_private(page, migratetype);
+	set_freepage_migratetype(page, migratetype);
 	local_irq_save(flags);
-	if (unlikely(wasMlocked))
-		free_page_mlock(page);
 	__count_vm_event(PGFREE);
 
 	/*
@@ -1380,20 +1377,16 @@ void split_page(struct page *page, unsigned int order)
 }
 
 /*
- * Similar to split_page except the page is already free. As this is only
- * being used for migration, the migratetype of the block also changes.
- * As this is called with interrupts disabled, the caller is responsible
- * for calling arch_alloc_page() and kernel_map_page() after interrupts
- * are enabled.
- *
- * Note: this is probably too low level an operation for use in drivers.
- * Please consult with lkml before using this in your driver.
+ * Similar to the split_page family of functions except that the page
+ * required at the given order and being isolated now to prevent races
+ * with parallel allocators
  */
-int split_free_page(struct page *page)
+int capture_free_page(struct page *page, int alloc_order, int migratetype)
 {
 	unsigned int order;
 	unsigned long watermark;
 	struct zone *zone;
+	int mt;
 
 	BUG_ON(!PageBuddy(page));
 
@@ -1409,12 +1402,16 @@ int split_free_page(struct page *page)
 	list_del(&page->lru);
 	zone->free_area[order].nr_free--;
 	rmv_page_order(page);
-	__mod_zone_page_state(zone, NR_FREE_PAGES, -(1UL << order));
 
-	/* Split into individual pages */
-	set_page_refcounted(page);
-	split_page(page, order);
+	mt = get_pageblock_migratetype(page);
+	if (unlikely(mt != MIGRATE_ISOLATE))
+		__mod_zone_freepage_state(zone, -(1UL << order), mt);
 
+	if (alloc_order != order)
+		expand(zone, page, alloc_order, order,
+			&zone->free_area[order], migratetype);
+
+	/* Set the pageblock if the captured page is at least a pageblock */
 	if (order >= pageblock_order - 1) {
 		struct page *endpage = page + (1 << order) - 1;
 		for (; page < endpage; page += pageblock_nr_pages) {
@@ -1425,7 +1422,35 @@ int split_free_page(struct page *page)
 		}
 	}
 
-	return 1 << order;
+	return 1UL << order;
+}
+
+/*
+ * Similar to split_page except the page is already free. As this is only
+ * being used for migration, the migratetype of the block also changes.
+ * As this is called with interrupts disabled, the caller is responsible
+ * for calling arch_alloc_page() and kernel_map_page() after interrupts
+ * are enabled.
+ *
+ * Note: this is probably too low level an operation for use in drivers.
+ * Please consult with lkml before using this in your driver.
+ */
+int split_free_page(struct page *page)
+{
+	unsigned int order;
+	int nr_pages;
+
+	BUG_ON(!PageBuddy(page));
+	order = page_order(page);
+
+	nr_pages = capture_free_page(page, order, 0);
+	if (!nr_pages)
+		return 0;
+
+	/* Split into individual pages */
+	set_page_refcounted(page);
+	split_page(page, order);
+	return nr_pages;
 }
 
 /*
@@ -1484,7 +1509,8 @@ again:
 		spin_unlock(&zone->lock);
 		if (!page)
 			goto failed;
-		__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
+		__mod_zone_freepage_state(zone, -(1 << order),
+					  get_pageblock_migratetype(page));
 	}
 
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
@@ -1501,19 +1527,6 @@ failed:
 	return NULL;
 }
 
-/* The ALLOC_WMARK bits are used as an index to zone->watermark */
-#define ALLOC_WMARK_MIN		WMARK_MIN
-#define ALLOC_WMARK_LOW		WMARK_LOW
-#define ALLOC_WMARK_HIGH	WMARK_HIGH
-#define ALLOC_NO_WATERMARKS	0x04 /* don't check watermarks at all */
-
-/* Mask to get the watermark bits */
-#define ALLOC_WMARK_MASK	(ALLOC_NO_WATERMARKS-1)
-
-#define ALLOC_HARDER		0x10 /* try to alloc harder */
-#define ALLOC_HIGH		0x20 /* __GFP_HIGH set */
-#define ALLOC_CPUSET		0x40 /* check for correct cpuset */
-
 #ifdef CONFIG_FAIL_PAGE_ALLOC
 
 static struct {
@@ -1608,7 +1621,11 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		min -= min / 2;
 	if (alloc_flags & ALLOC_HARDER)
 		min -= min / 4;
-
+#ifdef CONFIG_CMA
+	/* If allocation can't use CMA areas don't use free CMA pages */
+	if (!(alloc_flags & ALLOC_CMA))
+		free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
+#endif
 	if (free_pages <= min + lowmem_reserve)
 		return false;
 	for (o = 0; o < order; o++) {
@@ -1782,6 +1799,22 @@ static void zlc_clear_zones_full(struct zonelist *zonelist)
 	bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
 }
 
+static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
+{
+	return node_isset(local_zone->node, zone->zone_pgdat->reclaim_nodes);
+}
+
+static void __paginginit init_zone_allows_reclaim(int nid)
+{
+	int i;
+
+	for_each_online_node(i)
+		if (node_distance(nid, i) <= RECLAIM_DISTANCE) {
+			node_set(i, NODE_DATA(nid)->reclaim_nodes);
+			zone_reclaim_mode = 1;
+		}
+}
+
 #else	/* CONFIG_NUMA */
 
 static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
@@ -1802,6 +1835,15 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
 static void zlc_clear_zones_full(struct zonelist *zonelist)
 {
 }
+
+static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
+{
+	return true;
+}
+
+static inline void init_zone_allows_reclaim(int nid)
+{
+}
 #endif	/* CONFIG_NUMA */
 
 /*
@@ -1886,7 +1928,8 @@ zonelist_scan:
 				did_zlc_setup = 1;
 			}
 
-			if (zone_reclaim_mode == 0)
+			if (zone_reclaim_mode == 0 ||
+			    !zone_allows_reclaim(preferred_zone, zone))
 				goto this_zone_full;
 
 			/*
@@ -2105,7 +2148,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	bool *contended_compaction, bool *deferred_compaction,
 	unsigned long *did_some_progress)
 {
-	struct page *page;
+	struct page *page = NULL;
 
 	if (!order)
 		return NULL;
@@ -2118,10 +2161,16 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
 						nodemask, sync_migration,
-						contended_compaction);
+						contended_compaction, &page);
 	current->flags &= ~PF_MEMALLOC;
-	if (*did_some_progress != COMPACT_SKIPPED) {
 
+	/* If compaction captured a page, prep and use it */
+	if (page) {
+		prep_new_page(page, order, gfp_mask);
+		goto got_page;
+	}
+
+	if (*did_some_progress != COMPACT_SKIPPED) {
 		/* Page migration frees to the PCP lists but we want merging */
 		drain_pages(get_cpu());
 		put_cpu();
@@ -2131,6 +2180,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 				alloc_flags & ~ALLOC_NO_WATERMARKS,
 				preferred_zone, migratetype);
 		if (page) {
+got_page:
+			preferred_zone->compact_blockskip_flush = false;
 			preferred_zone->compact_considered = 0;
 			preferred_zone->compact_defer_shift = 0;
 			if (order >= preferred_zone->compact_order_failed)
@@ -2315,7 +2366,10 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 			unlikely(test_thread_flag(TIF_MEMDIE))))
 			alloc_flags |= ALLOC_NO_WATERMARKS;
 	}
-
+#ifdef CONFIG_CMA
+	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+		alloc_flags |= ALLOC_CMA;
+#endif
 	return alloc_flags;
 }
 
@@ -2362,9 +2416,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		goto nopage;
 
 restart:
-	if (!(gfp_mask & __GFP_NO_KSWAPD))
-		wake_all_kswapd(order, zonelist, high_zoneidx,
-						zone_idx(preferred_zone));
+	wake_all_kswapd(order, zonelist, high_zoneidx,
+					zone_idx(preferred_zone));
 
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
@@ -2441,7 +2494,7 @@ rebalance:
 	 * system then fail the allocation instead of entering direct reclaim.
 	 */
 	if ((deferred_compaction || contended_compaction) &&
-						(gfp_mask & __GFP_NO_KSWAPD))
+		(gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE)
 		goto nopage;
 
 	/* Try direct reclaim and then allocating */
@@ -2541,6 +2594,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	struct page *page = NULL;
 	int migratetype = allocflags_to_migratetype(gfp_mask);
 	unsigned int cpuset_mems_cookie;
+	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
 
 	gfp_mask &= gfp_allowed_mask;
 
@@ -2569,9 +2623,13 @@ retry_cpuset:
 	if (!preferred_zone)
 		goto out;
 
+#ifdef CONFIG_CMA
+	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+		alloc_flags |= ALLOC_CMA;
+#endif
 	/* First allocation attempt */
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
-			zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
+			zonelist, high_zoneidx, alloc_flags,
 			preferred_zone, migratetype);
 	if (unlikely(!page))
 		page = __alloc_pages_slowpath(gfp_mask, order,
@@ -2852,7 +2910,8 @@ void show_free_areas(unsigned int filter)
2852 " unevictable:%lu" 2910 " unevictable:%lu"
2853 " dirty:%lu writeback:%lu unstable:%lu\n" 2911 " dirty:%lu writeback:%lu unstable:%lu\n"
2854 " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n" 2912 " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
2855 " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n", 2913 " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
2914 " free_cma:%lu\n",
2856 global_page_state(NR_ACTIVE_ANON), 2915 global_page_state(NR_ACTIVE_ANON),
2857 global_page_state(NR_INACTIVE_ANON), 2916 global_page_state(NR_INACTIVE_ANON),
2858 global_page_state(NR_ISOLATED_ANON), 2917 global_page_state(NR_ISOLATED_ANON),
@@ -2869,7 +2928,8 @@ void show_free_areas(unsigned int filter)
 		global_page_state(NR_FILE_MAPPED),
 		global_page_state(NR_SHMEM),
 		global_page_state(NR_PAGETABLE),
-		global_page_state(NR_BOUNCE));
+		global_page_state(NR_BOUNCE),
+		global_page_state(NR_FREE_CMA_PAGES));
 
 	for_each_populated_zone(zone) {
 		int i;
@@ -2901,6 +2961,7 @@ void show_free_areas(unsigned int filter)
2901 " pagetables:%lukB" 2961 " pagetables:%lukB"
2902 " unstable:%lukB" 2962 " unstable:%lukB"
2903 " bounce:%lukB" 2963 " bounce:%lukB"
2964 " free_cma:%lukB"
2904 " writeback_tmp:%lukB" 2965 " writeback_tmp:%lukB"
2905 " pages_scanned:%lu" 2966 " pages_scanned:%lu"
2906 " all_unreclaimable? %s" 2967 " all_unreclaimable? %s"
@@ -2930,6 +2991,7 @@ void show_free_areas(unsigned int filter)
 			K(zone_page_state(zone, NR_PAGETABLE)),
 			K(zone_page_state(zone, NR_UNSTABLE_NFS)),
 			K(zone_page_state(zone, NR_BOUNCE)),
+			K(zone_page_state(zone, NR_FREE_CMA_PAGES)),
 			K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
 			zone->pages_scanned,
 			(zone->all_unreclaimable ? "yes" : "no")
@@ -3328,21 +3390,13 @@ static void build_zonelists(pg_data_t *pgdat)
 	j = 0;
 
 	while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {
-		int distance = node_distance(local_node, node);
-
-		/*
-		 * If another node is sufficiently far away then it is better
-		 * to reclaim pages in a zone before going off node.
-		 */
-		if (distance > RECLAIM_DISTANCE)
-			zone_reclaim_mode = 1;
-
 		/*
 		 * We don't want to pressure a particular node.
 		 * So adding penalty to the first node in same
 		 * distance group to make it round-robin.
 		 */
-		if (distance != node_distance(local_node, prev_node))
+		if (node_distance(local_node, node) !=
+		    node_distance(local_node, prev_node))
 			node_load[node] = load;
 
 		prev_node = node;
@@ -4438,11 +4492,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 
 		zone->spanned_pages = size;
 		zone->present_pages = realsize;
-#if defined CONFIG_COMPACTION || defined CONFIG_CMA
-		zone->compact_cached_free_pfn = zone->zone_start_pfn +
-						zone->spanned_pages;
-		zone->compact_cached_free_pfn &= ~(pageblock_nr_pages-1);
-#endif
 #ifdef CONFIG_NUMA
 		zone->node = nid;
 		zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
@@ -4521,6 +4570,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 
 	pgdat->node_id = nid;
 	pgdat->node_start_pfn = node_start_pfn;
+	init_zone_allows_reclaim(nid);
 	calculate_node_totalpages(pgdat, zones_size, zholes_size);
 
 	alloc_node_mem_map(pgdat);
@@ -4879,7 +4929,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 				zone_movable_pfn[i] << PAGE_SHIFT);
 	}
 
-	/* Print out the early_node_map[] */
+	/* Print out the early node map */
 	printk("Early memory node ranges\n");
 	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
 		printk("  node %3d: [mem %#010lx-%#010lx]\n", nid,
@@ -5619,47 +5669,28 @@ static unsigned long pfn_max_align_up(unsigned long pfn)
 						pageblock_nr_pages));
 }
 
-static struct page *
-__alloc_contig_migrate_alloc(struct page *page, unsigned long private,
-			     int **resultp)
-{
-	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
-
-	if (PageHighMem(page))
-		gfp_mask |= __GFP_HIGHMEM;
-
-	return alloc_page(gfp_mask);
-}
-
 /* [start, end) must belong to a single zone. */
-static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
+static int __alloc_contig_migrate_range(struct compact_control *cc,
+					unsigned long start, unsigned long end)
 {
 	/* This function is based on compact_zone() from compaction.c. */
-
+	unsigned long nr_reclaimed;
 	unsigned long pfn = start;
 	unsigned int tries = 0;
 	int ret = 0;
 
-	struct compact_control cc = {
-		.nr_migratepages = 0,
-		.order = -1,
-		.zone = page_zone(pfn_to_page(start)),
-		.sync = true,
-	};
-	INIT_LIST_HEAD(&cc.migratepages);
-
 	migrate_prep_local();
 
-	while (pfn < end || !list_empty(&cc.migratepages)) {
+	while (pfn < end || !list_empty(&cc->migratepages)) {
 		if (fatal_signal_pending(current)) {
 			ret = -EINTR;
 			break;
 		}
 
-		if (list_empty(&cc.migratepages)) {
-			cc.nr_migratepages = 0;
-			pfn = isolate_migratepages_range(cc.zone, &cc,
-							 pfn, end);
+		if (list_empty(&cc->migratepages)) {
+			cc->nr_migratepages = 0;
+			pfn = isolate_migratepages_range(cc->zone, cc,
+							 pfn, end, true);
 			if (!pfn) {
 				ret = -EINTR;
 				break;
@@ -5670,12 +5701,16 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
 				break;
 			}
 
-		ret = migrate_pages(&cc.migratepages,
-				    __alloc_contig_migrate_alloc,
+		nr_reclaimed = reclaim_clean_pages_from_list(cc->zone,
+							&cc->migratepages);
+		cc->nr_migratepages -= nr_reclaimed;
+
+		ret = migrate_pages(&cc->migratepages,
+				    alloc_migrate_target,
 				    0, false, MIGRATE_SYNC);
 	}
 
-	putback_lru_pages(&cc.migratepages);
+	putback_lru_pages(&cc->migratepages);
 	return ret > 0 ? 0 : ret;
 }
 
@@ -5754,6 +5789,15 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	unsigned long outer_start, outer_end;
 	int ret = 0, order;
 
+	struct compact_control cc = {
+		.nr_migratepages = 0,
+		.order = -1,
+		.zone = page_zone(pfn_to_page(start)),
+		.sync = true,
+		.ignore_skip_hint = true,
+	};
+	INIT_LIST_HEAD(&cc.migratepages);
+
 	/*
 	 * What we do here is we mark all pageblocks in range as
 	 * MIGRATE_ISOLATE. Because pageblock and max order pages may
@@ -5783,7 +5827,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	if (ret)
 		goto done;
 
-	ret = __alloc_contig_migrate_range(start, end);
+	ret = __alloc_contig_migrate_range(&cc, start, end);
 	if (ret)
 		goto done;
 
@@ -5832,7 +5876,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	__reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start);
 
 	/* Grab isolated pages from freelists. */
-	outer_end = isolate_freepages_range(outer_start, end);
+	outer_end = isolate_freepages_range(&cc, outer_start, end);
 	if (!outer_end) {
 		ret = -EBUSY;
 		goto done;
@@ -5874,6 +5918,7 @@ static int __meminit __zone_pcp_update(void *data)
 		local_irq_save(flags);
 		if (pcp->count > 0)
 			free_pcppages_bulk(zone, pcp->count, pcp);
+		drain_zonestat(zone, pset);
 		setup_pageset(pset, batch);
 		local_irq_restore(flags);
 	}
@@ -5890,10 +5935,16 @@ void __meminit zone_pcp_update(struct zone *zone)
 void zone_pcp_reset(struct zone *zone)
 {
 	unsigned long flags;
+	int cpu;
+	struct per_cpu_pageset *pset;
 
 	/* avoid races with drain_pages()  */
 	local_irq_save(flags);
 	if (zone->pageset != &boot_pageset) {
+		for_each_online_cpu(cpu) {
+			pset = per_cpu_ptr(zone->pageset, cpu);
+			drain_zonestat(zone, pset);
+		}
 		free_percpu(zone->pageset);
 		zone->pageset = &boot_pageset;
 	}
@@ -6047,3 +6098,37 @@ void dump_page(struct page *page)
 	dump_page_flags(page->flags);
 	mem_cgroup_print_bad_page(page);
 }
+
+/* reset zone->present_pages */
+void reset_zone_present_pages(void)
+{
+	struct zone *z;
+	int i, nid;
+
+	for_each_node_state(nid, N_HIGH_MEMORY) {
+		for (i = 0; i < MAX_NR_ZONES; i++) {
+			z = NODE_DATA(nid)->node_zones + i;
+			z->present_pages = 0;
+		}
+	}
+}
+
+/* calculate zone's present pages in buddy system */
+void fixup_zone_present_pages(int nid, unsigned long start_pfn,
+				unsigned long end_pfn)
+{
+	struct zone *z;
+	unsigned long zone_start_pfn, zone_end_pfn;
+	int i;
+
+	for (i = 0; i < MAX_NR_ZONES; i++) {
+		z = NODE_DATA(nid)->node_zones + i;
+		zone_start_pfn = z->zone_start_pfn;
+		zone_end_pfn = zone_start_pfn + z->spanned_pages;
+
+		/* if the two regions intersect */
+		if (!(zone_start_pfn >= end_pfn || zone_end_pfn <= start_pfn))
+			z->present_pages += min(end_pfn, zone_end_pfn) -
+					    max(start_pfn, zone_start_pfn);
+	}
+}