path: root/mm/page_alloc.c
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	238
1 file changed, 143 insertions, 95 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index eee3efa58c91..df2022ff0c8a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -90,6 +90,9 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
 #ifdef CONFIG_HIGHMEM
 	[N_HIGH_MEMORY] = { { [0] = 1UL } },
 #endif
+#ifdef CONFIG_MOVABLE_NODE
+	[N_MEMORY] = { { [0] = 1UL } },
+#endif
 	[N_CPU] = { { [0] = 1UL } },
 #endif	/* NUMA */
 };
@@ -218,11 +221,6 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 int page_group_by_mobility_disabled __read_mostly;
 
-/*
- * NOTE:
- * Don't use set_pageblock_migratetype(page, MIGRATE_ISOLATE) directly.
- * Instead, use {un}set_pageblock_isolate.
- */
 void set_pageblock_migratetype(struct page *page, int migratetype)
 {
 
@@ -368,8 +366,7 @@ static int destroy_compound_page(struct page *page, unsigned long order)
 	int nr_pages = 1 << order;
 	int bad = 0;
 
-	if (unlikely(compound_order(page) != order) ||
-	    unlikely(!PageHead(page))) {
+	if (unlikely(compound_order(page) != order)) {
 		bad_page(page);
 		bad++;
 	}
@@ -608,6 +605,7 @@ static inline int free_pages_check(struct page *page)
 		bad_page(page);
 		return 1;
 	}
+	reset_page_last_nid(page);
 	if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
 		page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
 	return 0;
@@ -732,6 +730,13 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	local_irq_restore(flags);
 }
 
+/*
+ * Read access to zone->managed_pages is safe because it's unsigned long,
+ * but we still need to serialize writers. Currently all callers of
+ * __free_pages_bootmem() except put_page_bootmem() should only be used
+ * at boot time. So for shorter boot time, we shift the burden to
+ * put_page_bootmem() to serialize writers.
+ */
 void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
 {
 	unsigned int nr_pages = 1 << order;
@@ -747,6 +752,7 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
 		set_page_count(p, 0);
 	}
 
+	page_zone(page)->managed_pages += 1 << order;
 	set_page_refcounted(page);
 	__free_pages(page, order);
 }
@@ -1378,14 +1384,8 @@ void split_page(struct page *page, unsigned int order)
 		set_page_refcounted(page + i);
 }
 
-/*
- * Similar to the split_page family of functions except that the page
- * required at the given order and being isolated now to prevent races
- * with parallel allocators
- */
-int capture_free_page(struct page *page, int alloc_order, int migratetype)
+static int __isolate_free_page(struct page *page, unsigned int order)
 {
-	unsigned int order;
 	unsigned long watermark;
 	struct zone *zone;
 	int mt;
@@ -1393,7 +1393,6 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 	BUG_ON(!PageBuddy(page));
 
 	zone = page_zone(page);
-	order = page_order(page);
 	mt = get_pageblock_migratetype(page);
 
 	if (mt != MIGRATE_ISOLATE) {
@@ -1402,7 +1401,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 		if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
 			return 0;
 
-		__mod_zone_freepage_state(zone, -(1UL << alloc_order), mt);
+		__mod_zone_freepage_state(zone, -(1UL << order), mt);
 	}
 
 	/* Remove page from free list */
@@ -1410,11 +1409,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 	zone->free_area[order].nr_free--;
 	rmv_page_order(page);
 
-	if (alloc_order != order)
-		expand(zone, page, alloc_order, order,
-			&zone->free_area[order], migratetype);
-
-	/* Set the pageblock if the captured page is at least a pageblock */
+	/* Set the pageblock if the isolated page is at least a pageblock */
 	if (order >= pageblock_order - 1) {
 		struct page *endpage = page + (1 << order) - 1;
 		for (; page < endpage; page += pageblock_nr_pages) {
@@ -1425,7 +1420,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 		}
 	}
 
-	return 1UL << alloc_order;
+	return 1UL << order;
 }
 
 /*
@@ -1443,10 +1438,9 @@ int split_free_page(struct page *page)
 	unsigned int order;
 	int nr_pages;
 
-	BUG_ON(!PageBuddy(page));
 	order = page_order(page);
 
-	nr_pages = capture_free_page(page, order, 0);
+	nr_pages = __isolate_free_page(page, order);
 	if (!nr_pages)
 		return 0;
 
@@ -1644,20 +1638,6 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 	return true;
 }
 
-#ifdef CONFIG_MEMORY_ISOLATION
-static inline unsigned long nr_zone_isolate_freepages(struct zone *zone)
-{
-	if (unlikely(zone->nr_pageblock_isolate))
-		return zone->nr_pageblock_isolate * pageblock_nr_pages;
-	return 0;
-}
-#else
-static inline unsigned long nr_zone_isolate_freepages(struct zone *zone)
-{
-	return 0;
-}
-#endif
-
 bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		      int classzone_idx, int alloc_flags)
 {
@@ -1673,14 +1653,6 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
 	if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
 		free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
 
-	/*
-	 * If the zone has MIGRATE_ISOLATE type free pages, we should consider
-	 * it. nr_zone_isolate_freepages is never accurate so kswapd might not
-	 * sleep although it could do so. But this is more desirable for memory
-	 * hotplug than sleeping which can cause a livelock in the direct
-	 * reclaim path.
-	 */
-	free_pages -= nr_zone_isolate_freepages(z);
 	return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
 								free_pages);
 }
@@ -1695,7 +1667,7 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
  *
  * If the zonelist cache is present in the passed in zonelist, then
  * returns a pointer to the allowed node mask (either the current
- * tasks mems_allowed, or node_states[N_HIGH_MEMORY].)
+ * tasks mems_allowed, or node_states[N_MEMORY].)
  *
  * If the zonelist cache is not available for this zonelist, does
  * nothing and returns NULL.
@@ -1724,7 +1696,7 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
 
 	allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ?
 					&cpuset_current_mems_allowed :
-					&node_states[N_HIGH_MEMORY];
+					&node_states[N_MEMORY];
 	return allowednodes;
 }
 
@@ -2152,8 +2124,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	bool *contended_compaction, bool *deferred_compaction,
 	unsigned long *did_some_progress)
 {
-	struct page *page = NULL;
-
 	if (!order)
 		return NULL;
 
@@ -2165,16 +2135,12 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
 						nodemask, sync_migration,
-						contended_compaction, &page);
+						contended_compaction);
 	current->flags &= ~PF_MEMALLOC;
 
-	/* If compaction captured a page, prep and use it */
-	if (page) {
-		prep_new_page(page, order, gfp_mask);
-		goto got_page;
-	}
-
 	if (*did_some_progress != COMPACT_SKIPPED) {
+		struct page *page;
+
 		/* Page migration frees to the PCP lists but we want merging */
 		drain_pages(get_cpu());
 		put_cpu();
@@ -2184,7 +2150,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 				alloc_flags & ~ALLOC_NO_WATERMARKS,
 				preferred_zone, migratetype);
 		if (page) {
-got_page:
 			preferred_zone->compact_blockskip_flush = false;
 			preferred_zone->compact_considered = 0;
 			preferred_zone->compact_defer_shift = 0;
@@ -2601,6 +2566,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	int migratetype = allocflags_to_migratetype(gfp_mask);
 	unsigned int cpuset_mems_cookie;
 	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
+	struct mem_cgroup *memcg = NULL;
 
 	gfp_mask &= gfp_allowed_mask;
 
@@ -2619,6 +2585,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (unlikely(!zonelist->_zonerefs->zone))
 		return NULL;
 
+	/*
+	 * Will only have any effect when __GFP_KMEMCG is set. This is
+	 * verified in the (always inline) callee
+	 */
+	if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+		return NULL;
+
 retry_cpuset:
 	cpuset_mems_cookie = get_mems_allowed();
 
@@ -2654,6 +2627,8 @@ out:
 	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
 		goto retry_cpuset;
 
+	memcg_kmem_commit_charge(page, memcg, order);
+
 	return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -2706,6 +2681,31 @@ void free_pages(unsigned long addr, unsigned int order)
 
 EXPORT_SYMBOL(free_pages);
 
+/*
+ * __free_memcg_kmem_pages and free_memcg_kmem_pages will free
+ * pages allocated with __GFP_KMEMCG.
+ *
+ * Those pages are accounted to a particular memcg, embedded in the
+ * corresponding page_cgroup. To avoid adding a hit in the allocator to search
+ * for that information only to find out that it is NULL for users who have no
+ * interest in that whatsoever, we provide these functions.
+ *
+ * The caller knows better which flags it relies on.
+ */
+void __free_memcg_kmem_pages(struct page *page, unsigned int order)
+{
+	memcg_kmem_uncharge_pages(page, order);
+	__free_pages(page, order);
+}
+
+void free_memcg_kmem_pages(unsigned long addr, unsigned int order)
+{
+	if (addr != 0) {
+		VM_BUG_ON(!virt_addr_valid((void *)addr));
+		__free_memcg_kmem_pages(virt_to_page((void *)addr), order);
+	}
+}
+
 static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size)
 {
 	if (addr) {
@@ -2981,6 +2981,7 @@ void show_free_areas(unsigned int filter)
 			" isolated(anon):%lukB"
 			" isolated(file):%lukB"
 			" present:%lukB"
+			" managed:%lukB"
 			" mlocked:%lukB"
 			" dirty:%lukB"
 			" writeback:%lukB"
@@ -3010,6 +3011,7 @@ void show_free_areas(unsigned int filter)
 			K(zone_page_state(zone, NR_ISOLATED_ANON)),
 			K(zone_page_state(zone, NR_ISOLATED_FILE)),
 			K(zone->present_pages),
+			K(zone->managed_pages),
 			K(zone_page_state(zone, NR_MLOCK)),
 			K(zone_page_state(zone, NR_FILE_DIRTY)),
 			K(zone_page_state(zone, NR_WRITEBACK)),
@@ -3238,7 +3240,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
 		return node;
 	}
 
-	for_each_node_state(n, N_HIGH_MEMORY) {
+	for_each_node_state(n, N_MEMORY) {
 
 		/* Don't want a node to appear more than once */
 		if (node_isset(n, *used_node_mask))
@@ -3380,7 +3382,7 @@ static int default_zonelist_order(void)
 	 * local memory, NODE_ORDER may be suitable.
 	 */
 	average_size = total_size /
-			(nodes_weight(node_states[N_HIGH_MEMORY]) + 1);
+			(nodes_weight(node_states[N_MEMORY]) + 1);
 	for_each_online_node(nid) {
 		low_kmem_size = 0;
 		total_size = 0;
@@ -3870,6 +3872,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		mminit_verify_page_links(page, zone, nid, pfn);
 		init_page_count(page);
 		reset_page_mapcount(page);
+		reset_page_last_nid(page);
 		SetPageReserved(page);
 		/*
 		 * Mark the block movable so that blocks are reserved for
@@ -4476,6 +4479,26 @@ void __init set_pageblock_order(void)
 
 #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
 
+static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
+						   unsigned long present_pages)
+{
+	unsigned long pages = spanned_pages;
+
+	/*
+	 * Provide a more accurate estimation if there are holes within
+	 * the zone and SPARSEMEM is in use. If there are holes within the
+	 * zone, each populated memory region may cost us one or two extra
+	 * memmap pages due to alignment because memmap pages for each
+	 * populated region may not be naturally aligned on page boundary.
+	 * So the (present_pages >> 4) heuristic is a tradeoff for that.
+	 */
+	if (spanned_pages > present_pages + (present_pages >> 4) &&
+	    IS_ENABLED(CONFIG_SPARSEMEM))
+		pages = present_pages;
+
+	return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT;
+}
+
 /*
  * Set up the zone data structures:
  *  - mark all pages reserved
@@ -4493,54 +4516,67 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 	int ret;
 
 	pgdat_resize_init(pgdat);
+#ifdef CONFIG_NUMA_BALANCING
+	spin_lock_init(&pgdat->numabalancing_migrate_lock);
+	pgdat->numabalancing_migrate_nr_pages = 0;
+	pgdat->numabalancing_migrate_next_window = jiffies;
+#endif
 	init_waitqueue_head(&pgdat->kswapd_wait);
 	init_waitqueue_head(&pgdat->pfmemalloc_wait);
 	pgdat_page_cgroup_init(pgdat);
 
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
-		unsigned long size, realsize, memmap_pages;
+		unsigned long size, realsize, freesize, memmap_pages;
 
 		size = zone_spanned_pages_in_node(nid, j, zones_size);
-		realsize = size - zone_absent_pages_in_node(nid, j,
+		realsize = freesize = size - zone_absent_pages_in_node(nid, j,
 								zholes_size);
 
 		/*
-		 * Adjust realsize so that it accounts for how much memory
+		 * Adjust freesize so that it accounts for how much memory
 		 * is used by this zone for memmap. This affects the watermark
 		 * and per-cpu initialisations
 		 */
-		memmap_pages =
-			PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
-		if (realsize >= memmap_pages) {
-			realsize -= memmap_pages;
+		memmap_pages = calc_memmap_size(size, realsize);
+		if (freesize >= memmap_pages) {
+			freesize -= memmap_pages;
 			if (memmap_pages)
 				printk(KERN_DEBUG
 				       "  %s zone: %lu pages used for memmap\n",
 				       zone_names[j], memmap_pages);
 		} else
 			printk(KERN_WARNING
-				"  %s zone: %lu pages exceeds realsize %lu\n",
-				zone_names[j], memmap_pages, realsize);
+				"  %s zone: %lu pages exceeds freesize %lu\n",
+				zone_names[j], memmap_pages, freesize);
 
 		/* Account for reserved pages */
-		if (j == 0 && realsize > dma_reserve) {
-			realsize -= dma_reserve;
+		if (j == 0 && freesize > dma_reserve) {
+			freesize -= dma_reserve;
 			printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
 					zone_names[0], dma_reserve);
 		}
 
 		if (!is_highmem_idx(j))
-			nr_kernel_pages += realsize;
-		nr_all_pages += realsize;
+			nr_kernel_pages += freesize;
+		/* Charge for highmem memmap if there are enough kernel pages */
+		else if (nr_kernel_pages > memmap_pages * 2)
+			nr_kernel_pages -= memmap_pages;
+		nr_all_pages += freesize;
 
 		zone->spanned_pages = size;
-		zone->present_pages = realsize;
+		zone->present_pages = freesize;
+		/*
+		 * Set an approximate value for lowmem here, it will be adjusted
+		 * when the bootmem allocator frees pages into the buddy system.
+		 * And all highmem pages will be managed by the buddy system.
+		 */
+		zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
 #ifdef CONFIG_NUMA
 		zone->node = nid;
-		zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
+		zone->min_unmapped_pages = (freesize*sysctl_min_unmapped_ratio)
 						/ 100;
-		zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
+		zone->min_slab_pages = (freesize * sysctl_min_slab_ratio) / 100;
 #endif
 		zone->name = zone_names[j];
 		spin_lock_init(&zone->lock);
@@ -4731,7 +4767,7 @@ unsigned long __init find_min_pfn_with_active_regions(void)
 /*
  * early_calculate_totalpages()
  * Sum pages in active regions for movable zone.
- * Populate N_HIGH_MEMORY for calculating usable_nodes.
+ * Populate N_MEMORY for calculating usable_nodes.
  */
 static unsigned long __init early_calculate_totalpages(void)
 {
@@ -4744,7 +4780,7 @@ static unsigned long __init early_calculate_totalpages(void)
 
 		totalpages += pages;
 		if (pages)
-			node_set_state(nid, N_HIGH_MEMORY);
+			node_set_state(nid, N_MEMORY);
 	}
 	return totalpages;
 }
@@ -4761,9 +4797,9 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 	unsigned long usable_startpfn;
 	unsigned long kernelcore_node, kernelcore_remaining;
 	/* save the state before borrow the nodemask */
-	nodemask_t saved_node_state = node_states[N_HIGH_MEMORY];
+	nodemask_t saved_node_state = node_states[N_MEMORY];
 	unsigned long totalpages = early_calculate_totalpages();
-	int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
+	int usable_nodes = nodes_weight(node_states[N_MEMORY]);
 
 	/*
 	 * If movablecore was specified, calculate what size of
@@ -4798,7 +4834,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 restart:
 	/* Spread kernelcore memory as evenly as possible throughout nodes */
 	kernelcore_node = required_kernelcore / usable_nodes;
-	for_each_node_state(nid, N_HIGH_MEMORY) {
+	for_each_node_state(nid, N_MEMORY) {
 		unsigned long start_pfn, end_pfn;
 
 		/*
@@ -4890,23 +4926,27 @@ restart:
 
 out:
 	/* restore the node_state */
-	node_states[N_HIGH_MEMORY] = saved_node_state;
+	node_states[N_MEMORY] = saved_node_state;
 }
 
-/* Any regular memory on that node ? */
-static void __init check_for_regular_memory(pg_data_t *pgdat)
+/* Any regular or high memory on that node ? */
+static void check_for_memory(pg_data_t *pgdat, int nid)
 {
-#ifdef CONFIG_HIGHMEM
 	enum zone_type zone_type;
 
-	for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) {
+	if (N_MEMORY == N_NORMAL_MEMORY)
+		return;
+
+	for (zone_type = 0; zone_type <= ZONE_MOVABLE - 1; zone_type++) {
 		struct zone *zone = &pgdat->node_zones[zone_type];
 		if (zone->present_pages) {
-			node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY);
+			node_set_state(nid, N_HIGH_MEMORY);
+			if (N_NORMAL_MEMORY != N_HIGH_MEMORY &&
+			    zone_type <= ZONE_NORMAL)
+				node_set_state(nid, N_NORMAL_MEMORY);
 			break;
 		}
 	}
-#endif
 }
 
 /**
@@ -4989,8 +5029,8 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 
 		/* Any memory on that node */
 		if (pgdat->node_present_pages)
-			node_set_state(nid, N_HIGH_MEMORY);
-		check_for_regular_memory(pgdat);
+			node_set_state(nid, N_MEMORY);
+		check_for_memory(pgdat, nid);
 	}
 }
 
@@ -5545,7 +5585,7 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
 	pfn &= (PAGES_PER_SECTION-1);
 	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
 #else
-	pfn = pfn - zone->zone_start_pfn;
+	pfn = pfn - round_down(zone->zone_start_pfn, pageblock_nr_pages);
 	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
 #endif /* CONFIG_SPARSEMEM */
 }
@@ -5727,7 +5767,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 	unsigned int tries = 0;
 	int ret = 0;
 
-	migrate_prep_local();
+	migrate_prep();
 
 	while (pfn < end || !list_empty(&cc->migratepages)) {
 		if (fatal_signal_pending(current)) {
@@ -5755,7 +5795,8 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 
 		ret = migrate_pages(&cc->migratepages,
 				    alloc_migrate_target,
-				    0, false, MIGRATE_SYNC);
+				    0, false, MIGRATE_SYNC,
+				    MR_CMA);
 	}
 
 	putback_movable_pages(&cc->migratepages);
@@ -5891,8 +5932,15 @@ done:
 
 void free_contig_range(unsigned long pfn, unsigned nr_pages)
 {
-	for (; nr_pages--; ++pfn)
-		__free_page(pfn_to_page(pfn));
+	unsigned int count = 0;
+
+	for (; nr_pages--; pfn++) {
+		struct page *page = pfn_to_page(pfn);
+
+		count += page_count(page) != 1;
+		__free_page(page);
+	}
+	WARN(count != 0, "%d pages are still in use!\n", count);
 }
 #endif
 