Diffstat (limited to 'mm/page_alloc.c'):
 -rw-r--r--  mm/page_alloc.c | 416
 1 file changed, 233 insertions, 183 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7e208f0ad68c..df2022ff0c8a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -90,6 +90,9 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = { | |||
90 | #ifdef CONFIG_HIGHMEM | 90 | #ifdef CONFIG_HIGHMEM |
91 | [N_HIGH_MEMORY] = { { [0] = 1UL } }, | 91 | [N_HIGH_MEMORY] = { { [0] = 1UL } }, |
92 | #endif | 92 | #endif |
93 | #ifdef CONFIG_MOVABLE_NODE | ||
94 | [N_MEMORY] = { { [0] = 1UL } }, | ||
95 | #endif | ||
93 | [N_CPU] = { { [0] = 1UL } }, | 96 | [N_CPU] = { { [0] = 1UL } }, |
94 | #endif /* NUMA */ | 97 | #endif /* NUMA */ |
95 | }; | 98 | }; |
@@ -218,11 +221,6 @@ EXPORT_SYMBOL(nr_online_nodes); | |||
218 | 221 | ||
219 | int page_group_by_mobility_disabled __read_mostly; | 222 | int page_group_by_mobility_disabled __read_mostly; |
220 | 223 | ||
221 | /* | ||
222 | * NOTE: | ||
223 | * Don't use set_pageblock_migratetype(page, MIGRATE_ISOLATE) directly. | ||
224 | * Instead, use {un}set_pageblock_isolate. | ||
225 | */ | ||
226 | void set_pageblock_migratetype(struct page *page, int migratetype) | 224 | void set_pageblock_migratetype(struct page *page, int migratetype) |
227 | { | 225 | { |
228 | 226 | ||
@@ -368,8 +366,7 @@ static int destroy_compound_page(struct page *page, unsigned long order) | |||
368 | int nr_pages = 1 << order; | 366 | int nr_pages = 1 << order; |
369 | int bad = 0; | 367 | int bad = 0; |
370 | 368 | ||
371 | if (unlikely(compound_order(page) != order) || | 369 | if (unlikely(compound_order(page) != order)) { |
372 | unlikely(!PageHead(page))) { | ||
373 | bad_page(page); | 370 | bad_page(page); |
374 | bad++; | 371 | bad++; |
375 | } | 372 | } |
@@ -523,7 +520,7 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, | |||
523 | * If a block is freed, and its buddy is also free, then this | 520 | * If a block is freed, and its buddy is also free, then this |
524 | * triggers coalescing into a block of larger size. | 521 | * triggers coalescing into a block of larger size. |
525 | * | 522 | * |
526 | * -- wli | 523 | * -- nyc |
527 | */ | 524 | */ |
528 | 525 | ||
529 | static inline void __free_one_page(struct page *page, | 526 | static inline void __free_one_page(struct page *page, |
@@ -608,6 +605,7 @@ static inline int free_pages_check(struct page *page) | |||
608 | bad_page(page); | 605 | bad_page(page); |
609 | return 1; | 606 | return 1; |
610 | } | 607 | } |
608 | reset_page_last_nid(page); | ||
611 | if (page->flags & PAGE_FLAGS_CHECK_AT_PREP) | 609 | if (page->flags & PAGE_FLAGS_CHECK_AT_PREP) |
612 | page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; | 610 | page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; |
613 | return 0; | 611 | return 0; |
@@ -667,11 +665,13 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |||
667 | /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ | 665 | /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ |
668 | __free_one_page(page, zone, 0, mt); | 666 | __free_one_page(page, zone, 0, mt); |
669 | trace_mm_page_pcpu_drain(page, 0, mt); | 667 | trace_mm_page_pcpu_drain(page, 0, mt); |
670 | if (is_migrate_cma(mt)) | 668 | if (likely(get_pageblock_migratetype(page) != MIGRATE_ISOLATE)) { |
671 | __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1); | 669 | __mod_zone_page_state(zone, NR_FREE_PAGES, 1); |
670 | if (is_migrate_cma(mt)) | ||
671 | __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1); | ||
672 | } | ||
672 | } while (--to_free && --batch_free && !list_empty(list)); | 673 | } while (--to_free && --batch_free && !list_empty(list)); |
673 | } | 674 | } |
674 | __mod_zone_page_state(zone, NR_FREE_PAGES, count); | ||
675 | spin_unlock(&zone->lock); | 675 | spin_unlock(&zone->lock); |
676 | } | 676 | } |
677 | 677 | ||
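The hunk above moves NR_FREE_PAGES accounting into the per-page loop so that pages freed into isolated pageblocks are no longer counted as free, replacing the single bulk update that used to follow the loop. A toy userspace model of that accounting difference (the migratetypes and counts are invented for the demonstration):

```c
#include <stdio.h>

enum { MIGRATE_MOVABLE, MIGRATE_ISOLATE };

int main(void)
{
	/* pretend these five pages are being drained from the pcp lists */
	int mt[] = { MIGRATE_MOVABLE, MIGRATE_ISOLATE, MIGRATE_MOVABLE,
		     MIGRATE_ISOLATE, MIGRATE_MOVABLE };
	int count = 5, i;
	long old_nr_free = 0, new_nr_free = 0;

	old_nr_free += count;		/* old: bulk update, isolated pages included */

	for (i = 0; i < count; i++)	/* new: per page, isolated pageblocks skipped */
		if (mt[i] != MIGRATE_ISOLATE)
			new_nr_free++;

	printf("old NR_FREE_PAGES delta: %ld, new: %ld\n",
	       old_nr_free, new_nr_free);
	return 0;
}
```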
@@ -730,6 +730,13 @@ static void __free_pages_ok(struct page *page, unsigned int order) | |||
730 | local_irq_restore(flags); | 730 | local_irq_restore(flags); |
731 | } | 731 | } |
732 | 732 | ||
733 | /* | ||
734 | * Read access to zone->managed_pages is safe because it's unsigned long, | ||
735 | * but we still need to serialize writers. Currently all callers of | ||
736 | * __free_pages_bootmem() except put_page_bootmem() should only be used | ||
737 | * at boot time. So for shorter boot time, we shift the burden to | ||
738 | * put_page_bootmem() to serialize writers. | ||
739 | */ | ||
733 | void __meminit __free_pages_bootmem(struct page *page, unsigned int order) | 740 | void __meminit __free_pages_bootmem(struct page *page, unsigned int order) |
734 | { | 741 | { |
735 | unsigned int nr_pages = 1 << order; | 742 | unsigned int nr_pages = 1 << order; |
@@ -745,6 +752,7 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order) | |||
745 | set_page_count(p, 0); | 752 | set_page_count(p, 0); |
746 | } | 753 | } |
747 | 754 | ||
755 | page_zone(page)->managed_pages += 1 << order; | ||
748 | set_page_refcounted(page); | 756 | set_page_refcounted(page); |
749 | __free_pages(page, order); | 757 | __free_pages(page, order); |
750 | } | 758 | } |
@@ -780,7 +788,7 @@ void __init init_cma_reserved_pageblock(struct page *page) | |||
780 | * large block of memory acted on by a series of small allocations. | 788 | * large block of memory acted on by a series of small allocations. |
781 | * This behavior is a critical factor in sglist merging's success. | 789 | * This behavior is a critical factor in sglist merging's success. |
782 | * | 790 | * |
783 | * -- wli | 791 | * -- nyc |
784 | */ | 792 | */ |
785 | static inline void expand(struct zone *zone, struct page *page, | 793 | static inline void expand(struct zone *zone, struct page *page, |
786 | int low, int high, struct free_area *area, | 794 | int low, int high, struct free_area *area, |
@@ -1376,14 +1384,8 @@ void split_page(struct page *page, unsigned int order) | |||
1376 | set_page_refcounted(page + i); | 1384 | set_page_refcounted(page + i); |
1377 | } | 1385 | } |
1378 | 1386 | ||
1379 | /* | 1387 | static int __isolate_free_page(struct page *page, unsigned int order) |
1380 | * Similar to the split_page family of functions except that the page | ||
1381 | * required at the given order and being isolated now to prevent races | ||
1382 | * with parallel allocators | ||
1383 | */ | ||
1384 | int capture_free_page(struct page *page, int alloc_order, int migratetype) | ||
1385 | { | 1388 | { |
1386 | unsigned int order; | ||
1387 | unsigned long watermark; | 1389 | unsigned long watermark; |
1388 | struct zone *zone; | 1390 | struct zone *zone; |
1389 | int mt; | 1391 | int mt; |
@@ -1391,27 +1393,23 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) | |||
1391 | BUG_ON(!PageBuddy(page)); | 1393 | BUG_ON(!PageBuddy(page)); |
1392 | 1394 | ||
1393 | zone = page_zone(page); | 1395 | zone = page_zone(page); |
1394 | order = page_order(page); | 1396 | mt = get_pageblock_migratetype(page); |
1395 | 1397 | ||
1396 | /* Obey watermarks as if the page was being allocated */ | 1398 | if (mt != MIGRATE_ISOLATE) { |
1397 | watermark = low_wmark_pages(zone) + (1 << order); | 1399 | /* Obey watermarks as if the page was being allocated */ |
1398 | if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) | 1400 | watermark = low_wmark_pages(zone) + (1 << order); |
1399 | return 0; | 1401 | if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) |
1402 | return 0; | ||
1403 | |||
1404 | __mod_zone_freepage_state(zone, -(1UL << order), mt); | ||
1405 | } | ||
1400 | 1406 | ||
1401 | /* Remove page from free list */ | 1407 | /* Remove page from free list */ |
1402 | list_del(&page->lru); | 1408 | list_del(&page->lru); |
1403 | zone->free_area[order].nr_free--; | 1409 | zone->free_area[order].nr_free--; |
1404 | rmv_page_order(page); | 1410 | rmv_page_order(page); |
1405 | 1411 | ||
1406 | mt = get_pageblock_migratetype(page); | 1412 | /* Set the pageblock if the isolated page is at least a pageblock */ |
1407 | if (unlikely(mt != MIGRATE_ISOLATE)) | ||
1408 | __mod_zone_freepage_state(zone, -(1UL << alloc_order), mt); | ||
1409 | |||
1410 | if (alloc_order != order) | ||
1411 | expand(zone, page, alloc_order, order, | ||
1412 | &zone->free_area[order], migratetype); | ||
1413 | |||
1414 | /* Set the pageblock if the captured page is at least a pageblock */ | ||
1415 | if (order >= pageblock_order - 1) { | 1413 | if (order >= pageblock_order - 1) { |
1416 | struct page *endpage = page + (1 << order) - 1; | 1414 | struct page *endpage = page + (1 << order) - 1; |
1417 | for (; page < endpage; page += pageblock_nr_pages) { | 1415 | for (; page < endpage; page += pageblock_nr_pages) { |
@@ -1422,7 +1420,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) | |||
1422 | } | 1420 | } |
1423 | } | 1421 | } |
1424 | 1422 | ||
1425 | return 1UL << alloc_order; | 1423 | return 1UL << order; |
1426 | } | 1424 | } |
1427 | 1425 | ||
1428 | /* | 1426 | /* |
@@ -1440,10 +1438,9 @@ int split_free_page(struct page *page) | |||
1440 | unsigned int order; | 1438 | unsigned int order; |
1441 | int nr_pages; | 1439 | int nr_pages; |
1442 | 1440 | ||
1443 | BUG_ON(!PageBuddy(page)); | ||
1444 | order = page_order(page); | 1441 | order = page_order(page); |
1445 | 1442 | ||
1446 | nr_pages = capture_free_page(page, order, 0); | 1443 | nr_pages = __isolate_free_page(page, order); |
1447 | if (!nr_pages) | 1444 | if (!nr_pages) |
1448 | return 0; | 1445 | return 0; |
1449 | 1446 | ||
@@ -1641,20 +1638,6 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark, | |||
1641 | return true; | 1638 | return true; |
1642 | } | 1639 | } |
1643 | 1640 | ||
1644 | #ifdef CONFIG_MEMORY_ISOLATION | ||
1645 | static inline unsigned long nr_zone_isolate_freepages(struct zone *zone) | ||
1646 | { | ||
1647 | if (unlikely(zone->nr_pageblock_isolate)) | ||
1648 | return zone->nr_pageblock_isolate * pageblock_nr_pages; | ||
1649 | return 0; | ||
1650 | } | ||
1651 | #else | ||
1652 | static inline unsigned long nr_zone_isolate_freepages(struct zone *zone) | ||
1653 | { | ||
1654 | return 0; | ||
1655 | } | ||
1656 | #endif | ||
1657 | |||
1658 | bool zone_watermark_ok(struct zone *z, int order, unsigned long mark, | 1641 | bool zone_watermark_ok(struct zone *z, int order, unsigned long mark, |
1659 | int classzone_idx, int alloc_flags) | 1642 | int classzone_idx, int alloc_flags) |
1660 | { | 1643 | { |
@@ -1670,14 +1653,6 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark, | |||
1670 | if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark) | 1653 | if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark) |
1671 | free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES); | 1654 | free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES); |
1672 | 1655 | ||
1673 | /* | ||
1674 | * If the zone has MIGRATE_ISOLATE type free pages, we should consider | ||
1675 | * it. nr_zone_isolate_freepages is never accurate so kswapd might not | ||
1676 | * sleep although it could do so. But this is more desirable for memory | ||
1677 | * hotplug than sleeping which can cause a livelock in the direct | ||
1678 | * reclaim path. | ||
1679 | */ | ||
1680 | free_pages -= nr_zone_isolate_freepages(z); | ||
1681 | return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags, | 1656 | return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags, |
1682 | free_pages); | 1657 | free_pages); |
1683 | } | 1658 | } |
@@ -1692,7 +1667,7 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark, | |||
1692 | * | 1667 | * |
1693 | * If the zonelist cache is present in the passed in zonelist, then | 1668 | * If the zonelist cache is present in the passed in zonelist, then |
1694 | * returns a pointer to the allowed node mask (either the current | 1669 | * returns a pointer to the allowed node mask (either the current |
1695 | * tasks mems_allowed, or node_states[N_HIGH_MEMORY].) | 1670 | * tasks mems_allowed, or node_states[N_MEMORY].) |
1696 | * | 1671 | * |
1697 | * If the zonelist cache is not available for this zonelist, does | 1672 | * If the zonelist cache is not available for this zonelist, does |
1698 | * nothing and returns NULL. | 1673 | * nothing and returns NULL. |
@@ -1721,7 +1696,7 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags) | |||
1721 | 1696 | ||
1722 | allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ? | 1697 | allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ? |
1723 | &cpuset_current_mems_allowed : | 1698 | &cpuset_current_mems_allowed : |
1724 | &node_states[N_HIGH_MEMORY]; | 1699 | &node_states[N_MEMORY]; |
1725 | return allowednodes; | 1700 | return allowednodes; |
1726 | } | 1701 | } |
1727 | 1702 | ||
@@ -1871,7 +1846,7 @@ zonelist_scan: | |||
1871 | */ | 1846 | */ |
1872 | for_each_zone_zonelist_nodemask(zone, z, zonelist, | 1847 | for_each_zone_zonelist_nodemask(zone, z, zonelist, |
1873 | high_zoneidx, nodemask) { | 1848 | high_zoneidx, nodemask) { |
1874 | if (NUMA_BUILD && zlc_active && | 1849 | if (IS_ENABLED(CONFIG_NUMA) && zlc_active && |
1875 | !zlc_zone_worth_trying(zonelist, z, allowednodes)) | 1850 | !zlc_zone_worth_trying(zonelist, z, allowednodes)) |
1876 | continue; | 1851 | continue; |
1877 | if ((alloc_flags & ALLOC_CPUSET) && | 1852 | if ((alloc_flags & ALLOC_CPUSET) && |
@@ -1917,7 +1892,8 @@ zonelist_scan: | |||
1917 | classzone_idx, alloc_flags)) | 1892 | classzone_idx, alloc_flags)) |
1918 | goto try_this_zone; | 1893 | goto try_this_zone; |
1919 | 1894 | ||
1920 | if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) { | 1895 | if (IS_ENABLED(CONFIG_NUMA) && |
1896 | !did_zlc_setup && nr_online_nodes > 1) { | ||
1921 | /* | 1897 | /* |
1922 | * we do zlc_setup if there are multiple nodes | 1898 | * we do zlc_setup if there are multiple nodes |
1923 | * and before considering the first zone allowed | 1899 | * and before considering the first zone allowed |
@@ -1936,7 +1912,7 @@ zonelist_scan: | |||
1936 | * As we may have just activated ZLC, check if the first | 1912 | * As we may have just activated ZLC, check if the first |
1937 | * eligible zone has failed zone_reclaim recently. | 1913 | * eligible zone has failed zone_reclaim recently. |
1938 | */ | 1914 | */ |
1939 | if (NUMA_BUILD && zlc_active && | 1915 | if (IS_ENABLED(CONFIG_NUMA) && zlc_active && |
1940 | !zlc_zone_worth_trying(zonelist, z, allowednodes)) | 1916 | !zlc_zone_worth_trying(zonelist, z, allowednodes)) |
1941 | continue; | 1917 | continue; |
1942 | 1918 | ||
@@ -1962,11 +1938,11 @@ try_this_zone: | |||
1962 | if (page) | 1938 | if (page) |
1963 | break; | 1939 | break; |
1964 | this_zone_full: | 1940 | this_zone_full: |
1965 | if (NUMA_BUILD) | 1941 | if (IS_ENABLED(CONFIG_NUMA)) |
1966 | zlc_mark_zone_full(zonelist, z); | 1942 | zlc_mark_zone_full(zonelist, z); |
1967 | } | 1943 | } |
1968 | 1944 | ||
1969 | if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) { | 1945 | if (unlikely(IS_ENABLED(CONFIG_NUMA) && page == NULL && zlc_active)) { |
1970 | /* Disable zlc cache for second zonelist scan */ | 1946 | /* Disable zlc cache for second zonelist scan */ |
1971 | zlc_active = 0; | 1947 | zlc_active = 0; |
1972 | goto zonelist_scan; | 1948 | goto zonelist_scan; |
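Several hunks above (and more below) replace the old NUMA_BUILD macro with IS_ENABLED(CONFIG_NUMA). As a rough illustration of why that is equivalent for dead-code elimination while keeping both branches visible to the compiler, here is a standalone sketch; the macro chain mirrors include/linux/kconfig.h of this era, but the CONFIG_NUMA define and the main() harness are only for the demonstration:

```c
#include <stdio.h>

/* Simplified copy of the IS_ENABLED() machinery from include/linux/kconfig.h */
#define __ARG_PLACEHOLDER_1 0,
#define config_enabled(cfg)            _config_enabled(cfg)
#define _config_enabled(value)         __config_enabled(__ARG_PLACEHOLDER_##value)
#define __config_enabled(arg1_or_junk) ___config_enabled(arg1_or_junk 1, 0)
#define ___config_enabled(__ignored, val, ...) val
#define IS_ENABLED(option) (config_enabled(option) || config_enabled(option##_MODULE))

#define CONFIG_NUMA 1	/* demonstration only; comment out to flip the result */

int main(void)
{
	if (IS_ENABLED(CONFIG_NUMA))
		printf("branch compiled in and taken\n");
	else
		printf("branch still parsed and type-checked, but optimized away\n");
	return 0;
}
```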
@@ -2148,8 +2124,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, | |||
2148 | bool *contended_compaction, bool *deferred_compaction, | 2124 | bool *contended_compaction, bool *deferred_compaction, |
2149 | unsigned long *did_some_progress) | 2125 | unsigned long *did_some_progress) |
2150 | { | 2126 | { |
2151 | struct page *page = NULL; | ||
2152 | |||
2153 | if (!order) | 2127 | if (!order) |
2154 | return NULL; | 2128 | return NULL; |
2155 | 2129 | ||
@@ -2161,16 +2135,12 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, | |||
2161 | current->flags |= PF_MEMALLOC; | 2135 | current->flags |= PF_MEMALLOC; |
2162 | *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, | 2136 | *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, |
2163 | nodemask, sync_migration, | 2137 | nodemask, sync_migration, |
2164 | contended_compaction, &page); | 2138 | contended_compaction); |
2165 | current->flags &= ~PF_MEMALLOC; | 2139 | current->flags &= ~PF_MEMALLOC; |
2166 | 2140 | ||
2167 | /* If compaction captured a page, prep and use it */ | ||
2168 | if (page) { | ||
2169 | prep_new_page(page, order, gfp_mask); | ||
2170 | goto got_page; | ||
2171 | } | ||
2172 | |||
2173 | if (*did_some_progress != COMPACT_SKIPPED) { | 2141 | if (*did_some_progress != COMPACT_SKIPPED) { |
2142 | struct page *page; | ||
2143 | |||
2174 | /* Page migration frees to the PCP lists but we want merging */ | 2144 | /* Page migration frees to the PCP lists but we want merging */ |
2175 | drain_pages(get_cpu()); | 2145 | drain_pages(get_cpu()); |
2176 | put_cpu(); | 2146 | put_cpu(); |
@@ -2180,7 +2150,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, | |||
2180 | alloc_flags & ~ALLOC_NO_WATERMARKS, | 2150 | alloc_flags & ~ALLOC_NO_WATERMARKS, |
2181 | preferred_zone, migratetype); | 2151 | preferred_zone, migratetype); |
2182 | if (page) { | 2152 | if (page) { |
2183 | got_page: | ||
2184 | preferred_zone->compact_blockskip_flush = false; | 2153 | preferred_zone->compact_blockskip_flush = false; |
2185 | preferred_zone->compact_considered = 0; | 2154 | preferred_zone->compact_considered = 0; |
2186 | preferred_zone->compact_defer_shift = 0; | 2155 | preferred_zone->compact_defer_shift = 0; |
@@ -2266,7 +2235,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, | |||
2266 | return NULL; | 2235 | return NULL; |
2267 | 2236 | ||
2268 | /* After successful reclaim, reconsider all zones for allocation */ | 2237 | /* After successful reclaim, reconsider all zones for allocation */ |
2269 | if (NUMA_BUILD) | 2238 | if (IS_ENABLED(CONFIG_NUMA)) |
2270 | zlc_clear_zones_full(zonelist); | 2239 | zlc_clear_zones_full(zonelist); |
2271 | 2240 | ||
2272 | retry: | 2241 | retry: |
@@ -2412,7 +2381,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, | |||
2412 | * allowed per node queues are empty and that nodes are | 2381 | * allowed per node queues are empty and that nodes are |
2413 | * over allocated. | 2382 | * over allocated. |
2414 | */ | 2383 | */ |
2415 | if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE) | 2384 | if (IS_ENABLED(CONFIG_NUMA) && |
2385 | (gfp_mask & GFP_THISNODE) == GFP_THISNODE) | ||
2416 | goto nopage; | 2386 | goto nopage; |
2417 | 2387 | ||
2418 | restart: | 2388 | restart: |
@@ -2596,6 +2566,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, | |||
2596 | int migratetype = allocflags_to_migratetype(gfp_mask); | 2566 | int migratetype = allocflags_to_migratetype(gfp_mask); |
2597 | unsigned int cpuset_mems_cookie; | 2567 | unsigned int cpuset_mems_cookie; |
2598 | int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET; | 2568 | int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET; |
2569 | struct mem_cgroup *memcg = NULL; | ||
2599 | 2570 | ||
2600 | gfp_mask &= gfp_allowed_mask; | 2571 | gfp_mask &= gfp_allowed_mask; |
2601 | 2572 | ||
@@ -2614,6 +2585,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, | |||
2614 | if (unlikely(!zonelist->_zonerefs->zone)) | 2585 | if (unlikely(!zonelist->_zonerefs->zone)) |
2615 | return NULL; | 2586 | return NULL; |
2616 | 2587 | ||
2588 | /* | ||
2589 | * Will only have any effect when __GFP_KMEMCG is set. This is | ||
2590 | * verified in the (always inline) callee | ||
2591 | */ | ||
2592 | if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order)) | ||
2593 | return NULL; | ||
2594 | |||
2617 | retry_cpuset: | 2595 | retry_cpuset: |
2618 | cpuset_mems_cookie = get_mems_allowed(); | 2596 | cpuset_mems_cookie = get_mems_allowed(); |
2619 | 2597 | ||
@@ -2649,6 +2627,8 @@ out: | |||
2649 | if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) | 2627 | if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) |
2650 | goto retry_cpuset; | 2628 | goto retry_cpuset; |
2651 | 2629 | ||
2630 | memcg_kmem_commit_charge(page, memcg, order); | ||
2631 | |||
2652 | return page; | 2632 | return page; |
2653 | } | 2633 | } |
2654 | EXPORT_SYMBOL(__alloc_pages_nodemask); | 2634 | EXPORT_SYMBOL(__alloc_pages_nodemask); |
@@ -2701,6 +2681,31 @@ void free_pages(unsigned long addr, unsigned int order) | |||
2701 | 2681 | ||
2702 | EXPORT_SYMBOL(free_pages); | 2682 | EXPORT_SYMBOL(free_pages); |
2703 | 2683 | ||
2684 | /* | ||
2685 | * __free_memcg_kmem_pages and free_memcg_kmem_pages will free | ||
2686 | * pages allocated with __GFP_KMEMCG. | ||
2687 | * | ||
2688 | * Those pages are accounted to a particular memcg, embedded in the | ||
2689 | * corresponding page_cgroup. To avoid adding a hit in the allocator to search | ||
2690 | * for that information only to find out that it is NULL for users who have no | ||
2691 | * interest in that whatsoever, we provide these functions. | ||
2692 | * | ||
2693 | * The caller knows better which flags it relies on. | ||
2694 | */ | ||
2695 | void __free_memcg_kmem_pages(struct page *page, unsigned int order) | ||
2696 | { | ||
2697 | memcg_kmem_uncharge_pages(page, order); | ||
2698 | __free_pages(page, order); | ||
2699 | } | ||
2700 | |||
2701 | void free_memcg_kmem_pages(unsigned long addr, unsigned int order) | ||
2702 | { | ||
2703 | if (addr != 0) { | ||
2704 | VM_BUG_ON(!virt_addr_valid((void *)addr)); | ||
2705 | __free_memcg_kmem_pages(virt_to_page((void *)addr), order); | ||
2706 | } | ||
2707 | } | ||
2708 | |||
2704 | static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size) | 2709 | static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size) |
2705 | { | 2710 | { |
2706 | if (addr) { | 2711 | if (addr) { |
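A minimal sketch of how the __free_memcg_kmem_pages()/free_memcg_kmem_pages() helpers added in the hunk above are meant to pair with an allocation that passes __GFP_KMEMCG (kernel context, not a standalone program; the call site is hypothetical and error handling is elided):

```c
/* hypothetical caller -- not part of this patch */
unsigned long addr = __get_free_pages(GFP_KERNEL | __GFP_KMEMCG, order);
if (!addr)
	return -ENOMEM;
/* ... use the pages ... */
free_memcg_kmem_pages(addr, order);	/* drops the memcg charge, then frees */
```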
@@ -2819,7 +2824,7 @@ unsigned int nr_free_pagecache_pages(void) | |||
2819 | 2824 | ||
2820 | static inline void show_node(struct zone *zone) | 2825 | static inline void show_node(struct zone *zone) |
2821 | { | 2826 | { |
2822 | if (NUMA_BUILD) | 2827 | if (IS_ENABLED(CONFIG_NUMA)) |
2823 | printk("Node %d ", zone_to_nid(zone)); | 2828 | printk("Node %d ", zone_to_nid(zone)); |
2824 | } | 2829 | } |
2825 | 2830 | ||
@@ -2877,6 +2882,31 @@ out: | |||
2877 | 2882 | ||
2878 | #define K(x) ((x) << (PAGE_SHIFT-10)) | 2883 | #define K(x) ((x) << (PAGE_SHIFT-10)) |
2879 | 2884 | ||
2885 | static void show_migration_types(unsigned char type) | ||
2886 | { | ||
2887 | static const char types[MIGRATE_TYPES] = { | ||
2888 | [MIGRATE_UNMOVABLE] = 'U', | ||
2889 | [MIGRATE_RECLAIMABLE] = 'E', | ||
2890 | [MIGRATE_MOVABLE] = 'M', | ||
2891 | [MIGRATE_RESERVE] = 'R', | ||
2892 | #ifdef CONFIG_CMA | ||
2893 | [MIGRATE_CMA] = 'C', | ||
2894 | #endif | ||
2895 | [MIGRATE_ISOLATE] = 'I', | ||
2896 | }; | ||
2897 | char tmp[MIGRATE_TYPES + 1]; | ||
2898 | char *p = tmp; | ||
2899 | int i; | ||
2900 | |||
2901 | for (i = 0; i < MIGRATE_TYPES; i++) { | ||
2902 | if (type & (1 << i)) | ||
2903 | *p++ = types[i]; | ||
2904 | } | ||
2905 | |||
2906 | *p = '\0'; | ||
2907 | printk("(%s) ", tmp); | ||
2908 | } | ||
2909 | |||
2880 | /* | 2910 | /* |
2881 | * Show free area list (used inside shift_scroll-lock stuff) | 2911 | * Show free area list (used inside shift_scroll-lock stuff) |
2882 | * We also calculate the percentage fragmentation. We do this by counting the | 2912 | * We also calculate the percentage fragmentation. We do this by counting the |
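show_free_areas() now annotates each per-order free count with the migratetypes that still have pages on that order's free lists. A userspace re-creation of the formatting (the letter table mirrors show_migration_types() above minus the CONFIG_CMA entry; the counts and masks are invented), which prints a line such as "609*4kB (UM) 21*8kB (M) 0*16kB 3*32kB (E) = 2700kB":

```c
#include <stdio.h>

enum { MIGRATE_UNMOVABLE, MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,
       MIGRATE_RESERVE, MIGRATE_ISOLATE, MIGRATE_TYPES };

static void show_migration_types(unsigned char type)
{
	static const char types[MIGRATE_TYPES] = {
		[MIGRATE_UNMOVABLE]   = 'U',
		[MIGRATE_RECLAIMABLE] = 'E',
		[MIGRATE_MOVABLE]     = 'M',
		[MIGRATE_RESERVE]     = 'R',
		[MIGRATE_ISOLATE]     = 'I',
	};
	char tmp[MIGRATE_TYPES + 1];
	char *p = tmp;
	int i;

	for (i = 0; i < MIGRATE_TYPES; i++)
		if (type & (1 << i))
			*p++ = types[i];
	*p = '\0';
	printf("(%s) ", tmp);
}

int main(void)
{
	/* orders 0..3: free count and a bitmask of non-empty free lists */
	unsigned long nr[]       = { 609, 21, 0, 3 };
	unsigned char typemask[] = { 1 << MIGRATE_UNMOVABLE | 1 << MIGRATE_MOVABLE,
				     1 << MIGRATE_MOVABLE, 0,
				     1 << MIGRATE_RECLAIMABLE };
	unsigned long total = 0;
	int order;

	for (order = 0; order < 4; order++) {
		printf("%lu*%lukB ", nr[order], 4UL << order);
		if (nr[order])
			show_migration_types(typemask[order]);
		total += nr[order] << order;
	}
	printf("= %lukB\n", total * 4);
	return 0;
}
```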
@@ -2951,6 +2981,7 @@ void show_free_areas(unsigned int filter) | |||
2951 | " isolated(anon):%lukB" | 2981 | " isolated(anon):%lukB" |
2952 | " isolated(file):%lukB" | 2982 | " isolated(file):%lukB" |
2953 | " present:%lukB" | 2983 | " present:%lukB" |
2984 | " managed:%lukB" | ||
2954 | " mlocked:%lukB" | 2985 | " mlocked:%lukB" |
2955 | " dirty:%lukB" | 2986 | " dirty:%lukB" |
2956 | " writeback:%lukB" | 2987 | " writeback:%lukB" |
@@ -2980,6 +3011,7 @@ void show_free_areas(unsigned int filter) | |||
2980 | K(zone_page_state(zone, NR_ISOLATED_ANON)), | 3011 | K(zone_page_state(zone, NR_ISOLATED_ANON)), |
2981 | K(zone_page_state(zone, NR_ISOLATED_FILE)), | 3012 | K(zone_page_state(zone, NR_ISOLATED_FILE)), |
2982 | K(zone->present_pages), | 3013 | K(zone->present_pages), |
3014 | K(zone->managed_pages), | ||
2983 | K(zone_page_state(zone, NR_MLOCK)), | 3015 | K(zone_page_state(zone, NR_MLOCK)), |
2984 | K(zone_page_state(zone, NR_FILE_DIRTY)), | 3016 | K(zone_page_state(zone, NR_FILE_DIRTY)), |
2985 | K(zone_page_state(zone, NR_WRITEBACK)), | 3017 | K(zone_page_state(zone, NR_WRITEBACK)), |
@@ -3005,6 +3037,7 @@ void show_free_areas(unsigned int filter) | |||
3005 | 3037 | ||
3006 | for_each_populated_zone(zone) { | 3038 | for_each_populated_zone(zone) { |
3007 | unsigned long nr[MAX_ORDER], flags, order, total = 0; | 3039 | unsigned long nr[MAX_ORDER], flags, order, total = 0; |
3040 | unsigned char types[MAX_ORDER]; | ||
3008 | 3041 | ||
3009 | if (skip_free_areas_node(filter, zone_to_nid(zone))) | 3042 | if (skip_free_areas_node(filter, zone_to_nid(zone))) |
3010 | continue; | 3043 | continue; |
@@ -3013,12 +3046,24 @@ void show_free_areas(unsigned int filter) | |||
3013 | 3046 | ||
3014 | spin_lock_irqsave(&zone->lock, flags); | 3047 | spin_lock_irqsave(&zone->lock, flags); |
3015 | for (order = 0; order < MAX_ORDER; order++) { | 3048 | for (order = 0; order < MAX_ORDER; order++) { |
3016 | nr[order] = zone->free_area[order].nr_free; | 3049 | struct free_area *area = &zone->free_area[order]; |
3050 | int type; | ||
3051 | |||
3052 | nr[order] = area->nr_free; | ||
3017 | total += nr[order] << order; | 3053 | total += nr[order] << order; |
3054 | |||
3055 | types[order] = 0; | ||
3056 | for (type = 0; type < MIGRATE_TYPES; type++) { | ||
3057 | if (!list_empty(&area->free_list[type])) | ||
3058 | types[order] |= 1 << type; | ||
3059 | } | ||
3018 | } | 3060 | } |
3019 | spin_unlock_irqrestore(&zone->lock, flags); | 3061 | spin_unlock_irqrestore(&zone->lock, flags); |
3020 | for (order = 0; order < MAX_ORDER; order++) | 3062 | for (order = 0; order < MAX_ORDER; order++) { |
3021 | printk("%lu*%lukB ", nr[order], K(1UL) << order); | 3063 | printk("%lu*%lukB ", nr[order], K(1UL) << order); |
3064 | if (nr[order]) | ||
3065 | show_migration_types(types[order]); | ||
3066 | } | ||
3022 | printk("= %lukB\n", K(total)); | 3067 | printk("= %lukB\n", K(total)); |
3023 | } | 3068 | } |
3024 | 3069 | ||
@@ -3195,7 +3240,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask) | |||
3195 | return node; | 3240 | return node; |
3196 | } | 3241 | } |
3197 | 3242 | ||
3198 | for_each_node_state(n, N_HIGH_MEMORY) { | 3243 | for_each_node_state(n, N_MEMORY) { |
3199 | 3244 | ||
3200 | /* Don't want a node to appear more than once */ | 3245 | /* Don't want a node to appear more than once */ |
3201 | if (node_isset(n, *used_node_mask)) | 3246 | if (node_isset(n, *used_node_mask)) |
@@ -3337,7 +3382,7 @@ static int default_zonelist_order(void) | |||
3337 | * local memory, NODE_ORDER may be suitable. | 3382 | * local memory, NODE_ORDER may be suitable. |
3338 | */ | 3383 | */ |
3339 | average_size = total_size / | 3384 | average_size = total_size / |
3340 | (nodes_weight(node_states[N_HIGH_MEMORY]) + 1); | 3385 | (nodes_weight(node_states[N_MEMORY]) + 1); |
3341 | for_each_online_node(nid) { | 3386 | for_each_online_node(nid) { |
3342 | low_kmem_size = 0; | 3387 | low_kmem_size = 0; |
3343 | total_size = 0; | 3388 | total_size = 0; |
@@ -3827,6 +3872,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
3827 | mminit_verify_page_links(page, zone, nid, pfn); | 3872 | mminit_verify_page_links(page, zone, nid, pfn); |
3828 | init_page_count(page); | 3873 | init_page_count(page); |
3829 | reset_page_mapcount(page); | 3874 | reset_page_mapcount(page); |
3875 | reset_page_last_nid(page); | ||
3830 | SetPageReserved(page); | 3876 | SetPageReserved(page); |
3831 | /* | 3877 | /* |
3832 | * Mark the block movable so that blocks are reserved for | 3878 | * Mark the block movable so that blocks are reserved for |
@@ -4433,6 +4479,26 @@ void __init set_pageblock_order(void) | |||
4433 | 4479 | ||
4434 | #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ | 4480 | #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ |
4435 | 4481 | ||
4482 | static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages, | ||
4483 | unsigned long present_pages) | ||
4484 | { | ||
4485 | unsigned long pages = spanned_pages; | ||
4486 | |||
4487 | /* | ||
4488 | * Provide a more accurate estimation if there are holes within | ||
4489 | * the zone and SPARSEMEM is in use. If there are holes within the | ||
4490 | * zone, each populated memory region may cost us one or two extra | ||
4491 | * memmap pages due to alignment because memmap pages for each | ||
4492 | * populated regions may not naturally algined on page boundary. | ||
4493 | * So the (present_pages >> 4) heuristic is a tradeoff for that. | ||
4494 | */ | ||
4495 | if (spanned_pages > present_pages + (present_pages >> 4) && | ||
4496 | IS_ENABLED(CONFIG_SPARSEMEM)) | ||
4497 | pages = present_pages; | ||
4498 | |||
4499 | return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT; | ||
4500 | } | ||
4501 | |||
4436 | /* | 4502 | /* |
4437 | * Set up the zone data structures: | 4503 | * Set up the zone data structures: |
4438 | * - mark all pages reserved | 4504 | * - mark all pages reserved |
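calc_memmap_size(), added above, trades accuracy for simplicity with the (present_pages >> 4) heuristic. A rough standalone rendering of the same arithmetic, with an assumed 64-byte struct page and made-up zone sizes:

```c
#include <stdio.h>

#define PAGE_SHIFT	12UL
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define STRUCT_PAGE_SIZE 64UL	/* assumed sizeof(struct page) */

static unsigned long calc_memmap_size(unsigned long spanned_pages,
				      unsigned long present_pages,
				      int sparsemem)
{
	unsigned long pages = spanned_pages;

	/* only trust present_pages when SPARSEMEM can cope with holes */
	if (sparsemem && spanned_pages > present_pages + (present_pages >> 4))
		pages = present_pages;

	return PAGE_ALIGN(pages * STRUCT_PAGE_SIZE) >> PAGE_SHIFT;
}

int main(void)
{
	/* a sparse zone: 1M pfns spanned, only 256K of them present */
	unsigned long spanned = 1UL << 20, present = 1UL << 18;

	printf("flat estimate   : %lu memmap pages\n",
	       calc_memmap_size(spanned, present, 0));
	printf("sparse estimate : %lu memmap pages\n",
	       calc_memmap_size(spanned, present, 1));
	return 0;
}
```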
@@ -4450,54 +4516,67 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, | |||
4450 | int ret; | 4516 | int ret; |
4451 | 4517 | ||
4452 | pgdat_resize_init(pgdat); | 4518 | pgdat_resize_init(pgdat); |
4519 | #ifdef CONFIG_NUMA_BALANCING | ||
4520 | spin_lock_init(&pgdat->numabalancing_migrate_lock); | ||
4521 | pgdat->numabalancing_migrate_nr_pages = 0; | ||
4522 | pgdat->numabalancing_migrate_next_window = jiffies; | ||
4523 | #endif | ||
4453 | init_waitqueue_head(&pgdat->kswapd_wait); | 4524 | init_waitqueue_head(&pgdat->kswapd_wait); |
4454 | init_waitqueue_head(&pgdat->pfmemalloc_wait); | 4525 | init_waitqueue_head(&pgdat->pfmemalloc_wait); |
4455 | pgdat_page_cgroup_init(pgdat); | 4526 | pgdat_page_cgroup_init(pgdat); |
4456 | 4527 | ||
4457 | for (j = 0; j < MAX_NR_ZONES; j++) { | 4528 | for (j = 0; j < MAX_NR_ZONES; j++) { |
4458 | struct zone *zone = pgdat->node_zones + j; | 4529 | struct zone *zone = pgdat->node_zones + j; |
4459 | unsigned long size, realsize, memmap_pages; | 4530 | unsigned long size, realsize, freesize, memmap_pages; |
4460 | 4531 | ||
4461 | size = zone_spanned_pages_in_node(nid, j, zones_size); | 4532 | size = zone_spanned_pages_in_node(nid, j, zones_size); |
4462 | realsize = size - zone_absent_pages_in_node(nid, j, | 4533 | realsize = freesize = size - zone_absent_pages_in_node(nid, j, |
4463 | zholes_size); | 4534 | zholes_size); |
4464 | 4535 | ||
4465 | /* | 4536 | /* |
4466 | * Adjust realsize so that it accounts for how much memory | 4537 | * Adjust freesize so that it accounts for how much memory |
4467 | * is used by this zone for memmap. This affects the watermark | 4538 | * is used by this zone for memmap. This affects the watermark |
4468 | * and per-cpu initialisations | 4539 | * and per-cpu initialisations |
4469 | */ | 4540 | */ |
4470 | memmap_pages = | 4541 | memmap_pages = calc_memmap_size(size, realsize); |
4471 | PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT; | 4542 | if (freesize >= memmap_pages) { |
4472 | if (realsize >= memmap_pages) { | 4543 | freesize -= memmap_pages; |
4473 | realsize -= memmap_pages; | ||
4474 | if (memmap_pages) | 4544 | if (memmap_pages) |
4475 | printk(KERN_DEBUG | 4545 | printk(KERN_DEBUG |
4476 | " %s zone: %lu pages used for memmap\n", | 4546 | " %s zone: %lu pages used for memmap\n", |
4477 | zone_names[j], memmap_pages); | 4547 | zone_names[j], memmap_pages); |
4478 | } else | 4548 | } else |
4479 | printk(KERN_WARNING | 4549 | printk(KERN_WARNING |
4480 | " %s zone: %lu pages exceeds realsize %lu\n", | 4550 | " %s zone: %lu pages exceeds freesize %lu\n", |
4481 | zone_names[j], memmap_pages, realsize); | 4551 | zone_names[j], memmap_pages, freesize); |
4482 | 4552 | ||
4483 | /* Account for reserved pages */ | 4553 | /* Account for reserved pages */ |
4484 | if (j == 0 && realsize > dma_reserve) { | 4554 | if (j == 0 && freesize > dma_reserve) { |
4485 | realsize -= dma_reserve; | 4555 | freesize -= dma_reserve; |
4486 | printk(KERN_DEBUG " %s zone: %lu pages reserved\n", | 4556 | printk(KERN_DEBUG " %s zone: %lu pages reserved\n", |
4487 | zone_names[0], dma_reserve); | 4557 | zone_names[0], dma_reserve); |
4488 | } | 4558 | } |
4489 | 4559 | ||
4490 | if (!is_highmem_idx(j)) | 4560 | if (!is_highmem_idx(j)) |
4491 | nr_kernel_pages += realsize; | 4561 | nr_kernel_pages += freesize; |
4492 | nr_all_pages += realsize; | 4562 | /* Charge for highmem memmap if there are enough kernel pages */ |
4563 | else if (nr_kernel_pages > memmap_pages * 2) | ||
4564 | nr_kernel_pages -= memmap_pages; | ||
4565 | nr_all_pages += freesize; | ||
4493 | 4566 | ||
4494 | zone->spanned_pages = size; | 4567 | zone->spanned_pages = size; |
4495 | zone->present_pages = realsize; | 4568 | zone->present_pages = freesize; |
4569 | /* | ||
4570 | * Set an approximate value for lowmem here, it will be adjusted | ||
4571 | * when the bootmem allocator frees pages into the buddy system. | ||
4572 | * And all highmem pages will be managed by the buddy system. | ||
4573 | */ | ||
4574 | zone->managed_pages = is_highmem_idx(j) ? realsize : freesize; | ||
4496 | #ifdef CONFIG_NUMA | 4575 | #ifdef CONFIG_NUMA |
4497 | zone->node = nid; | 4576 | zone->node = nid; |
4498 | zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio) | 4577 | zone->min_unmapped_pages = (freesize*sysctl_min_unmapped_ratio) |
4499 | / 100; | 4578 | / 100; |
4500 | zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100; | 4579 | zone->min_slab_pages = (freesize * sysctl_min_slab_ratio) / 100; |
4501 | #endif | 4580 | #endif |
4502 | zone->name = zone_names[j]; | 4581 | zone->name = zone_names[j]; |
4503 | spin_lock_init(&zone->lock); | 4582 | spin_lock_init(&zone->lock); |
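For reference, the relationship between the quantities the hunk above juggles (size, realsize, freesize and the initial managed_pages estimate) for a lowmem zone, with invented numbers:

```c
#include <stdio.h>

int main(void)
{
	unsigned long size        = 262144;	/* pfns spanned by the zone          */
	unsigned long absent      = 2048;	/* holes within the span             */
	unsigned long memmap      = 4096;	/* calc_memmap_size(size, realsize)  */
	unsigned long dma_reserve = 1024;	/* charged to the first zone only    */

	unsigned long realsize = size - absent;	/* pages actually present */
	unsigned long freesize = realsize;

	freesize -= memmap;			/* pages backing the memmap          */
	freesize -= dma_reserve;		/* reserved pages, zone 0 only       */

	printf("size=%lu realsize=%lu freesize=%lu\n", size, realsize, freesize);
	printf("managed_pages starts at %lu and is refined as bootmem "
	       "releases pages to the buddy allocator\n", freesize);
	return 0;
}
```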
@@ -4688,7 +4767,7 @@ unsigned long __init find_min_pfn_with_active_regions(void) | |||
4688 | /* | 4767 | /* |
4689 | * early_calculate_totalpages() | 4768 | * early_calculate_totalpages() |
4690 | * Sum pages in active regions for movable zone. | 4769 | * Sum pages in active regions for movable zone. |
4691 | * Populate N_HIGH_MEMORY for calculating usable_nodes. | 4770 | * Populate N_MEMORY for calculating usable_nodes. |
4692 | */ | 4771 | */ |
4693 | static unsigned long __init early_calculate_totalpages(void) | 4772 | static unsigned long __init early_calculate_totalpages(void) |
4694 | { | 4773 | { |
@@ -4701,7 +4780,7 @@ static unsigned long __init early_calculate_totalpages(void) | |||
4701 | 4780 | ||
4702 | totalpages += pages; | 4781 | totalpages += pages; |
4703 | if (pages) | 4782 | if (pages) |
4704 | node_set_state(nid, N_HIGH_MEMORY); | 4783 | node_set_state(nid, N_MEMORY); |
4705 | } | 4784 | } |
4706 | return totalpages; | 4785 | return totalpages; |
4707 | } | 4786 | } |
@@ -4718,9 +4797,9 @@ static void __init find_zone_movable_pfns_for_nodes(void) | |||
4718 | unsigned long usable_startpfn; | 4797 | unsigned long usable_startpfn; |
4719 | unsigned long kernelcore_node, kernelcore_remaining; | 4798 | unsigned long kernelcore_node, kernelcore_remaining; |
4720 | /* save the state before borrow the nodemask */ | 4799 | /* save the state before borrow the nodemask */ |
4721 | nodemask_t saved_node_state = node_states[N_HIGH_MEMORY]; | 4800 | nodemask_t saved_node_state = node_states[N_MEMORY]; |
4722 | unsigned long totalpages = early_calculate_totalpages(); | 4801 | unsigned long totalpages = early_calculate_totalpages(); |
4723 | int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]); | 4802 | int usable_nodes = nodes_weight(node_states[N_MEMORY]); |
4724 | 4803 | ||
4725 | /* | 4804 | /* |
4726 | * If movablecore was specified, calculate what size of | 4805 | * If movablecore was specified, calculate what size of |
@@ -4755,7 +4834,7 @@ static void __init find_zone_movable_pfns_for_nodes(void) | |||
4755 | restart: | 4834 | restart: |
4756 | /* Spread kernelcore memory as evenly as possible throughout nodes */ | 4835 | /* Spread kernelcore memory as evenly as possible throughout nodes */ |
4757 | kernelcore_node = required_kernelcore / usable_nodes; | 4836 | kernelcore_node = required_kernelcore / usable_nodes; |
4758 | for_each_node_state(nid, N_HIGH_MEMORY) { | 4837 | for_each_node_state(nid, N_MEMORY) { |
4759 | unsigned long start_pfn, end_pfn; | 4838 | unsigned long start_pfn, end_pfn; |
4760 | 4839 | ||
4761 | /* | 4840 | /* |
@@ -4847,23 +4926,27 @@ restart: | |||
4847 | 4926 | ||
4848 | out: | 4927 | out: |
4849 | /* restore the node_state */ | 4928 | /* restore the node_state */ |
4850 | node_states[N_HIGH_MEMORY] = saved_node_state; | 4929 | node_states[N_MEMORY] = saved_node_state; |
4851 | } | 4930 | } |
4852 | 4931 | ||
4853 | /* Any regular memory on that node ? */ | 4932 | /* Any regular or high memory on that node ? */ |
4854 | static void __init check_for_regular_memory(pg_data_t *pgdat) | 4933 | static void check_for_memory(pg_data_t *pgdat, int nid) |
4855 | { | 4934 | { |
4856 | #ifdef CONFIG_HIGHMEM | ||
4857 | enum zone_type zone_type; | 4935 | enum zone_type zone_type; |
4858 | 4936 | ||
4859 | for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) { | 4937 | if (N_MEMORY == N_NORMAL_MEMORY) |
4938 | return; | ||
4939 | |||
4940 | for (zone_type = 0; zone_type <= ZONE_MOVABLE - 1; zone_type++) { | ||
4860 | struct zone *zone = &pgdat->node_zones[zone_type]; | 4941 | struct zone *zone = &pgdat->node_zones[zone_type]; |
4861 | if (zone->present_pages) { | 4942 | if (zone->present_pages) { |
4862 | node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY); | 4943 | node_set_state(nid, N_HIGH_MEMORY); |
4944 | if (N_NORMAL_MEMORY != N_HIGH_MEMORY && | ||
4945 | zone_type <= ZONE_NORMAL) | ||
4946 | node_set_state(nid, N_NORMAL_MEMORY); | ||
4863 | break; | 4947 | break; |
4864 | } | 4948 | } |
4865 | } | 4949 | } |
4866 | #endif | ||
4867 | } | 4950 | } |
4868 | 4951 | ||
4869 | /** | 4952 | /** |
@@ -4946,8 +5029,8 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) | |||
4946 | 5029 | ||
4947 | /* Any memory on that node */ | 5030 | /* Any memory on that node */ |
4948 | if (pgdat->node_present_pages) | 5031 | if (pgdat->node_present_pages) |
4949 | node_set_state(nid, N_HIGH_MEMORY); | 5032 | node_set_state(nid, N_MEMORY); |
4950 | check_for_regular_memory(pgdat); | 5033 | check_for_memory(pgdat, nid); |
4951 | } | 5034 | } |
4952 | } | 5035 | } |
4953 | 5036 | ||
@@ -5175,10 +5258,6 @@ static void __setup_per_zone_wmarks(void) | |||
5175 | zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); | 5258 | zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); |
5176 | zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); | 5259 | zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); |
5177 | 5260 | ||
5178 | zone->watermark[WMARK_MIN] += cma_wmark_pages(zone); | ||
5179 | zone->watermark[WMARK_LOW] += cma_wmark_pages(zone); | ||
5180 | zone->watermark[WMARK_HIGH] += cma_wmark_pages(zone); | ||
5181 | |||
5182 | setup_zone_migrate_reserve(zone); | 5261 | setup_zone_migrate_reserve(zone); |
5183 | spin_unlock_irqrestore(&zone->lock, flags); | 5262 | spin_unlock_irqrestore(&zone->lock, flags); |
5184 | } | 5263 | } |
@@ -5506,7 +5585,7 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn) | |||
5506 | pfn &= (PAGES_PER_SECTION-1); | 5585 | pfn &= (PAGES_PER_SECTION-1); |
5507 | return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS; | 5586 | return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS; |
5508 | #else | 5587 | #else |
5509 | pfn = pfn - zone->zone_start_pfn; | 5588 | pfn = pfn - round_down(zone->zone_start_pfn, pageblock_nr_pages); |
5510 | return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS; | 5589 | return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS; |
5511 | #endif /* CONFIG_SPARSEMEM */ | 5590 | #endif /* CONFIG_SPARSEMEM */ |
5512 | } | 5591 | } |
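The !SPARSEMEM branch of pfn_to_bitidx() now rounds zone_start_pfn down to a pageblock boundary before subtracting. A standalone sketch of why that matters when a zone does not start on a pageblock boundary (pageblock_order, NR_PAGEBLOCK_BITS and all pfn values below are assumptions for the demonstration):

```c
#include <stdio.h>

#define pageblock_order		10UL
#define pageblock_nr_pages	(1UL << pageblock_order)
#define NR_PAGEBLOCK_BITS	4UL

static unsigned long round_down(unsigned long x, unsigned long align)
{
	return x & ~(align - 1);
}

static unsigned long old_bitidx(unsigned long pfn, unsigned long zone_start_pfn)
{
	return ((pfn - zone_start_pfn) >> pageblock_order) * NR_PAGEBLOCK_BITS;
}

static unsigned long new_bitidx(unsigned long pfn, unsigned long zone_start_pfn)
{
	return ((pfn - round_down(zone_start_pfn, pageblock_nr_pages))
		>> pageblock_order) * NR_PAGEBLOCK_BITS;
}

int main(void)
{
	unsigned long zone_start_pfn = 0x1f00;	/* not pageblock aligned      */
	unsigned long a = 0x2000, b = 0x23ff;	/* same 1024-page pageblock   */

	/* old arithmetic can place pfns of one pageblock in different slots */
	printf("old: %lu vs %lu\n", old_bitidx(a, zone_start_pfn),
	       old_bitidx(b, zone_start_pfn));
	printf("new: %lu vs %lu\n", new_bitidx(a, zone_start_pfn),
	       new_bitidx(b, zone_start_pfn));
	return 0;
}
```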
@@ -5576,7 +5655,8 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags, | |||
5576 | * MIGRATE_MOVABLE block might include unmovable pages. It means you can't | 5655 | * MIGRATE_MOVABLE block might include unmovable pages. It means you can't |
5577 | * expect this function should be exact. | 5656 | * expect this function should be exact. |
5578 | */ | 5657 | */ |
5579 | bool has_unmovable_pages(struct zone *zone, struct page *page, int count) | 5658 | bool has_unmovable_pages(struct zone *zone, struct page *page, int count, |
5659 | bool skip_hwpoisoned_pages) | ||
5580 | { | 5660 | { |
5581 | unsigned long pfn, iter, found; | 5661 | unsigned long pfn, iter, found; |
5582 | int mt; | 5662 | int mt; |
@@ -5611,6 +5691,13 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count) | |||
5611 | continue; | 5691 | continue; |
5612 | } | 5692 | } |
5613 | 5693 | ||
5694 | /* | ||
5695 | * The HWPoisoned page may be not in buddy system, and | ||
5696 | * page_count() is not 0. | ||
5697 | */ | ||
5698 | if (skip_hwpoisoned_pages && PageHWPoison(page)) | ||
5699 | continue; | ||
5700 | |||
5614 | if (!PageLRU(page)) | 5701 | if (!PageLRU(page)) |
5615 | found++; | 5702 | found++; |
5616 | /* | 5703 | /* |
@@ -5653,7 +5740,7 @@ bool is_pageblock_removable_nolock(struct page *page) | |||
5653 | zone->zone_start_pfn + zone->spanned_pages <= pfn) | 5740 | zone->zone_start_pfn + zone->spanned_pages <= pfn) |
5654 | return false; | 5741 | return false; |
5655 | 5742 | ||
5656 | return !has_unmovable_pages(zone, page, 0); | 5743 | return !has_unmovable_pages(zone, page, 0, true); |
5657 | } | 5744 | } |
5658 | 5745 | ||
5659 | #ifdef CONFIG_CMA | 5746 | #ifdef CONFIG_CMA |
@@ -5680,7 +5767,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, | |||
5680 | unsigned int tries = 0; | 5767 | unsigned int tries = 0; |
5681 | int ret = 0; | 5768 | int ret = 0; |
5682 | 5769 | ||
5683 | migrate_prep_local(); | 5770 | migrate_prep(); |
5684 | 5771 | ||
5685 | while (pfn < end || !list_empty(&cc->migratepages)) { | 5772 | while (pfn < end || !list_empty(&cc->migratepages)) { |
5686 | if (fatal_signal_pending(current)) { | 5773 | if (fatal_signal_pending(current)) { |
@@ -5708,61 +5795,14 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, | |||
5708 | 5795 | ||
5709 | ret = migrate_pages(&cc->migratepages, | 5796 | ret = migrate_pages(&cc->migratepages, |
5710 | alloc_migrate_target, | 5797 | alloc_migrate_target, |
5711 | 0, false, MIGRATE_SYNC); | 5798 | 0, false, MIGRATE_SYNC, |
5799 | MR_CMA); | ||
5712 | } | 5800 | } |
5713 | 5801 | ||
5714 | putback_lru_pages(&cc->migratepages); | 5802 | putback_movable_pages(&cc->migratepages); |
5715 | return ret > 0 ? 0 : ret; | 5803 | return ret > 0 ? 0 : ret; |
5716 | } | 5804 | } |
5717 | 5805 | ||
5718 | /* | ||
5719 | * Update zone's cma pages counter used for watermark level calculation. | ||
5720 | */ | ||
5721 | static inline void __update_cma_watermarks(struct zone *zone, int count) | ||
5722 | { | ||
5723 | unsigned long flags; | ||
5724 | spin_lock_irqsave(&zone->lock, flags); | ||
5725 | zone->min_cma_pages += count; | ||
5726 | spin_unlock_irqrestore(&zone->lock, flags); | ||
5727 | setup_per_zone_wmarks(); | ||
5728 | } | ||
5729 | |||
5730 | /* | ||
5731 | * Trigger memory pressure bump to reclaim some pages in order to be able to | ||
5732 | * allocate 'count' pages in single page units. Does similar work as | ||
5733 | *__alloc_pages_slowpath() function. | ||
5734 | */ | ||
5735 | static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count) | ||
5736 | { | ||
5737 | enum zone_type high_zoneidx = gfp_zone(gfp_mask); | ||
5738 | struct zonelist *zonelist = node_zonelist(0, gfp_mask); | ||
5739 | int did_some_progress = 0; | ||
5740 | int order = 1; | ||
5741 | |||
5742 | /* | ||
5743 | * Increase level of watermarks to force kswapd do his job | ||
5744 | * to stabilise at new watermark level. | ||
5745 | */ | ||
5746 | __update_cma_watermarks(zone, count); | ||
5747 | |||
5748 | /* Obey watermarks as if the page was being allocated */ | ||
5749 | while (!zone_watermark_ok(zone, 0, low_wmark_pages(zone), 0, 0)) { | ||
5750 | wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone)); | ||
5751 | |||
5752 | did_some_progress = __perform_reclaim(gfp_mask, order, zonelist, | ||
5753 | NULL); | ||
5754 | if (!did_some_progress) { | ||
5755 | /* Exhausted what can be done so it's blamo time */ | ||
5756 | out_of_memory(zonelist, gfp_mask, order, NULL, false); | ||
5757 | } | ||
5758 | } | ||
5759 | |||
5760 | /* Restore original watermark levels. */ | ||
5761 | __update_cma_watermarks(zone, -count); | ||
5762 | |||
5763 | return count; | ||
5764 | } | ||
5765 | |||
5766 | /** | 5806 | /** |
5767 | * alloc_contig_range() -- tries to allocate given range of pages | 5807 | * alloc_contig_range() -- tries to allocate given range of pages |
5768 | * @start: start PFN to allocate | 5808 | * @start: start PFN to allocate |
@@ -5786,7 +5826,6 @@ static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count) | |||
5786 | int alloc_contig_range(unsigned long start, unsigned long end, | 5826 | int alloc_contig_range(unsigned long start, unsigned long end, |
5787 | unsigned migratetype) | 5827 | unsigned migratetype) |
5788 | { | 5828 | { |
5789 | struct zone *zone = page_zone(pfn_to_page(start)); | ||
5790 | unsigned long outer_start, outer_end; | 5829 | unsigned long outer_start, outer_end; |
5791 | int ret = 0, order; | 5830 | int ret = 0, order; |
5792 | 5831 | ||
@@ -5824,7 +5863,8 @@ int alloc_contig_range(unsigned long start, unsigned long end, | |||
5824 | */ | 5863 | */ |
5825 | 5864 | ||
5826 | ret = start_isolate_page_range(pfn_max_align_down(start), | 5865 | ret = start_isolate_page_range(pfn_max_align_down(start), |
5827 | pfn_max_align_up(end), migratetype); | 5866 | pfn_max_align_up(end), migratetype, |
5867 | false); | ||
5828 | if (ret) | 5868 | if (ret) |
5829 | return ret; | 5869 | return ret; |
5830 | 5870 | ||
@@ -5863,18 +5903,13 @@ int alloc_contig_range(unsigned long start, unsigned long end, | |||
5863 | } | 5903 | } |
5864 | 5904 | ||
5865 | /* Make sure the range is really isolated. */ | 5905 | /* Make sure the range is really isolated. */ |
5866 | if (test_pages_isolated(outer_start, end)) { | 5906 | if (test_pages_isolated(outer_start, end, false)) { |
5867 | pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n", | 5907 | pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n", |
5868 | outer_start, end); | 5908 | outer_start, end); |
5869 | ret = -EBUSY; | 5909 | ret = -EBUSY; |
5870 | goto done; | 5910 | goto done; |
5871 | } | 5911 | } |
5872 | 5912 | ||
5873 | /* | ||
5874 | * Reclaim enough pages to make sure that contiguous allocation | ||
5875 | * will not starve the system. | ||
5876 | */ | ||
5877 | __reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start); | ||
5878 | 5913 | ||
5879 | /* Grab isolated pages from freelists. */ | 5914 | /* Grab isolated pages from freelists. */ |
5880 | outer_end = isolate_freepages_range(&cc, outer_start, end); | 5915 | outer_end = isolate_freepages_range(&cc, outer_start, end); |
@@ -5897,8 +5932,15 @@ done: | |||
5897 | 5932 | ||
5898 | void free_contig_range(unsigned long pfn, unsigned nr_pages) | 5933 | void free_contig_range(unsigned long pfn, unsigned nr_pages) |
5899 | { | 5934 | { |
5900 | for (; nr_pages--; ++pfn) | 5935 | unsigned int count = 0; |
5901 | __free_page(pfn_to_page(pfn)); | 5936 | |
5937 | for (; nr_pages--; pfn++) { | ||
5938 | struct page *page = pfn_to_page(pfn); | ||
5939 | |||
5940 | count += page_count(page) != 1; | ||
5941 | __free_page(page); | ||
5942 | } | ||
5943 | WARN(count != 0, "%d pages are still in use!\n", count); | ||
5902 | } | 5944 | } |
5903 | #endif | 5945 | #endif |
5904 | 5946 | ||
@@ -5932,7 +5974,6 @@ void __meminit zone_pcp_update(struct zone *zone) | |||
5932 | } | 5974 | } |
5933 | #endif | 5975 | #endif |
5934 | 5976 | ||
5935 | #ifdef CONFIG_MEMORY_HOTREMOVE | ||
5936 | void zone_pcp_reset(struct zone *zone) | 5977 | void zone_pcp_reset(struct zone *zone) |
5937 | { | 5978 | { |
5938 | unsigned long flags; | 5979 | unsigned long flags; |
@@ -5952,6 +5993,7 @@ void zone_pcp_reset(struct zone *zone) | |||
5952 | local_irq_restore(flags); | 5993 | local_irq_restore(flags); |
5953 | } | 5994 | } |
5954 | 5995 | ||
5996 | #ifdef CONFIG_MEMORY_HOTREMOVE | ||
5955 | /* | 5997 | /* |
5956 | * All pages in the range must be isolated before calling this. | 5998 | * All pages in the range must be isolated before calling this. |
5957 | */ | 5999 | */ |
@@ -5978,6 +6020,16 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) | |||
5978 | continue; | 6020 | continue; |
5979 | } | 6021 | } |
5980 | page = pfn_to_page(pfn); | 6022 | page = pfn_to_page(pfn); |
6023 | /* | ||
6024 | * The HWPoisoned page may be not in buddy system, and | ||
6025 | * page_count() is not 0. | ||
6026 | */ | ||
6027 | if (unlikely(!PageBuddy(page) && PageHWPoison(page))) { | ||
6028 | pfn++; | ||
6029 | SetPageReserved(page); | ||
6030 | continue; | ||
6031 | } | ||
6032 | |||
5981 | BUG_ON(page_count(page)); | 6033 | BUG_ON(page_count(page)); |
5982 | BUG_ON(!PageBuddy(page)); | 6034 | BUG_ON(!PageBuddy(page)); |
5983 | order = page_order(page); | 6035 | order = page_order(page); |
@@ -5988,8 +6040,6 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) | |||
5988 | list_del(&page->lru); | 6040 | list_del(&page->lru); |
5989 | rmv_page_order(page); | 6041 | rmv_page_order(page); |
5990 | zone->free_area[order].nr_free--; | 6042 | zone->free_area[order].nr_free--; |
5991 | __mod_zone_page_state(zone, NR_FREE_PAGES, | ||
5992 | - (1UL << order)); | ||
5993 | for (i = 0; i < (1 << order); i++) | 6043 | for (i = 0; i < (1 << order); i++) |
5994 | SetPageReserved((page+i)); | 6044 | SetPageReserved((page+i)); |
5995 | pfn += (1 << order); | 6045 | pfn += (1 << order); |