Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 354 |
1 file changed, 225 insertions, 129 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7e208f0ad68c..4ba5e37127fc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -90,6 +90,9 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = { | |||
90 | #ifdef CONFIG_HIGHMEM | 90 | #ifdef CONFIG_HIGHMEM |
91 | [N_HIGH_MEMORY] = { { [0] = 1UL } }, | 91 | [N_HIGH_MEMORY] = { { [0] = 1UL } }, |
92 | #endif | 92 | #endif |
93 | #ifdef CONFIG_MOVABLE_NODE | ||
94 | [N_MEMORY] = { { [0] = 1UL } }, | ||
95 | #endif | ||
93 | [N_CPU] = { { [0] = 1UL } }, | 96 | [N_CPU] = { { [0] = 1UL } }, |
94 | #endif /* NUMA */ | 97 | #endif /* NUMA */ |
95 | }; | 98 | }; |
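
For context on the many N_HIGH_MEMORY → N_MEMORY conversions in this patch: N_MEMORY is intended to mean "the node has memory of any kind (regular, high, or movable)", and it collapses onto N_HIGH_MEMORY (and in turn N_NORMAL_MEMORY) when the corresponding config options are off. A rough sketch of the relationship, paraphrased from include/linux/nodemask.h of this era (the exact definition may differ in detail):

    enum node_states {
            N_POSSIBLE,             /* The node could become online at some point */
            N_ONLINE,               /* The node is online */
            N_NORMAL_MEMORY,        /* The node has regular memory */
    #ifdef CONFIG_HIGHMEM
            N_HIGH_MEMORY,          /* The node has regular or high memory */
    #else
            N_HIGH_MEMORY = N_NORMAL_MEMORY,
    #endif
    #ifdef CONFIG_MOVABLE_NODE
            N_MEMORY,               /* The node has memory (regular, high, movable) */
    #else
            N_MEMORY = N_HIGH_MEMORY,
    #endif
            N_CPU,                  /* The node has one or more cpus */
            NR_NODE_STATES
    };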
@@ -368,8 +371,7 @@ static int destroy_compound_page(struct page *page, unsigned long order) | |||
368 | int nr_pages = 1 << order; | 371 | int nr_pages = 1 << order; |
369 | int bad = 0; | 372 | int bad = 0; |
370 | 373 | ||
371 | if (unlikely(compound_order(page) != order) || | 374 | if (unlikely(compound_order(page) != order)) { |
372 | unlikely(!PageHead(page))) { | ||
373 | bad_page(page); | 375 | bad_page(page); |
374 | bad++; | 376 | bad++; |
375 | } | 377 | } |
@@ -523,7 +525,7 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, | |||
523 | * If a block is freed, and its buddy is also free, then this | 525 | * If a block is freed, and its buddy is also free, then this |
524 | * triggers coalescing into a block of larger size. | 526 | * triggers coalescing into a block of larger size. |
525 | * | 527 | * |
526 | * -- wli | 528 | * -- nyc |
527 | */ | 529 | */ |
528 | 530 | ||
529 | static inline void __free_one_page(struct page *page, | 531 | static inline void __free_one_page(struct page *page, |
@@ -608,6 +610,7 @@ static inline int free_pages_check(struct page *page) | |||
608 | bad_page(page); | 610 | bad_page(page); |
609 | return 1; | 611 | return 1; |
610 | } | 612 | } |
613 | reset_page_last_nid(page); | ||
611 | if (page->flags & PAGE_FLAGS_CHECK_AT_PREP) | 614 | if (page->flags & PAGE_FLAGS_CHECK_AT_PREP) |
612 | page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; | 615 | page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; |
613 | return 0; | 616 | return 0; |
@@ -667,11 +670,13 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |||
667 | /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ | 670 | /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ |
668 | __free_one_page(page, zone, 0, mt); | 671 | __free_one_page(page, zone, 0, mt); |
669 | trace_mm_page_pcpu_drain(page, 0, mt); | 672 | trace_mm_page_pcpu_drain(page, 0, mt); |
670 | if (is_migrate_cma(mt)) | 673 | if (likely(get_pageblock_migratetype(page) != MIGRATE_ISOLATE)) { |
671 | __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1); | 674 | __mod_zone_page_state(zone, NR_FREE_PAGES, 1); |
675 | if (is_migrate_cma(mt)) | ||
676 | __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1); | ||
677 | } | ||
672 | } while (--to_free && --batch_free && !list_empty(list)); | 678 | } while (--to_free && --batch_free && !list_empty(list)); |
673 | } | 679 | } |
674 | __mod_zone_page_state(zone, NR_FREE_PAGES, count); | ||
675 | spin_unlock(&zone->lock); | 680 | spin_unlock(&zone->lock); |
676 | } | 681 | } |
677 | 682 | ||
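
The net effect of the free_pcppages_bulk() hunk above is an accounting rule: NR_FREE_PAGES should count only free pages the allocator can actually hand out, i.e. pages in pageblocks that are not MIGRATE_ISOLATE. As a worked example of that reading (my interpretation, not wording from the patch): draining 32 pages from the per-cpu lists, 8 of which go back into a currently isolated pageblock, should raise NR_FREE_PAGES by 24, whereas the old post-loop __mod_zone_page_state(zone, NR_FREE_PAGES, count) call added the flat 32.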
@@ -730,6 +735,13 @@ static void __free_pages_ok(struct page *page, unsigned int order) | |||
730 | local_irq_restore(flags); | 735 | local_irq_restore(flags); |
731 | } | 736 | } |
732 | 737 | ||
738 | /* | ||
739 | * Read access to zone->managed_pages is safe because it's unsigned long, | ||
740 | * but we still need to serialize writers. Currently all callers of | ||
741 | * __free_pages_bootmem() other than put_page_bootmem() run only at | ||
742 | * boot time, so to keep boot time short we shift the serialization | ||
743 | * burden onto put_page_bootmem(). | ||
744 | */ | ||
733 | void __meminit __free_pages_bootmem(struct page *page, unsigned int order) | 745 | void __meminit __free_pages_bootmem(struct page *page, unsigned int order) |
734 | { | 746 | { |
735 | unsigned int nr_pages = 1 << order; | 747 | unsigned int nr_pages = 1 << order; |
@@ -745,6 +757,7 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order) | |||
745 | set_page_count(p, 0); | 757 | set_page_count(p, 0); |
746 | } | 758 | } |
747 | 759 | ||
760 | page_zone(page)->managed_pages += 1 << order; | ||
748 | set_page_refcounted(page); | 761 | set_page_refcounted(page); |
749 | __free_pages(page, order); | 762 | __free_pages(page, order); |
750 | } | 763 | } |
@@ -780,7 +793,7 @@ void __init init_cma_reserved_pageblock(struct page *page) | |||
780 | * large block of memory acted on by a series of small allocations. | 793 | * large block of memory acted on by a series of small allocations. |
781 | * This behavior is a critical factor in sglist merging's success. | 794 | * This behavior is a critical factor in sglist merging's success. |
782 | * | 795 | * |
783 | * -- wli | 796 | * -- nyc |
784 | */ | 797 | */ |
785 | static inline void expand(struct zone *zone, struct page *page, | 798 | static inline void expand(struct zone *zone, struct page *page, |
786 | int low, int high, struct free_area *area, | 799 | int low, int high, struct free_area *area, |
@@ -1392,21 +1405,22 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) | |||
1392 | 1405 | ||
1393 | zone = page_zone(page); | 1406 | zone = page_zone(page); |
1394 | order = page_order(page); | 1407 | order = page_order(page); |
1408 | mt = get_pageblock_migratetype(page); | ||
1395 | 1409 | ||
1396 | /* Obey watermarks as if the page was being allocated */ | 1410 | if (mt != MIGRATE_ISOLATE) { |
1397 | watermark = low_wmark_pages(zone) + (1 << order); | 1411 | /* Obey watermarks as if the page was being allocated */ |
1398 | if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) | 1412 | watermark = low_wmark_pages(zone) + (1 << order); |
1399 | return 0; | 1413 | if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) |
1414 | return 0; | ||
1415 | |||
1416 | __mod_zone_freepage_state(zone, -(1UL << alloc_order), mt); | ||
1417 | } | ||
1400 | 1418 | ||
1401 | /* Remove page from free list */ | 1419 | /* Remove page from free list */ |
1402 | list_del(&page->lru); | 1420 | list_del(&page->lru); |
1403 | zone->free_area[order].nr_free--; | 1421 | zone->free_area[order].nr_free--; |
1404 | rmv_page_order(page); | 1422 | rmv_page_order(page); |
1405 | 1423 | ||
1406 | mt = get_pageblock_migratetype(page); | ||
1407 | if (unlikely(mt != MIGRATE_ISOLATE)) | ||
1408 | __mod_zone_freepage_state(zone, -(1UL << alloc_order), mt); | ||
1409 | |||
1410 | if (alloc_order != order) | 1424 | if (alloc_order != order) |
1411 | expand(zone, page, alloc_order, order, | 1425 | expand(zone, page, alloc_order, order, |
1412 | &zone->free_area[order], migratetype); | 1426 | &zone->free_area[order], migratetype); |
@@ -1692,7 +1706,7 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark, | |||
1692 | * | 1706 | * |
1693 | * If the zonelist cache is present in the passed in zonelist, then | 1707 | * If the zonelist cache is present in the passed in zonelist, then |
1694 | * returns a pointer to the allowed node mask (either the current | 1708 | * returns a pointer to the allowed node mask (either the current |
1695 | * tasks mems_allowed, or node_states[N_HIGH_MEMORY].) | 1709 | * tasks mems_allowed, or node_states[N_MEMORY].) |
1696 | * | 1710 | * |
1697 | * If the zonelist cache is not available for this zonelist, does | 1711 | * If the zonelist cache is not available for this zonelist, does |
1698 | * nothing and returns NULL. | 1712 | * nothing and returns NULL. |
@@ -1721,7 +1735,7 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags) | |||
1721 | 1735 | ||
1722 | allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ? | 1736 | allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ? |
1723 | &cpuset_current_mems_allowed : | 1737 | &cpuset_current_mems_allowed : |
1724 | &node_states[N_HIGH_MEMORY]; | 1738 | &node_states[N_MEMORY]; |
1725 | return allowednodes; | 1739 | return allowednodes; |
1726 | } | 1740 | } |
1727 | 1741 | ||
@@ -1871,7 +1885,7 @@ zonelist_scan: | |||
1871 | */ | 1885 | */ |
1872 | for_each_zone_zonelist_nodemask(zone, z, zonelist, | 1886 | for_each_zone_zonelist_nodemask(zone, z, zonelist, |
1873 | high_zoneidx, nodemask) { | 1887 | high_zoneidx, nodemask) { |
1874 | if (NUMA_BUILD && zlc_active && | 1888 | if (IS_ENABLED(CONFIG_NUMA) && zlc_active && |
1875 | !zlc_zone_worth_trying(zonelist, z, allowednodes)) | 1889 | !zlc_zone_worth_trying(zonelist, z, allowednodes)) |
1876 | continue; | 1890 | continue; |
1877 | if ((alloc_flags & ALLOC_CPUSET) && | 1891 | if ((alloc_flags & ALLOC_CPUSET) && |
@@ -1917,7 +1931,8 @@ zonelist_scan: | |||
1917 | classzone_idx, alloc_flags)) | 1931 | classzone_idx, alloc_flags)) |
1918 | goto try_this_zone; | 1932 | goto try_this_zone; |
1919 | 1933 | ||
1920 | if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) { | 1934 | if (IS_ENABLED(CONFIG_NUMA) && |
1935 | !did_zlc_setup && nr_online_nodes > 1) { | ||
1921 | /* | 1936 | /* |
1922 | * we do zlc_setup if there are multiple nodes | 1937 | * we do zlc_setup if there are multiple nodes |
1923 | * and before considering the first zone allowed | 1938 | * and before considering the first zone allowed |
@@ -1936,7 +1951,7 @@ zonelist_scan: | |||
1936 | * As we may have just activated ZLC, check if the first | 1951 | * As we may have just activated ZLC, check if the first |
1937 | * eligible zone has failed zone_reclaim recently. | 1952 | * eligible zone has failed zone_reclaim recently. |
1938 | */ | 1953 | */ |
1939 | if (NUMA_BUILD && zlc_active && | 1954 | if (IS_ENABLED(CONFIG_NUMA) && zlc_active && |
1940 | !zlc_zone_worth_trying(zonelist, z, allowednodes)) | 1955 | !zlc_zone_worth_trying(zonelist, z, allowednodes)) |
1941 | continue; | 1956 | continue; |
1942 | 1957 | ||
@@ -1962,11 +1977,11 @@ try_this_zone: | |||
1962 | if (page) | 1977 | if (page) |
1963 | break; | 1978 | break; |
1964 | this_zone_full: | 1979 | this_zone_full: |
1965 | if (NUMA_BUILD) | 1980 | if (IS_ENABLED(CONFIG_NUMA)) |
1966 | zlc_mark_zone_full(zonelist, z); | 1981 | zlc_mark_zone_full(zonelist, z); |
1967 | } | 1982 | } |
1968 | 1983 | ||
1969 | if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) { | 1984 | if (unlikely(IS_ENABLED(CONFIG_NUMA) && page == NULL && zlc_active)) { |
1970 | /* Disable zlc cache for second zonelist scan */ | 1985 | /* Disable zlc cache for second zonelist scan */ |
1971 | zlc_active = 0; | 1986 | zlc_active = 0; |
1972 | goto zonelist_scan; | 1987 | goto zonelist_scan; |
@@ -2266,7 +2281,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, | |||
2266 | return NULL; | 2281 | return NULL; |
2267 | 2282 | ||
2268 | /* After successful reclaim, reconsider all zones for allocation */ | 2283 | /* After successful reclaim, reconsider all zones for allocation */ |
2269 | if (NUMA_BUILD) | 2284 | if (IS_ENABLED(CONFIG_NUMA)) |
2270 | zlc_clear_zones_full(zonelist); | 2285 | zlc_clear_zones_full(zonelist); |
2271 | 2286 | ||
2272 | retry: | 2287 | retry: |
@@ -2412,7 +2427,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, | |||
2412 | * allowed per node queues are empty and that nodes are | 2427 | * allowed per node queues are empty and that nodes are |
2413 | * over allocated. | 2428 | * over allocated. |
2414 | */ | 2429 | */ |
2415 | if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE) | 2430 | if (IS_ENABLED(CONFIG_NUMA) && |
2431 | (gfp_mask & GFP_THISNODE) == GFP_THISNODE) | ||
2416 | goto nopage; | 2432 | goto nopage; |
2417 | 2433 | ||
2418 | restart: | 2434 | restart: |
@@ -2596,6 +2612,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, | |||
2596 | int migratetype = allocflags_to_migratetype(gfp_mask); | 2612 | int migratetype = allocflags_to_migratetype(gfp_mask); |
2597 | unsigned int cpuset_mems_cookie; | 2613 | unsigned int cpuset_mems_cookie; |
2598 | int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET; | 2614 | int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET; |
2615 | struct mem_cgroup *memcg = NULL; | ||
2599 | 2616 | ||
2600 | gfp_mask &= gfp_allowed_mask; | 2617 | gfp_mask &= gfp_allowed_mask; |
2601 | 2618 | ||
@@ -2614,6 +2631,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, | |||
2614 | if (unlikely(!zonelist->_zonerefs->zone)) | 2631 | if (unlikely(!zonelist->_zonerefs->zone)) |
2615 | return NULL; | 2632 | return NULL; |
2616 | 2633 | ||
2634 | /* | ||
2635 | * Will only have any effect when __GFP_KMEMCG is set. This is | ||
2636 | * verified in the (always inline) callee | ||
2637 | */ | ||
2638 | if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order)) | ||
2639 | return NULL; | ||
2640 | |||
2617 | retry_cpuset: | 2641 | retry_cpuset: |
2618 | cpuset_mems_cookie = get_mems_allowed(); | 2642 | cpuset_mems_cookie = get_mems_allowed(); |
2619 | 2643 | ||
@@ -2649,6 +2673,8 @@ out: | |||
2649 | if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) | 2673 | if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) |
2650 | goto retry_cpuset; | 2674 | goto retry_cpuset; |
2651 | 2675 | ||
2676 | memcg_kmem_commit_charge(page, memcg, order); | ||
2677 | |||
2652 | return page; | 2678 | return page; |
2653 | } | 2679 | } |
2654 | EXPORT_SYMBOL(__alloc_pages_nodemask); | 2680 | EXPORT_SYMBOL(__alloc_pages_nodemask); |
@@ -2701,6 +2727,31 @@ void free_pages(unsigned long addr, unsigned int order) | |||
2701 | 2727 | ||
2702 | EXPORT_SYMBOL(free_pages); | 2728 | EXPORT_SYMBOL(free_pages); |
2703 | 2729 | ||
2730 | /* | ||
2731 | * __free_memcg_kmem_pages and free_memcg_kmem_pages will free | ||
2732 | * pages allocated with __GFP_KMEMCG. | ||
2733 | * | ||
2734 | * Those pages are accounted to a particular memcg, embedded in the | ||
2735 | * corresponding page_cgroup. To avoid adding a cost to the allocator to search | ||
2736 | * for that information only to find out that it is NULL for callers who have no | ||
2737 | * interest in it whatsoever, we provide these functions. | ||
2738 | * | ||
2739 | * The caller knows better which flags it relies on. | ||
2740 | */ | ||
2741 | void __free_memcg_kmem_pages(struct page *page, unsigned int order) | ||
2742 | { | ||
2743 | memcg_kmem_uncharge_pages(page, order); | ||
2744 | __free_pages(page, order); | ||
2745 | } | ||
2746 | |||
2747 | void free_memcg_kmem_pages(unsigned long addr, unsigned int order) | ||
2748 | { | ||
2749 | if (addr != 0) { | ||
2750 | VM_BUG_ON(!virt_addr_valid((void *)addr)); | ||
2751 | __free_memcg_kmem_pages(virt_to_page((void *)addr), order); | ||
2752 | } | ||
2753 | } | ||
2754 | |||
2704 | static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size) | 2755 | static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size) |
2705 | { | 2756 | { |
2706 | if (addr) { | 2757 | if (addr) { |
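
Taken together with the memcg_kmem_newpage_charge()/memcg_kmem_commit_charge() hooks added to __alloc_pages_nodemask() earlier in this diff, the intended usage of the new free helpers looks roughly like the sketch below. This is a hypothetical caller, not code from the patch (the real users in this series were allocations such as kernel stacks), and it assumes the __GFP_KMEMCG flag introduced by the same series:

    #include <linux/gfp.h>
    #include <linux/mm.h>

    static void *accounted_buf_alloc(unsigned int order)
    {
            /* __GFP_KMEMCG makes __alloc_pages_nodemask() charge the pages
             * to the current task's memcg when kmem accounting is enabled. */
            struct page *page = alloc_pages(GFP_KERNEL | __GFP_KMEMCG, order);

            return page ? page_address(page) : NULL;
    }

    static void accounted_buf_free(void *addr, unsigned int order)
    {
            /* Pairs with the charged allocation above: the memcg is uncharged
             * before the pages are returned to the buddy allocator.  Passing
             * a NULL address is harmless, as with free_pages(). */
            free_memcg_kmem_pages((unsigned long)addr, order);
    }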
@@ -2819,7 +2870,7 @@ unsigned int nr_free_pagecache_pages(void) | |||
2819 | 2870 | ||
2820 | static inline void show_node(struct zone *zone) | 2871 | static inline void show_node(struct zone *zone) |
2821 | { | 2872 | { |
2822 | if (NUMA_BUILD) | 2873 | if (IS_ENABLED(CONFIG_NUMA)) |
2823 | printk("Node %d ", zone_to_nid(zone)); | 2874 | printk("Node %d ", zone_to_nid(zone)); |
2824 | } | 2875 | } |
2825 | 2876 | ||
@@ -2877,6 +2928,31 @@ out: | |||
2877 | 2928 | ||
2878 | #define K(x) ((x) << (PAGE_SHIFT-10)) | 2929 | #define K(x) ((x) << (PAGE_SHIFT-10)) |
2879 | 2930 | ||
2931 | static void show_migration_types(unsigned char type) | ||
2932 | { | ||
2933 | static const char types[MIGRATE_TYPES] = { | ||
2934 | [MIGRATE_UNMOVABLE] = 'U', | ||
2935 | [MIGRATE_RECLAIMABLE] = 'E', | ||
2936 | [MIGRATE_MOVABLE] = 'M', | ||
2937 | [MIGRATE_RESERVE] = 'R', | ||
2938 | #ifdef CONFIG_CMA | ||
2939 | [MIGRATE_CMA] = 'C', | ||
2940 | #endif | ||
2941 | [MIGRATE_ISOLATE] = 'I', | ||
2942 | }; | ||
2943 | char tmp[MIGRATE_TYPES + 1]; | ||
2944 | char *p = tmp; | ||
2945 | int i; | ||
2946 | |||
2947 | for (i = 0; i < MIGRATE_TYPES; i++) { | ||
2948 | if (type & (1 << i)) | ||
2949 | *p++ = types[i]; | ||
2950 | } | ||
2951 | |||
2952 | *p = '\0'; | ||
2953 | printk("(%s) ", tmp); | ||
2954 | } | ||
2955 | |||
2880 | /* | 2956 | /* |
2881 | * Show free area list (used inside shift_scroll-lock stuff) | 2957 | * Show free area list (used inside shift_scroll-lock stuff) |
2882 | * We also calculate the percentage fragmentation. We do this by counting the | 2958 | * We also calculate the percentage fragmentation. We do this by counting the |
@@ -2951,6 +3027,7 @@ void show_free_areas(unsigned int filter) | |||
2951 | " isolated(anon):%lukB" | 3027 | " isolated(anon):%lukB" |
2952 | " isolated(file):%lukB" | 3028 | " isolated(file):%lukB" |
2953 | " present:%lukB" | 3029 | " present:%lukB" |
3030 | " managed:%lukB" | ||
2954 | " mlocked:%lukB" | 3031 | " mlocked:%lukB" |
2955 | " dirty:%lukB" | 3032 | " dirty:%lukB" |
2956 | " writeback:%lukB" | 3033 | " writeback:%lukB" |
@@ -2980,6 +3057,7 @@ void show_free_areas(unsigned int filter) | |||
2980 | K(zone_page_state(zone, NR_ISOLATED_ANON)), | 3057 | K(zone_page_state(zone, NR_ISOLATED_ANON)), |
2981 | K(zone_page_state(zone, NR_ISOLATED_FILE)), | 3058 | K(zone_page_state(zone, NR_ISOLATED_FILE)), |
2982 | K(zone->present_pages), | 3059 | K(zone->present_pages), |
3060 | K(zone->managed_pages), | ||
2983 | K(zone_page_state(zone, NR_MLOCK)), | 3061 | K(zone_page_state(zone, NR_MLOCK)), |
2984 | K(zone_page_state(zone, NR_FILE_DIRTY)), | 3062 | K(zone_page_state(zone, NR_FILE_DIRTY)), |
2985 | K(zone_page_state(zone, NR_WRITEBACK)), | 3063 | K(zone_page_state(zone, NR_WRITEBACK)), |
@@ -3005,6 +3083,7 @@ void show_free_areas(unsigned int filter) | |||
3005 | 3083 | ||
3006 | for_each_populated_zone(zone) { | 3084 | for_each_populated_zone(zone) { |
3007 | unsigned long nr[MAX_ORDER], flags, order, total = 0; | 3085 | unsigned long nr[MAX_ORDER], flags, order, total = 0; |
3086 | unsigned char types[MAX_ORDER]; | ||
3008 | 3087 | ||
3009 | if (skip_free_areas_node(filter, zone_to_nid(zone))) | 3088 | if (skip_free_areas_node(filter, zone_to_nid(zone))) |
3010 | continue; | 3089 | continue; |
@@ -3013,12 +3092,24 @@ void show_free_areas(unsigned int filter) | |||
3013 | 3092 | ||
3014 | spin_lock_irqsave(&zone->lock, flags); | 3093 | spin_lock_irqsave(&zone->lock, flags); |
3015 | for (order = 0; order < MAX_ORDER; order++) { | 3094 | for (order = 0; order < MAX_ORDER; order++) { |
3016 | nr[order] = zone->free_area[order].nr_free; | 3095 | struct free_area *area = &zone->free_area[order]; |
3096 | int type; | ||
3097 | |||
3098 | nr[order] = area->nr_free; | ||
3017 | total += nr[order] << order; | 3099 | total += nr[order] << order; |
3100 | |||
3101 | types[order] = 0; | ||
3102 | for (type = 0; type < MIGRATE_TYPES; type++) { | ||
3103 | if (!list_empty(&area->free_list[type])) | ||
3104 | types[order] |= 1 << type; | ||
3105 | } | ||
3018 | } | 3106 | } |
3019 | spin_unlock_irqrestore(&zone->lock, flags); | 3107 | spin_unlock_irqrestore(&zone->lock, flags); |
3020 | for (order = 0; order < MAX_ORDER; order++) | 3108 | for (order = 0; order < MAX_ORDER; order++) { |
3021 | printk("%lu*%lukB ", nr[order], K(1UL) << order); | 3109 | printk("%lu*%lukB ", nr[order], K(1UL) << order); |
3110 | if (nr[order]) | ||
3111 | show_migration_types(types[order]); | ||
3112 | } | ||
3022 | printk("= %lukB\n", K(total)); | 3113 | printk("= %lukB\n", K(total)); |
3023 | } | 3114 | } |
3024 | 3115 | ||
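
With show_migration_types() and the per-order types[] collection above, every term in the per-zone free-area line of show_free_areas() gains a parenthesized set of migratetype letters: U = unmovable, E = reclaimable, M = movable, R = reserve, C = CMA, I = isolate. Purely to illustrate the format (made-up numbers, not captured output), a line now looks something like:

    Node 0 Normal: 1*4kB (U) 2*8kB (UM) 3*16kB (M) = 68kB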
@@ -3195,7 +3286,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask) | |||
3195 | return node; | 3286 | return node; |
3196 | } | 3287 | } |
3197 | 3288 | ||
3198 | for_each_node_state(n, N_HIGH_MEMORY) { | 3289 | for_each_node_state(n, N_MEMORY) { |
3199 | 3290 | ||
3200 | /* Don't want a node to appear more than once */ | 3291 | /* Don't want a node to appear more than once */ |
3201 | if (node_isset(n, *used_node_mask)) | 3292 | if (node_isset(n, *used_node_mask)) |
@@ -3337,7 +3428,7 @@ static int default_zonelist_order(void) | |||
3337 | * local memory, NODE_ORDER may be suitable. | 3428 | * local memory, NODE_ORDER may be suitable. |
3338 | */ | 3429 | */ |
3339 | average_size = total_size / | 3430 | average_size = total_size / |
3340 | (nodes_weight(node_states[N_HIGH_MEMORY]) + 1); | 3431 | (nodes_weight(node_states[N_MEMORY]) + 1); |
3341 | for_each_online_node(nid) { | 3432 | for_each_online_node(nid) { |
3342 | low_kmem_size = 0; | 3433 | low_kmem_size = 0; |
3343 | total_size = 0; | 3434 | total_size = 0; |
@@ -3827,6 +3918,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
3827 | mminit_verify_page_links(page, zone, nid, pfn); | 3918 | mminit_verify_page_links(page, zone, nid, pfn); |
3828 | init_page_count(page); | 3919 | init_page_count(page); |
3829 | reset_page_mapcount(page); | 3920 | reset_page_mapcount(page); |
3921 | reset_page_last_nid(page); | ||
3830 | SetPageReserved(page); | 3922 | SetPageReserved(page); |
3831 | /* | 3923 | /* |
3832 | * Mark the block movable so that blocks are reserved for | 3924 | * Mark the block movable so that blocks are reserved for |
@@ -4433,6 +4525,26 @@ void __init set_pageblock_order(void) | |||
4433 | 4525 | ||
4434 | #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ | 4526 | #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ |
4435 | 4527 | ||
4528 | static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages, | ||
4529 | unsigned long present_pages) | ||
4530 | { | ||
4531 | unsigned long pages = spanned_pages; | ||
4532 | |||
4533 | /* | ||
4534 | * Provide a more accurate estimation if there are holes within | ||
4535 | * the zone and SPARSEMEM is in use. If there are holes within the | ||
4536 | * zone, each populated memory region may cost us one or two extra | ||
4537 | * memmap pages due to alignment, because the memmap pages for each | ||
4538 | * populated region may not be naturally aligned on a page boundary. | ||
4539 | * So the (present_pages >> 4) heuristic is a tradeoff for that. | ||
4540 | */ | ||
4541 | if (spanned_pages > present_pages + (present_pages >> 4) && | ||
4542 | IS_ENABLED(CONFIG_SPARSEMEM)) | ||
4543 | pages = present_pages; | ||
4544 | |||
4545 | return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT; | ||
4546 | } | ||
4547 | |||
4436 | /* | 4548 | /* |
4437 | * Set up the zone data structures: | 4549 | * Set up the zone data structures: |
4438 | * - mark all pages reserved | 4550 | * - mark all pages reserved |
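
A quick worked example of the calc_memmap_size() heuristic above, assuming 4 KiB pages and a 64-byte struct page (typical, but configuration-dependent): a zone that spans 1,048,576 pages with only 524,288 present satisfies spanned > present + present/16 (1,048,576 > 557,056), so with SPARSEMEM the memmap charge is computed from present pages, PAGE_ALIGN(524288 * 64) >> PAGE_SHIFT = 8,192 pages (32 MiB), instead of the 16,384 pages the old spanned-based calculation in free_area_init_core() would have charged.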
@@ -4450,54 +4562,67 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, | |||
4450 | int ret; | 4562 | int ret; |
4451 | 4563 | ||
4452 | pgdat_resize_init(pgdat); | 4564 | pgdat_resize_init(pgdat); |
4565 | #ifdef CONFIG_NUMA_BALANCING | ||
4566 | spin_lock_init(&pgdat->numabalancing_migrate_lock); | ||
4567 | pgdat->numabalancing_migrate_nr_pages = 0; | ||
4568 | pgdat->numabalancing_migrate_next_window = jiffies; | ||
4569 | #endif | ||
4453 | init_waitqueue_head(&pgdat->kswapd_wait); | 4570 | init_waitqueue_head(&pgdat->kswapd_wait); |
4454 | init_waitqueue_head(&pgdat->pfmemalloc_wait); | 4571 | init_waitqueue_head(&pgdat->pfmemalloc_wait); |
4455 | pgdat_page_cgroup_init(pgdat); | 4572 | pgdat_page_cgroup_init(pgdat); |
4456 | 4573 | ||
4457 | for (j = 0; j < MAX_NR_ZONES; j++) { | 4574 | for (j = 0; j < MAX_NR_ZONES; j++) { |
4458 | struct zone *zone = pgdat->node_zones + j; | 4575 | struct zone *zone = pgdat->node_zones + j; |
4459 | unsigned long size, realsize, memmap_pages; | 4576 | unsigned long size, realsize, freesize, memmap_pages; |
4460 | 4577 | ||
4461 | size = zone_spanned_pages_in_node(nid, j, zones_size); | 4578 | size = zone_spanned_pages_in_node(nid, j, zones_size); |
4462 | realsize = size - zone_absent_pages_in_node(nid, j, | 4579 | realsize = freesize = size - zone_absent_pages_in_node(nid, j, |
4463 | zholes_size); | 4580 | zholes_size); |
4464 | 4581 | ||
4465 | /* | 4582 | /* |
4466 | * Adjust realsize so that it accounts for how much memory | 4583 | * Adjust freesize so that it accounts for how much memory |
4467 | * is used by this zone for memmap. This affects the watermark | 4584 | * is used by this zone for memmap. This affects the watermark |
4468 | * and per-cpu initialisations | 4585 | * and per-cpu initialisations |
4469 | */ | 4586 | */ |
4470 | memmap_pages = | 4587 | memmap_pages = calc_memmap_size(size, realsize); |
4471 | PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT; | 4588 | if (freesize >= memmap_pages) { |
4472 | if (realsize >= memmap_pages) { | 4589 | freesize -= memmap_pages; |
4473 | realsize -= memmap_pages; | ||
4474 | if (memmap_pages) | 4590 | if (memmap_pages) |
4475 | printk(KERN_DEBUG | 4591 | printk(KERN_DEBUG |
4476 | " %s zone: %lu pages used for memmap\n", | 4592 | " %s zone: %lu pages used for memmap\n", |
4477 | zone_names[j], memmap_pages); | 4593 | zone_names[j], memmap_pages); |
4478 | } else | 4594 | } else |
4479 | printk(KERN_WARNING | 4595 | printk(KERN_WARNING |
4480 | " %s zone: %lu pages exceeds realsize %lu\n", | 4596 | " %s zone: %lu pages exceeds freesize %lu\n", |
4481 | zone_names[j], memmap_pages, realsize); | 4597 | zone_names[j], memmap_pages, freesize); |
4482 | 4598 | ||
4483 | /* Account for reserved pages */ | 4599 | /* Account for reserved pages */ |
4484 | if (j == 0 && realsize > dma_reserve) { | 4600 | if (j == 0 && freesize > dma_reserve) { |
4485 | realsize -= dma_reserve; | 4601 | freesize -= dma_reserve; |
4486 | printk(KERN_DEBUG " %s zone: %lu pages reserved\n", | 4602 | printk(KERN_DEBUG " %s zone: %lu pages reserved\n", |
4487 | zone_names[0], dma_reserve); | 4603 | zone_names[0], dma_reserve); |
4488 | } | 4604 | } |
4489 | 4605 | ||
4490 | if (!is_highmem_idx(j)) | 4606 | if (!is_highmem_idx(j)) |
4491 | nr_kernel_pages += realsize; | 4607 | nr_kernel_pages += freesize; |
4492 | nr_all_pages += realsize; | 4608 | /* Charge for highmem memmap if there are enough kernel pages */ |
4609 | else if (nr_kernel_pages > memmap_pages * 2) | ||
4610 | nr_kernel_pages -= memmap_pages; | ||
4611 | nr_all_pages += freesize; | ||
4493 | 4612 | ||
4494 | zone->spanned_pages = size; | 4613 | zone->spanned_pages = size; |
4495 | zone->present_pages = realsize; | 4614 | zone->present_pages = freesize; |
4615 | /* | ||
4616 | * Set an approximate value for lowmem here, it will be adjusted | ||
4617 | * when the bootmem allocator frees pages into the buddy system. | ||
4618 | * And all highmem pages will be managed by the buddy system. | ||
4619 | */ | ||
4620 | zone->managed_pages = is_highmem_idx(j) ? realsize : freesize; | ||
4496 | #ifdef CONFIG_NUMA | 4621 | #ifdef CONFIG_NUMA |
4497 | zone->node = nid; | 4622 | zone->node = nid; |
4498 | zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio) | 4623 | zone->min_unmapped_pages = (freesize*sysctl_min_unmapped_ratio) |
4499 | / 100; | 4624 | / 100; |
4500 | zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100; | 4625 | zone->min_slab_pages = (freesize * sysctl_min_slab_ratio) / 100; |
4501 | #endif | 4626 | #endif |
4502 | zone->name = zone_names[j]; | 4627 | zone->name = zone_names[j]; |
4503 | spin_lock_init(&zone->lock); | 4628 | spin_lock_init(&zone->lock); |
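
Continuing the same example numbers through the free_area_init_core() hunk above (my reading of the code, not wording from the patch): realsize and freesize both start at the 524,288 present pages; freesize then drops the 8,192 memmap pages (and dma_reserve for zone 0, zero here), leaving 516,096. That value feeds nr_kernel_pages and nr_all_pages, is stored in zone->present_pages, and serves as the initial zone->managed_pages estimate for a lowmem zone; a highmem zone instead starts from realsize, since all of its pages are eventually handed to the buddy allocator. Per the new comment, the estimate is corrected later when the bootmem allocator releases pages through __free_pages_bootmem().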
@@ -4688,7 +4813,7 @@ unsigned long __init find_min_pfn_with_active_regions(void) | |||
4688 | /* | 4813 | /* |
4689 | * early_calculate_totalpages() | 4814 | * early_calculate_totalpages() |
4690 | * Sum pages in active regions for movable zone. | 4815 | * Sum pages in active regions for movable zone. |
4691 | * Populate N_HIGH_MEMORY for calculating usable_nodes. | 4816 | * Populate N_MEMORY for calculating usable_nodes. |
4692 | */ | 4817 | */ |
4693 | static unsigned long __init early_calculate_totalpages(void) | 4818 | static unsigned long __init early_calculate_totalpages(void) |
4694 | { | 4819 | { |
@@ -4701,7 +4826,7 @@ static unsigned long __init early_calculate_totalpages(void) | |||
4701 | 4826 | ||
4702 | totalpages += pages; | 4827 | totalpages += pages; |
4703 | if (pages) | 4828 | if (pages) |
4704 | node_set_state(nid, N_HIGH_MEMORY); | 4829 | node_set_state(nid, N_MEMORY); |
4705 | } | 4830 | } |
4706 | return totalpages; | 4831 | return totalpages; |
4707 | } | 4832 | } |
@@ -4718,9 +4843,9 @@ static void __init find_zone_movable_pfns_for_nodes(void) | |||
4718 | unsigned long usable_startpfn; | 4843 | unsigned long usable_startpfn; |
4719 | unsigned long kernelcore_node, kernelcore_remaining; | 4844 | unsigned long kernelcore_node, kernelcore_remaining; |
4720 | /* save the state before borrow the nodemask */ | 4845 | /* save the state before borrow the nodemask */ |
4721 | nodemask_t saved_node_state = node_states[N_HIGH_MEMORY]; | 4846 | nodemask_t saved_node_state = node_states[N_MEMORY]; |
4722 | unsigned long totalpages = early_calculate_totalpages(); | 4847 | unsigned long totalpages = early_calculate_totalpages(); |
4723 | int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]); | 4848 | int usable_nodes = nodes_weight(node_states[N_MEMORY]); |
4724 | 4849 | ||
4725 | /* | 4850 | /* |
4726 | * If movablecore was specified, calculate what size of | 4851 | * If movablecore was specified, calculate what size of |
@@ -4755,7 +4880,7 @@ static void __init find_zone_movable_pfns_for_nodes(void) | |||
4755 | restart: | 4880 | restart: |
4756 | /* Spread kernelcore memory as evenly as possible throughout nodes */ | 4881 | /* Spread kernelcore memory as evenly as possible throughout nodes */ |
4757 | kernelcore_node = required_kernelcore / usable_nodes; | 4882 | kernelcore_node = required_kernelcore / usable_nodes; |
4758 | for_each_node_state(nid, N_HIGH_MEMORY) { | 4883 | for_each_node_state(nid, N_MEMORY) { |
4759 | unsigned long start_pfn, end_pfn; | 4884 | unsigned long start_pfn, end_pfn; |
4760 | 4885 | ||
4761 | /* | 4886 | /* |
@@ -4847,23 +4972,27 @@ restart: | |||
4847 | 4972 | ||
4848 | out: | 4973 | out: |
4849 | /* restore the node_state */ | 4974 | /* restore the node_state */ |
4850 | node_states[N_HIGH_MEMORY] = saved_node_state; | 4975 | node_states[N_MEMORY] = saved_node_state; |
4851 | } | 4976 | } |
4852 | 4977 | ||
4853 | /* Any regular memory on that node ? */ | 4978 | /* Any regular or high memory on that node ? */ |
4854 | static void __init check_for_regular_memory(pg_data_t *pgdat) | 4979 | static void check_for_memory(pg_data_t *pgdat, int nid) |
4855 | { | 4980 | { |
4856 | #ifdef CONFIG_HIGHMEM | ||
4857 | enum zone_type zone_type; | 4981 | enum zone_type zone_type; |
4858 | 4982 | ||
4859 | for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) { | 4983 | if (N_MEMORY == N_NORMAL_MEMORY) |
4984 | return; | ||
4985 | |||
4986 | for (zone_type = 0; zone_type <= ZONE_MOVABLE - 1; zone_type++) { | ||
4860 | struct zone *zone = &pgdat->node_zones[zone_type]; | 4987 | struct zone *zone = &pgdat->node_zones[zone_type]; |
4861 | if (zone->present_pages) { | 4988 | if (zone->present_pages) { |
4862 | node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY); | 4989 | node_set_state(nid, N_HIGH_MEMORY); |
4990 | if (N_NORMAL_MEMORY != N_HIGH_MEMORY && | ||
4991 | zone_type <= ZONE_NORMAL) | ||
4992 | node_set_state(nid, N_NORMAL_MEMORY); | ||
4863 | break; | 4993 | break; |
4864 | } | 4994 | } |
4865 | } | 4995 | } |
4866 | #endif | ||
4867 | } | 4996 | } |
4868 | 4997 | ||
4869 | /** | 4998 | /** |
@@ -4946,8 +5075,8 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) | |||
4946 | 5075 | ||
4947 | /* Any memory on that node */ | 5076 | /* Any memory on that node */ |
4948 | if (pgdat->node_present_pages) | 5077 | if (pgdat->node_present_pages) |
4949 | node_set_state(nid, N_HIGH_MEMORY); | 5078 | node_set_state(nid, N_MEMORY); |
4950 | check_for_regular_memory(pgdat); | 5079 | check_for_memory(pgdat, nid); |
4951 | } | 5080 | } |
4952 | } | 5081 | } |
4953 | 5082 | ||
@@ -5175,10 +5304,6 @@ static void __setup_per_zone_wmarks(void) | |||
5175 | zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); | 5304 | zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); |
5176 | zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); | 5305 | zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); |
5177 | 5306 | ||
5178 | zone->watermark[WMARK_MIN] += cma_wmark_pages(zone); | ||
5179 | zone->watermark[WMARK_LOW] += cma_wmark_pages(zone); | ||
5180 | zone->watermark[WMARK_HIGH] += cma_wmark_pages(zone); | ||
5181 | |||
5182 | setup_zone_migrate_reserve(zone); | 5307 | setup_zone_migrate_reserve(zone); |
5183 | spin_unlock_irqrestore(&zone->lock, flags); | 5308 | spin_unlock_irqrestore(&zone->lock, flags); |
5184 | } | 5309 | } |
@@ -5576,7 +5701,8 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags, | |||
5576 | * MIGRATE_MOVABLE block might include unmovable pages. It means you can't | 5701 | * MIGRATE_MOVABLE block might include unmovable pages. It means you can't |
5577 | * expect this function should be exact. | 5702 | * expect this function should be exact. |
5578 | */ | 5703 | */ |
5579 | bool has_unmovable_pages(struct zone *zone, struct page *page, int count) | 5704 | bool has_unmovable_pages(struct zone *zone, struct page *page, int count, |
5705 | bool skip_hwpoisoned_pages) | ||
5580 | { | 5706 | { |
5581 | unsigned long pfn, iter, found; | 5707 | unsigned long pfn, iter, found; |
5582 | int mt; | 5708 | int mt; |
@@ -5611,6 +5737,13 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count) | |||
5611 | continue; | 5737 | continue; |
5612 | } | 5738 | } |
5613 | 5739 | ||
5740 | /* | ||
5741 | * The HWPoisoned page may not be in the buddy system, and | ||
5742 | * its page_count() is not 0. | ||
5743 | */ | ||
5744 | if (skip_hwpoisoned_pages && PageHWPoison(page)) | ||
5745 | continue; | ||
5746 | |||
5614 | if (!PageLRU(page)) | 5747 | if (!PageLRU(page)) |
5615 | found++; | 5748 | found++; |
5616 | /* | 5749 | /* |
@@ -5653,7 +5786,7 @@ bool is_pageblock_removable_nolock(struct page *page) | |||
5653 | zone->zone_start_pfn + zone->spanned_pages <= pfn) | 5786 | zone->zone_start_pfn + zone->spanned_pages <= pfn) |
5654 | return false; | 5787 | return false; |
5655 | 5788 | ||
5656 | return !has_unmovable_pages(zone, page, 0); | 5789 | return !has_unmovable_pages(zone, page, 0, true); |
5657 | } | 5790 | } |
5658 | 5791 | ||
5659 | #ifdef CONFIG_CMA | 5792 | #ifdef CONFIG_CMA |
@@ -5680,7 +5813,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, | |||
5680 | unsigned int tries = 0; | 5813 | unsigned int tries = 0; |
5681 | int ret = 0; | 5814 | int ret = 0; |
5682 | 5815 | ||
5683 | migrate_prep_local(); | 5816 | migrate_prep(); |
5684 | 5817 | ||
5685 | while (pfn < end || !list_empty(&cc->migratepages)) { | 5818 | while (pfn < end || !list_empty(&cc->migratepages)) { |
5686 | if (fatal_signal_pending(current)) { | 5819 | if (fatal_signal_pending(current)) { |
@@ -5708,61 +5841,14 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, | |||
5708 | 5841 | ||
5709 | ret = migrate_pages(&cc->migratepages, | 5842 | ret = migrate_pages(&cc->migratepages, |
5710 | alloc_migrate_target, | 5843 | alloc_migrate_target, |
5711 | 0, false, MIGRATE_SYNC); | 5844 | 0, false, MIGRATE_SYNC, |
5845 | MR_CMA); | ||
5712 | } | 5846 | } |
5713 | 5847 | ||
5714 | putback_lru_pages(&cc->migratepages); | 5848 | putback_movable_pages(&cc->migratepages); |
5715 | return ret > 0 ? 0 : ret; | 5849 | return ret > 0 ? 0 : ret; |
5716 | } | 5850 | } |
5717 | 5851 | ||
5718 | /* | ||
5719 | * Update zone's cma pages counter used for watermark level calculation. | ||
5720 | */ | ||
5721 | static inline void __update_cma_watermarks(struct zone *zone, int count) | ||
5722 | { | ||
5723 | unsigned long flags; | ||
5724 | spin_lock_irqsave(&zone->lock, flags); | ||
5725 | zone->min_cma_pages += count; | ||
5726 | spin_unlock_irqrestore(&zone->lock, flags); | ||
5727 | setup_per_zone_wmarks(); | ||
5728 | } | ||
5729 | |||
5730 | /* | ||
5731 | * Trigger memory pressure bump to reclaim some pages in order to be able to | ||
5732 | * allocate 'count' pages in single page units. Does similar work as | ||
5733 | *__alloc_pages_slowpath() function. | ||
5734 | */ | ||
5735 | static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count) | ||
5736 | { | ||
5737 | enum zone_type high_zoneidx = gfp_zone(gfp_mask); | ||
5738 | struct zonelist *zonelist = node_zonelist(0, gfp_mask); | ||
5739 | int did_some_progress = 0; | ||
5740 | int order = 1; | ||
5741 | |||
5742 | /* | ||
5743 | * Increase level of watermarks to force kswapd do his job | ||
5744 | * to stabilise at new watermark level. | ||
5745 | */ | ||
5746 | __update_cma_watermarks(zone, count); | ||
5747 | |||
5748 | /* Obey watermarks as if the page was being allocated */ | ||
5749 | while (!zone_watermark_ok(zone, 0, low_wmark_pages(zone), 0, 0)) { | ||
5750 | wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone)); | ||
5751 | |||
5752 | did_some_progress = __perform_reclaim(gfp_mask, order, zonelist, | ||
5753 | NULL); | ||
5754 | if (!did_some_progress) { | ||
5755 | /* Exhausted what can be done so it's blamo time */ | ||
5756 | out_of_memory(zonelist, gfp_mask, order, NULL, false); | ||
5757 | } | ||
5758 | } | ||
5759 | |||
5760 | /* Restore original watermark levels. */ | ||
5761 | __update_cma_watermarks(zone, -count); | ||
5762 | |||
5763 | return count; | ||
5764 | } | ||
5765 | |||
5766 | /** | 5852 | /** |
5767 | * alloc_contig_range() -- tries to allocate given range of pages | 5853 | * alloc_contig_range() -- tries to allocate given range of pages |
5768 | * @start: start PFN to allocate | 5854 | * @start: start PFN to allocate |
@@ -5786,7 +5872,6 @@ static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count) | |||
5786 | int alloc_contig_range(unsigned long start, unsigned long end, | 5872 | int alloc_contig_range(unsigned long start, unsigned long end, |
5787 | unsigned migratetype) | 5873 | unsigned migratetype) |
5788 | { | 5874 | { |
5789 | struct zone *zone = page_zone(pfn_to_page(start)); | ||
5790 | unsigned long outer_start, outer_end; | 5875 | unsigned long outer_start, outer_end; |
5791 | int ret = 0, order; | 5876 | int ret = 0, order; |
5792 | 5877 | ||
@@ -5824,7 +5909,8 @@ int alloc_contig_range(unsigned long start, unsigned long end, | |||
5824 | */ | 5909 | */ |
5825 | 5910 | ||
5826 | ret = start_isolate_page_range(pfn_max_align_down(start), | 5911 | ret = start_isolate_page_range(pfn_max_align_down(start), |
5827 | pfn_max_align_up(end), migratetype); | 5912 | pfn_max_align_up(end), migratetype, |
5913 | false); | ||
5828 | if (ret) | 5914 | if (ret) |
5829 | return ret; | 5915 | return ret; |
5830 | 5916 | ||
@@ -5863,18 +5949,13 @@ int alloc_contig_range(unsigned long start, unsigned long end, | |||
5863 | } | 5949 | } |
5864 | 5950 | ||
5865 | /* Make sure the range is really isolated. */ | 5951 | /* Make sure the range is really isolated. */ |
5866 | if (test_pages_isolated(outer_start, end)) { | 5952 | if (test_pages_isolated(outer_start, end, false)) { |
5867 | pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n", | 5953 | pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n", |
5868 | outer_start, end); | 5954 | outer_start, end); |
5869 | ret = -EBUSY; | 5955 | ret = -EBUSY; |
5870 | goto done; | 5956 | goto done; |
5871 | } | 5957 | } |
5872 | 5958 | ||
5873 | /* | ||
5874 | * Reclaim enough pages to make sure that contiguous allocation | ||
5875 | * will not starve the system. | ||
5876 | */ | ||
5877 | __reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start); | ||
5878 | 5959 | ||
5879 | /* Grab isolated pages from freelists. */ | 5960 | /* Grab isolated pages from freelists. */ |
5880 | outer_end = isolate_freepages_range(&cc, outer_start, end); | 5961 | outer_end = isolate_freepages_range(&cc, outer_start, end); |
@@ -5897,8 +5978,15 @@ done: | |||
5897 | 5978 | ||
5898 | void free_contig_range(unsigned long pfn, unsigned nr_pages) | 5979 | void free_contig_range(unsigned long pfn, unsigned nr_pages) |
5899 | { | 5980 | { |
5900 | for (; nr_pages--; ++pfn) | 5981 | unsigned int count = 0; |
5901 | __free_page(pfn_to_page(pfn)); | 5982 | |
5983 | for (; nr_pages--; pfn++) { | ||
5984 | struct page *page = pfn_to_page(pfn); | ||
5985 | |||
5986 | count += page_count(page) != 1; | ||
5987 | __free_page(page); | ||
5988 | } | ||
5989 | WARN(count != 0, "%d pages are still in use!\n", count); | ||
5902 | } | 5990 | } |
5903 | #endif | 5991 | #endif |
5904 | 5992 | ||
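
For reference, the contiguous-range API touched above is used roughly as in the sketch below. This is a hypothetical direct caller, not code from the patch; the in-tree consumer at the time was the CMA/dma-contiguous layer, which reaches alloc_contig_range() through dma_alloc_from_contiguous(). The PFN range is assumed to lie inside a MIGRATE_CMA area set aside at boot:

    #include <linux/gfp.h>

    /* base_pfn: hypothetical pageblock-aligned PFN inside a CMA region. */
    static int grab_and_release(unsigned long base_pfn, unsigned long nr_pages)
    {
            int ret;

            /* Migrate everything out of [base_pfn, base_pfn + nr_pages) and
             * pull the now-free pages out of the buddy allocator. */
            ret = alloc_contig_range(base_pfn, base_pfn + nr_pages, MIGRATE_CMA);
            if (ret)
                    return ret;

            /* Hand the pages back; after this patch free_contig_range() also
             * warns if any page in the range still has an extra reference. */
            free_contig_range(base_pfn, nr_pages);
            return 0;
    }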
@@ -5932,7 +6020,6 @@ void __meminit zone_pcp_update(struct zone *zone) | |||
5932 | } | 6020 | } |
5933 | #endif | 6021 | #endif |
5934 | 6022 | ||
5935 | #ifdef CONFIG_MEMORY_HOTREMOVE | ||
5936 | void zone_pcp_reset(struct zone *zone) | 6023 | void zone_pcp_reset(struct zone *zone) |
5937 | { | 6024 | { |
5938 | unsigned long flags; | 6025 | unsigned long flags; |
@@ -5952,6 +6039,7 @@ void zone_pcp_reset(struct zone *zone) | |||
5952 | local_irq_restore(flags); | 6039 | local_irq_restore(flags); |
5953 | } | 6040 | } |
5954 | 6041 | ||
6042 | #ifdef CONFIG_MEMORY_HOTREMOVE | ||
5955 | /* | 6043 | /* |
5956 | * All pages in the range must be isolated before calling this. | 6044 | * All pages in the range must be isolated before calling this. |
5957 | */ | 6045 | */ |
@@ -5978,6 +6066,16 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) | |||
5978 | continue; | 6066 | continue; |
5979 | } | 6067 | } |
5980 | page = pfn_to_page(pfn); | 6068 | page = pfn_to_page(pfn); |
6069 | /* | ||
6070 | * The HWPoisoned page may not be in the buddy system, and | ||
6071 | * its page_count() is not 0. | ||
6072 | */ | ||
6073 | if (unlikely(!PageBuddy(page) && PageHWPoison(page))) { | ||
6074 | pfn++; | ||
6075 | SetPageReserved(page); | ||
6076 | continue; | ||
6077 | } | ||
6078 | |||
5981 | BUG_ON(page_count(page)); | 6079 | BUG_ON(page_count(page)); |
5982 | BUG_ON(!PageBuddy(page)); | 6080 | BUG_ON(!PageBuddy(page)); |
5983 | order = page_order(page); | 6081 | order = page_order(page); |
@@ -5988,8 +6086,6 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) | |||
5988 | list_del(&page->lru); | 6086 | list_del(&page->lru); |
5989 | rmv_page_order(page); | 6087 | rmv_page_order(page); |
5990 | zone->free_area[order].nr_free--; | 6088 | zone->free_area[order].nr_free--; |
5991 | __mod_zone_page_state(zone, NR_FREE_PAGES, | ||
5992 | - (1UL << order)); | ||
5993 | for (i = 0; i < (1 << order); i++) | 6089 | for (i = 0; i < (1 << order); i++) |
5994 | SetPageReserved((page+i)); | 6090 | SetPageReserved((page+i)); |
5995 | pfn += (1 << order); | 6091 | pfn += (1 << order); |