Diffstat (limited to 'mm/page_alloc.c')
 mm/page_alloc.c | 498 ++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 395 insertions(+), 103 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bc6cc0e913bd..0dade3f18f7d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -58,6 +58,7 @@
 #include <linux/prefetch.h>
 #include <linux/migrate.h>
 #include <linux/page-debug-flags.h>
+#include <linux/sched/rt.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -201,11 +202,18 @@ static unsigned long __meminitdata nr_all_pages;
 static unsigned long __meminitdata dma_reserve;
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+/* Movable memory ranges, will also be used by memblock subsystem. */
+struct movablemem_map movablemem_map = {
+	.acpi = false,
+	.nr_map = 0,
+};
+
 static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
 static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
 static unsigned long __initdata required_kernelcore;
 static unsigned long __initdata required_movablecore;
 static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
+static unsigned long __meminitdata zone_movable_limit[MAX_NUMNODES];
 
 /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
 int movable_zone;
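The movablemem_map global initialized above is only used here; its type comes from the memblock headers touched elsewhere in this series. As a rough sketch of the layout this file assumes (field names are taken from the usage in this patch; the array bound and header location are assumptions):

/* sketch only, assuming the companion memblock patch */
struct movablemem_entry {
	unsigned long start_pfn;	/* start pfn of a movable range */
	unsigned long end_pfn;		/* end pfn of the range (exclusive) */
};

struct movablemem_map {
	bool acpi;			/* true: take movable ranges from firmware (SRAT) */
	int nr_map;			/* number of valid entries in map[] */
	struct movablemem_entry map[MAX_NUMNODES];	/* bound is an assumption */
};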
@@ -239,15 +247,20 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 	int ret = 0;
 	unsigned seq;
 	unsigned long pfn = page_to_pfn(page);
+	unsigned long sp, start_pfn;
 
 	do {
 		seq = zone_span_seqbegin(zone);
-		if (pfn >= zone->zone_start_pfn + zone->spanned_pages)
-			ret = 1;
-		else if (pfn < zone->zone_start_pfn)
+		start_pfn = zone->zone_start_pfn;
+		sp = zone->spanned_pages;
+		if (!zone_spans_pfn(zone, pfn))
 			ret = 1;
 	} while (zone_span_seqretry(zone, seq));
 
+	if (ret)
+		pr_err("page %lu outside zone [ %lu - %lu ]\n",
+			pfn, start_pfn, start_pfn + sp);
+
 	return ret;
 }
 
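zone_spans_pfn() and zone_end_pfn(), used here and in several later hunks, are small helpers added to include/linux/mmzone.h by a companion patch in this series; they are expected to look roughly like this:

static inline unsigned long zone_end_pfn(const struct zone *zone)
{
	/* one past the last pfn the zone spans */
	return zone->zone_start_pfn + zone->spanned_pages;
}

static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn)
{
	return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone);
}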
@@ -287,7 +300,7 @@ static void bad_page(struct page *page)
 
 	/* Don't complain about poisoned pages */
 	if (PageHWPoison(page)) {
-		reset_page_mapcount(page);	/* remove PageBuddy */
+		page_mapcount_reset(page);	/* remove PageBuddy */
 		return;
 	}
 
@@ -319,8 +332,8 @@ static void bad_page(struct page *page)
 	dump_stack();
 out:
 	/* Leave bad fields for debug, except PageBuddy could make trouble */
-	reset_page_mapcount(page);	/* remove PageBuddy */
-	add_taint(TAINT_BAD_PAGE);
+	page_mapcount_reset(page);	/* remove PageBuddy */
+	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 }
 
 /*
@@ -532,6 +545,8 @@ static inline void __free_one_page(struct page *page,
 	unsigned long uninitialized_var(buddy_idx);
 	struct page *buddy;
 
+	VM_BUG_ON(!zone_is_initialized(zone));
+
 	if (unlikely(PageCompound(page)))
 		if (unlikely(destroy_compound_page(page, order)))
 			return;
@@ -605,7 +620,7 @@ static inline int free_pages_check(struct page *page)
 		bad_page(page);
 		return 1;
 	}
-	reset_page_last_nid(page);
+	page_nid_reset_last(page);
 	if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
 		page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
 	return 0;
@@ -665,7 +680,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
 			__free_one_page(page, zone, 0, mt);
 			trace_mm_page_pcpu_drain(page, 0, mt);
-			if (likely(get_pageblock_migratetype(page) != MIGRATE_ISOLATE)) {
+			if (likely(!is_migrate_isolate_page(page))) {
 				__mod_zone_page_state(zone, NR_FREE_PAGES, 1);
 				if (is_migrate_cma(mt))
 					__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
@@ -683,7 +698,7 @@ static void free_one_page(struct zone *zone, struct page *page, int order,
 	zone->pages_scanned = 0;
 
 	__free_one_page(page, zone, order, migratetype);
-	if (unlikely(migratetype != MIGRATE_ISOLATE))
+	if (unlikely(!is_migrate_isolate(migratetype)))
 		__mod_zone_freepage_state(zone, 1 << order, migratetype);
 	spin_unlock(&zone->lock);
 }
@@ -773,6 +788,10 @@ void __init init_cma_reserved_pageblock(struct page *page)
 	set_pageblock_migratetype(page, MIGRATE_CMA);
 	__free_pages(page, pageblock_order);
 	totalram_pages += pageblock_nr_pages;
+#ifdef CONFIG_HIGHMEM
+	if (PageHighMem(page))
+		totalhigh_pages += pageblock_nr_pages;
+#endif
 }
 #endif
 
@@ -911,7 +930,9 @@ static int fallbacks[MIGRATE_TYPES][4] = {
 	[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
 #endif
 	[MIGRATE_RESERVE]     = { MIGRATE_RESERVE }, /* Never used */
+#ifdef CONFIG_MEMORY_ISOLATION
 	[MIGRATE_ISOLATE]     = { MIGRATE_RESERVE }, /* Never used */
+#endif
 };
 
 /*
@@ -976,9 +997,9 @@ int move_freepages_block(struct zone *zone, struct page *page,
 	end_pfn = start_pfn + pageblock_nr_pages - 1;
 
 	/* Do not cross zone boundaries */
-	if (start_pfn < zone->zone_start_pfn)
+	if (!zone_spans_pfn(zone, start_pfn))
 		start_page = page;
-	if (end_pfn >= zone->zone_start_pfn + zone->spanned_pages)
+	if (!zone_spans_pfn(zone, end_pfn))
 		return 0;
 
 	return move_freepages(zone, start_page, end_page, migratetype);
@@ -1137,7 +1158,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 		list_add_tail(&page->lru, list);
 		if (IS_ENABLED(CONFIG_CMA)) {
 			mt = get_pageblock_migratetype(page);
-			if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE)
+			if (!is_migrate_cma(mt) && !is_migrate_isolate(mt))
 				mt = migratetype;
 		}
 		set_freepage_migratetype(page, mt);
@@ -1272,7 +1293,7 @@ void mark_free_pages(struct zone *zone)
 
 	spin_lock_irqsave(&zone->lock, flags);
 
-	max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+	max_zone_pfn = zone_end_pfn(zone);
 	for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
 		if (pfn_valid(pfn)) {
 			struct page *page = pfn_to_page(pfn);
@@ -1321,7 +1342,7 @@ void free_hot_cold_page(struct page *page, int cold)
 	 * excessively into the page allocator
 	 */
 	if (migratetype >= MIGRATE_PCPTYPES) {
-		if (unlikely(migratetype == MIGRATE_ISOLATE)) {
+		if (unlikely(is_migrate_isolate(migratetype))) {
 			free_one_page(zone, page, 0, migratetype);
 			goto out;
 		}
@@ -1384,14 +1405,8 @@ void split_page(struct page *page, unsigned int order)
 		set_page_refcounted(page + i);
 }
 
-/*
- * Similar to the split_page family of functions except that the page
- * required at the given order and being isolated now to prevent races
- * with parallel allocators
- */
-int capture_free_page(struct page *page, int alloc_order, int migratetype)
+static int __isolate_free_page(struct page *page, unsigned int order)
 {
-	unsigned int order;
 	unsigned long watermark;
 	struct zone *zone;
 	int mt;
@@ -1399,16 +1414,15 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 	BUG_ON(!PageBuddy(page));
 
 	zone = page_zone(page);
-	order = page_order(page);
 	mt = get_pageblock_migratetype(page);
 
-	if (mt != MIGRATE_ISOLATE) {
+	if (!is_migrate_isolate(mt)) {
 		/* Obey watermarks as if the page was being allocated */
 		watermark = low_wmark_pages(zone) + (1 << order);
 		if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
 			return 0;
 
-		__mod_zone_freepage_state(zone, -(1UL << alloc_order), mt);
+		__mod_zone_freepage_state(zone, -(1UL << order), mt);
 	}
 
 	/* Remove page from free list */
@@ -1416,22 +1430,18 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 	zone->free_area[order].nr_free--;
 	rmv_page_order(page);
 
-	if (alloc_order != order)
-		expand(zone, page, alloc_order, order,
-			&zone->free_area[order], migratetype);
-
-	/* Set the pageblock if the captured page is at least a pageblock */
+	/* Set the pageblock if the isolated page is at least a pageblock */
 	if (order >= pageblock_order - 1) {
 		struct page *endpage = page + (1 << order) - 1;
 		for (; page < endpage; page += pageblock_nr_pages) {
 			int mt = get_pageblock_migratetype(page);
-			if (mt != MIGRATE_ISOLATE && !is_migrate_cma(mt))
+			if (!is_migrate_isolate(mt) && !is_migrate_cma(mt))
 				set_pageblock_migratetype(page,
 							  MIGRATE_MOVABLE);
 		}
 	}
 
-	return 1UL << alloc_order;
+	return 1UL << order;
 }
 
 /*
@@ -1449,10 +1459,9 @@ int split_free_page(struct page *page)
 	unsigned int order;
 	int nr_pages;
 
-	BUG_ON(!PageBuddy(page));
 	order = page_order(page);
 
-	nr_pages = capture_free_page(page, order, 0);
+	nr_pages = __isolate_free_page(page, order);
 	if (!nr_pages)
 		return 0;
 
@@ -2136,8 +2145,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	bool *contended_compaction, bool *deferred_compaction,
 	unsigned long *did_some_progress)
 {
-	struct page *page = NULL;
-
 	if (!order)
 		return NULL;
 
@@ -2149,16 +2156,12 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
 						nodemask, sync_migration,
-						contended_compaction, &page);
+						contended_compaction);
 	current->flags &= ~PF_MEMALLOC;
 
-	/* If compaction captured a page, prep and use it */
-	if (page) {
-		prep_new_page(page, order, gfp_mask);
-		goto got_page;
-	}
-
 	if (*did_some_progress != COMPACT_SKIPPED) {
+		struct page *page;
+
 		/* Page migration frees to the PCP lists but we want merging */
 		drain_pages(get_cpu());
 		put_cpu();
@@ -2168,7 +2171,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 				alloc_flags & ~ALLOC_NO_WATERMARKS,
 				preferred_zone, migratetype);
 		if (page) {
-got_page:
 			preferred_zone->compact_blockskip_flush = false;
 			preferred_zone->compact_considered = 0;
 			preferred_zone->compact_defer_shift = 0;
@@ -2629,10 +2631,17 @@ retry_cpuset:
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
 			zonelist, high_zoneidx, alloc_flags,
 			preferred_zone, migratetype);
-	if (unlikely(!page))
+	if (unlikely(!page)) {
+		/*
+		 * Runtime PM, block IO and its error handling path
+		 * can deadlock because I/O on the device might not
+		 * complete.
+		 */
+		gfp_mask = memalloc_noio_flags(gfp_mask);
 		page = __alloc_pages_slowpath(gfp_mask, order,
 				zonelist, high_zoneidx, nodemask,
 				preferred_zone, migratetype);
+	}
 
 	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
 
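memalloc_noio_flags() comes from the runtime-PM deadlock avoidance work merged alongside this change; it clears the I/O-related GFP bits when the calling task has marked itself as a no-I/O context. A sketch of what the helper is assumed to do (the exact header and flag name follow the companion sched patch):

static inline gfp_t memalloc_noio_flags(gfp_t flags)
{
	/* PF_MEMALLOC_NOIO is set around runtime PM / block resume paths */
	if (unlikely(current->flags & PF_MEMALLOC_NOIO))
		flags &= ~(__GFP_IO | __GFP_FS);
	return flags;
}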
@@ -2804,18 +2813,27 @@ void free_pages_exact(void *virt, size_t size)
 }
 EXPORT_SYMBOL(free_pages_exact);
 
-static unsigned int nr_free_zone_pages(int offset)
+/**
+ * nr_free_zone_pages - count number of pages beyond high watermark
+ * @offset: The zone index of the highest zone
+ *
+ * nr_free_zone_pages() counts the number of counts pages which are beyond the
+ * high watermark within all zones at or below a given zone index. For each
+ * zone, the number of pages is calculated as:
+ *     present_pages - high_pages
+ */
+static unsigned long nr_free_zone_pages(int offset)
 {
 	struct zoneref *z;
 	struct zone *zone;
 
 	/* Just pick one node, since fallback list is circular */
-	unsigned int sum = 0;
+	unsigned long sum = 0;
 
 	struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
 
 	for_each_zone_zonelist(zone, z, zonelist, offset) {
-		unsigned long size = zone->present_pages;
+		unsigned long size = zone->managed_pages;
 		unsigned long high = high_wmark_pages(zone);
 		if (size > high)
 			sum += size - high;
@@ -2824,19 +2842,25 @@ static unsigned int nr_free_zone_pages(int offset)
 	return sum;
 }
 
-/*
- * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL
+/**
+ * nr_free_buffer_pages - count number of pages beyond high watermark
+ *
+ * nr_free_buffer_pages() counts the number of pages which are beyond the high
+ * watermark within ZONE_DMA and ZONE_NORMAL.
  */
-unsigned int nr_free_buffer_pages(void)
+unsigned long nr_free_buffer_pages(void)
 {
 	return nr_free_zone_pages(gfp_zone(GFP_USER));
 }
 EXPORT_SYMBOL_GPL(nr_free_buffer_pages);
 
-/*
- * Amount of free RAM allocatable within all zones
+/**
+ * nr_free_pagecache_pages - count number of pages beyond high watermark
+ *
+ * nr_free_pagecache_pages() counts the number of pages which are beyond the
+ * high watermark within all zones.
  */
-unsigned int nr_free_pagecache_pages(void)
+unsigned long nr_free_pagecache_pages(void)
 {
 	return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER_MOVABLE));
 }
@@ -2868,7 +2892,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 	val->totalram = pgdat->node_present_pages;
 	val->freeram = node_page_state(nid, NR_FREE_PAGES);
 #ifdef CONFIG_HIGHMEM
-	val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
+	val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages;
 	val->freehigh = zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
 			NR_FREE_PAGES);
 #else
@@ -2911,7 +2935,9 @@ static void show_migration_types(unsigned char type)
 #ifdef CONFIG_CMA
 		[MIGRATE_CMA]		= 'C',
 #endif
+#ifdef CONFIG_MEMORY_ISOLATION
 		[MIGRATE_ISOLATE]	= 'I',
+#endif
 	};
 	char tmp[MIGRATE_TYPES + 1];
 	char *p = tmp;
@@ -3250,7 +3276,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
 {
 	int n, val;
 	int min_val = INT_MAX;
-	int best_node = -1;
+	int best_node = NUMA_NO_NODE;
 	const struct cpumask *tmp = cpumask_of_node(0);
 
 	/* Use the local node if we haven't already */
@@ -3794,7 +3820,7 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	 * the block.
 	 */
 	start_pfn = zone->zone_start_pfn;
-	end_pfn = start_pfn + zone->spanned_pages;
+	end_pfn = zone_end_pfn(zone);
 	start_pfn = roundup(start_pfn, pageblock_nr_pages);
 	reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
 							pageblock_order;
@@ -3890,8 +3916,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		set_page_links(page, zone, nid, pfn);
 		mminit_verify_page_links(page, zone, nid, pfn);
 		init_page_count(page);
-		reset_page_mapcount(page);
-		reset_page_last_nid(page);
+		page_mapcount_reset(page);
+		page_nid_reset_last(page);
 		SetPageReserved(page);
 		/*
 		 * Mark the block movable so that blocks are reserved for
@@ -3908,7 +3934,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		 * pfn out of zone.
 		 */
 		if ((z->zone_start_pfn <= pfn)
-		    && (pfn < z->zone_start_pfn + z->spanned_pages)
+		    && (pfn < zone_end_pfn(z))
 		    && !(pfn & (pageblock_nr_pages - 1)))
 			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
 
@@ -3946,7 +3972,7 @@ static int __meminit zone_batchsize(struct zone *zone)
 	 *
 	 * OK, so we don't know how big the cache is. So guess.
 	 */
-	batch = zone->present_pages / 1024;
+	batch = zone->managed_pages / 1024;
 	if (batch * PAGE_SIZE > 512 * 1024)
 		batch = (512 * 1024) / PAGE_SIZE;
 	batch /= 4;		/* We effectively *= 4 below */
@@ -4030,7 +4056,7 @@ static void __meminit setup_zone_pageset(struct zone *zone)
 
 		if (percpu_pagelist_fraction)
 			setup_pagelist_highmark(pcp,
-				(zone->present_pages /
+				(zone->managed_pages /
 					percpu_pagelist_fraction));
 	}
 }
@@ -4386,6 +4412,77 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
 	return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
 }
 
+/**
+ * sanitize_zone_movable_limit - Sanitize the zone_movable_limit array.
+ *
+ * zone_movable_limit is initialized as 0. This function will try to get
+ * the first ZONE_MOVABLE pfn of each node from movablemem_map, and
+ * assigne them to zone_movable_limit.
+ * zone_movable_limit[nid] == 0 means no limit for the node.
+ *
+ * Note: Each range is represented as [start_pfn, end_pfn)
+ */
+static void __meminit sanitize_zone_movable_limit(void)
+{
+	int map_pos = 0, i, nid;
+	unsigned long start_pfn, end_pfn;
+
+	if (!movablemem_map.nr_map)
+		return;
+
+	/* Iterate all ranges from minimum to maximum */
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
+		/*
+		 * If we have found lowest pfn of ZONE_MOVABLE of the node
+		 * specified by user, just go on to check next range.
+		 */
+		if (zone_movable_limit[nid])
+			continue;
+
+#ifdef CONFIG_ZONE_DMA
+		/* Skip DMA memory. */
+		if (start_pfn < arch_zone_highest_possible_pfn[ZONE_DMA])
+			start_pfn = arch_zone_highest_possible_pfn[ZONE_DMA];
+#endif
+
+#ifdef CONFIG_ZONE_DMA32
+		/* Skip DMA32 memory. */
+		if (start_pfn < arch_zone_highest_possible_pfn[ZONE_DMA32])
+			start_pfn = arch_zone_highest_possible_pfn[ZONE_DMA32];
+#endif
+
+#ifdef CONFIG_HIGHMEM
+		/* Skip lowmem if ZONE_MOVABLE is highmem. */
+		if (zone_movable_is_highmem() &&
+		    start_pfn < arch_zone_lowest_possible_pfn[ZONE_HIGHMEM])
+			start_pfn = arch_zone_lowest_possible_pfn[ZONE_HIGHMEM];
+#endif
+
+		if (start_pfn >= end_pfn)
+			continue;
+
+		while (map_pos < movablemem_map.nr_map) {
+			if (end_pfn <= movablemem_map.map[map_pos].start_pfn)
+				break;
+
+			if (start_pfn >= movablemem_map.map[map_pos].end_pfn) {
+				map_pos++;
+				continue;
+			}
+
+			/*
+			 * The start_pfn of ZONE_MOVABLE is either the minimum
+			 * pfn specified by movablemem_map, or 0, which means
+			 * the node has no ZONE_MOVABLE.
+			 */
+			zone_movable_limit[nid] = max(start_pfn,
+					movablemem_map.map[map_pos].start_pfn);
+
+			break;
+		}
+	}
+}
+
 #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
 					unsigned long zone_type,
@@ -4403,7 +4500,6 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
 
 	return zholes_size[zone_type];
 }
-
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
@@ -4435,10 +4531,11 @@ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
  * round what is now in bits to nearest long in bits, then return it in
  * bytes.
  */
-static unsigned long __init usemap_size(unsigned long zonesize)
+static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned long zonesize)
 {
 	unsigned long usemapsize;
 
+	zonesize += zone_start_pfn & (pageblock_nr_pages-1);
 	usemapsize = roundup(zonesize, pageblock_nr_pages);
 	usemapsize = usemapsize >> pageblock_order;
 	usemapsize *= NR_PAGEBLOCK_BITS;
@@ -4448,17 +4545,19 @@ static unsigned long __init usemap_size(unsigned long zonesize)
 }
 
 static void __init setup_usemap(struct pglist_data *pgdat,
-				struct zone *zone, unsigned long zonesize)
+				struct zone *zone,
+				unsigned long zone_start_pfn,
+				unsigned long zonesize)
 {
-	unsigned long usemapsize = usemap_size(zonesize);
+	unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize);
 	zone->pageblock_flags = NULL;
 	if (usemapsize)
 		zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat,
 								   usemapsize);
 }
 #else
-static inline void setup_usemap(struct pglist_data *pgdat,
-				struct zone *zone, unsigned long zonesize) {}
+static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
+				unsigned long zone_start_pfn, unsigned long zonesize) {}
 #endif /* CONFIG_SPARSEMEM */
 
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -4584,7 +4683,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		nr_all_pages += freesize;
 
 		zone->spanned_pages = size;
-		zone->present_pages = freesize;
+		zone->present_pages = realsize;
 		/*
 		 * Set an approximate value for lowmem here, it will be adjusted
 		 * when the bootmem allocator frees pages into the buddy system.
@@ -4609,7 +4708,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 			continue;
 
 		set_pageblock_order();
-		setup_usemap(pgdat, zone, size);
+		setup_usemap(pgdat, zone, zone_start_pfn, size);
 		ret = init_currently_empty_zone(zone, zone_start_pfn,
 						size, MEMMAP_EARLY);
 		BUG_ON(ret);
@@ -4636,7 +4735,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
 	 * for the buddy allocator to function correctly.
 	 */
 	start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
-	end = pgdat->node_start_pfn + pgdat->node_spanned_pages;
+	end = pgdat_end_pfn(pgdat);
 	end = ALIGN(end, MAX_ORDER_NR_PAGES);
 	size = (end - start) * sizeof(struct page);
 	map = alloc_remap(pgdat->node_id, size);
@@ -4842,12 +4941,19 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 		required_kernelcore = max(required_kernelcore, corepages);
 	}
 
-	/* If kernelcore was not specified, there is no ZONE_MOVABLE */
-	if (!required_kernelcore)
+	/*
+	 * If neither kernelcore/movablecore nor movablemem_map is specified,
+	 * there is no ZONE_MOVABLE. But if movablemem_map is specified, the
+	 * start pfn of ZONE_MOVABLE has been stored in zone_movable_limit[].
+	 */
+	if (!required_kernelcore) {
+		if (movablemem_map.nr_map)
+			memcpy(zone_movable_pfn, zone_movable_limit,
+				sizeof(zone_movable_pfn));
 		goto out;
+	}
 
 	/* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
-	find_usable_zone_for_movable();
 	usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone];
 
 restart:
@@ -4875,10 +4981,24 @@ restart:
 		for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
 			unsigned long size_pages;
 
+			/*
+			 * Find more memory for kernelcore in
+			 * [zone_movable_pfn[nid], zone_movable_limit[nid]).
+			 */
 			start_pfn = max(start_pfn, zone_movable_pfn[nid]);
 			if (start_pfn >= end_pfn)
 				continue;
 
+			if (zone_movable_limit[nid]) {
+				end_pfn = min(end_pfn, zone_movable_limit[nid]);
+				/* No range left for kernelcore in this node */
+				if (start_pfn >= end_pfn) {
+					zone_movable_pfn[nid] =
+						zone_movable_limit[nid];
+					break;
+				}
+			}
+
 			/* Account for what is only usable for kernelcore */
 			if (start_pfn < usable_startpfn) {
 				unsigned long kernel_pages;
@@ -4938,12 +5058,12 @@ restart:
 	if (usable_nodes && required_kernelcore > usable_nodes)
 		goto restart;
 
+out:
 	/* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */
 	for (nid = 0; nid < MAX_NUMNODES; nid++)
 		zone_movable_pfn[nid] =
 			roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
 
-out:
 	/* restore the node_state */
 	node_states[N_MEMORY] = saved_node_state;
 }
@@ -5006,6 +5126,8 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 
 	/* Find the PFNs that ZONE_MOVABLE begins at in each node */
 	memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
+	find_usable_zone_for_movable();
+	sanitize_zone_movable_limit();
 	find_zone_movable_pfns_for_nodes();
 
 	/* Print out the zone ranges */
@@ -5089,6 +5211,181 @@ static int __init cmdline_parse_movablecore(char *p)
 early_param("kernelcore", cmdline_parse_kernelcore);
 early_param("movablecore", cmdline_parse_movablecore);
 
+/**
+ * movablemem_map_overlap() - Check if a range overlaps movablemem_map.map[].
+ * @start_pfn: start pfn of the range to be checked
+ * @end_pfn: end pfn of the range to be checked (exclusive)
+ *
+ * This function checks if a given memory range [start_pfn, end_pfn) overlaps
+ * the movablemem_map.map[] array.
+ *
+ * Return: index of the first overlapped element in movablemem_map.map[]
+ *         or -1 if they don't overlap each other.
+ */
+int __init movablemem_map_overlap(unsigned long start_pfn,
+				  unsigned long end_pfn)
+{
+	int overlap;
+
+	if (!movablemem_map.nr_map)
+		return -1;
+
+	for (overlap = 0; overlap < movablemem_map.nr_map; overlap++)
+		if (start_pfn < movablemem_map.map[overlap].end_pfn)
+			break;
+
+	if (overlap == movablemem_map.nr_map ||
+	    end_pfn <= movablemem_map.map[overlap].start_pfn)
+		return -1;
+
+	return overlap;
+}
+
+/**
+ * insert_movablemem_map - Insert a memory range in to movablemem_map.map.
+ * @start_pfn: start pfn of the range
+ * @end_pfn: end pfn of the range
+ *
+ * This function will also merge the overlapped ranges, and sort the array
+ * by start_pfn in monotonic increasing order.
+ */
+void __init insert_movablemem_map(unsigned long start_pfn,
+				  unsigned long end_pfn)
+{
+	int pos, overlap;
+
+	/*
+	 * pos will be at the 1st overlapped range, or the position
+	 * where the element should be inserted.
+	 */
+	for (pos = 0; pos < movablemem_map.nr_map; pos++)
+		if (start_pfn <= movablemem_map.map[pos].end_pfn)
+			break;
+
+	/* If there is no overlapped range, just insert the element. */
+	if (pos == movablemem_map.nr_map ||
+	    end_pfn < movablemem_map.map[pos].start_pfn) {
+		/*
+		 * If pos is not the end of array, we need to move all
+		 * the rest elements backward.
+		 */
+		if (pos < movablemem_map.nr_map)
+			memmove(&movablemem_map.map[pos+1],
+				&movablemem_map.map[pos],
+				sizeof(struct movablemem_entry) *
+				(movablemem_map.nr_map - pos));
+		movablemem_map.map[pos].start_pfn = start_pfn;
+		movablemem_map.map[pos].end_pfn = end_pfn;
+		movablemem_map.nr_map++;
+		return;
+	}
+
+	/* overlap will be at the last overlapped range */
+	for (overlap = pos + 1; overlap < movablemem_map.nr_map; overlap++)
+		if (end_pfn < movablemem_map.map[overlap].start_pfn)
+			break;
+
+	/*
+	 * If there are more ranges overlapped, we need to merge them,
+	 * and move the rest elements forward.
+	 */
+	overlap--;
+	movablemem_map.map[pos].start_pfn = min(start_pfn,
+					movablemem_map.map[pos].start_pfn);
+	movablemem_map.map[pos].end_pfn = max(end_pfn,
+					movablemem_map.map[overlap].end_pfn);
+
+	if (pos != overlap && overlap + 1 != movablemem_map.nr_map)
+		memmove(&movablemem_map.map[pos+1],
+			&movablemem_map.map[overlap+1],
+			sizeof(struct movablemem_entry) *
+			(movablemem_map.nr_map - overlap - 1));
+
+	movablemem_map.nr_map -= overlap - pos;
+}
+
+/**
+ * movablemem_map_add_region - Add a memory range into movablemem_map.
+ * @start: physical start address of range
+ * @end: physical end address of range
+ *
+ * This function transform the physical address into pfn, and then add the
+ * range into movablemem_map by calling insert_movablemem_map().
+ */
+static void __init movablemem_map_add_region(u64 start, u64 size)
+{
+	unsigned long start_pfn, end_pfn;
+
+	/* In case size == 0 or start + size overflows */
+	if (start + size <= start)
+		return;
+
+	if (movablemem_map.nr_map >= ARRAY_SIZE(movablemem_map.map)) {
+		pr_err("movablemem_map: too many entries;"
+			" ignoring [mem %#010llx-%#010llx]\n",
+			(unsigned long long) start,
+			(unsigned long long) (start + size - 1));
+		return;
+	}
+
+	start_pfn = PFN_DOWN(start);
+	end_pfn = PFN_UP(start + size);
+	insert_movablemem_map(start_pfn, end_pfn);
+}
+
+/*
+ * cmdline_parse_movablemem_map - Parse boot option movablemem_map.
+ * @p: The boot option of the following format:
+ *     movablemem_map=nn[KMG]@ss[KMG]
+ *
+ * This option sets the memory range [ss, ss+nn) to be used as movable memory.
+ *
+ * Return: 0 on success or -EINVAL on failure.
+ */
+static int __init cmdline_parse_movablemem_map(char *p)
+{
+	char *oldp;
+	u64 start_at, mem_size;
+
+	if (!p)
+		goto err;
+
+	if (!strcmp(p, "acpi"))
+		movablemem_map.acpi = true;
+
+	/*
+	 * If user decide to use info from BIOS, all the other user specified
+	 * ranges will be ingored.
+	 */
+	if (movablemem_map.acpi) {
+		if (movablemem_map.nr_map) {
+			memset(movablemem_map.map, 0,
+				sizeof(struct movablemem_entry)
+				* movablemem_map.nr_map);
+			movablemem_map.nr_map = 0;
+		}
+		return 0;
+	}
+
+	oldp = p;
+	mem_size = memparse(p, &p);
+	if (p == oldp)
+		goto err;
+
+	if (*p == '@') {
+		oldp = ++p;
+		start_at = memparse(p, &p);
+		if (p == oldp || *p != '\0')
+			goto err;
+
+		movablemem_map_add_region(start_at, mem_size);
+		return 0;
+	}
+err:
+	return -EINVAL;
+}
+early_param("movablemem_map", cmdline_parse_movablemem_map);
+
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 /**
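For reference, the boot option documented in the comment above takes a size and a physical start address; the values below are purely illustrative:

	movablemem_map=acpi
	movablemem_map=64G@16G

The second form asks for the physical range [16G, 16G+64G) to end up in ZONE_MOVABLE; memparse() accepts the usual K/M/G suffixes, the option may be given more than once, and insert_movablemem_map() merges overlapping ranges and keeps the array sorted by start_pfn.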
@@ -5171,8 +5468,8 @@ static void calculate_totalreserve_pages(void)
 			/* we treat the high watermark as reserved pages. */
 			max += high_wmark_pages(zone);
 
-			if (max > zone->present_pages)
-				max = zone->present_pages;
+			if (max > zone->managed_pages)
+				max = zone->managed_pages;
 			reserve_pages += max;
 			/*
 			 * Lowmem reserves are not available to
@@ -5204,7 +5501,7 @@ static void setup_per_zone_lowmem_reserve(void)
 	for_each_online_pgdat(pgdat) {
 		for (j = 0; j < MAX_NR_ZONES; j++) {
 			struct zone *zone = pgdat->node_zones + j;
-			unsigned long present_pages = zone->present_pages;
+			unsigned long managed_pages = zone->managed_pages;
 
 			zone->lowmem_reserve[j] = 0;
 
@@ -5218,9 +5515,9 @@ static void setup_per_zone_lowmem_reserve(void)
 					sysctl_lowmem_reserve_ratio[idx] = 1;
 
 				lower_zone = pgdat->node_zones + idx;
-				lower_zone->lowmem_reserve[j] = present_pages /
+				lower_zone->lowmem_reserve[j] = managed_pages /
 					sysctl_lowmem_reserve_ratio[idx];
-				present_pages += lower_zone->present_pages;
+				managed_pages += lower_zone->managed_pages;
 			}
 		}
 	}
@@ -5239,14 +5536,14 @@ static void __setup_per_zone_wmarks(void)
 	/* Calculate total number of !ZONE_HIGHMEM pages */
 	for_each_zone(zone) {
 		if (!is_highmem(zone))
-			lowmem_pages += zone->present_pages;
+			lowmem_pages += zone->managed_pages;
 	}
 
 	for_each_zone(zone) {
 		u64 tmp;
 
 		spin_lock_irqsave(&zone->lock, flags);
-		tmp = (u64)pages_min * zone->present_pages;
+		tmp = (u64)pages_min * zone->managed_pages;
 		do_div(tmp, lowmem_pages);
 		if (is_highmem(zone)) {
 			/*
@@ -5258,13 +5555,10 @@ static void __setup_per_zone_wmarks(void)
 			 * deltas controls asynch page reclaim, and so should
 			 * not be capped for highmem.
 			 */
-			int min_pages;
+			unsigned long min_pages;
 
-			min_pages = zone->present_pages / 1024;
-			if (min_pages < SWAP_CLUSTER_MAX)
-				min_pages = SWAP_CLUSTER_MAX;
-			if (min_pages > 128)
-				min_pages = 128;
+			min_pages = zone->managed_pages / 1024;
+			min_pages = clamp(min_pages, SWAP_CLUSTER_MAX, 128UL);
 			zone->watermark[WMARK_MIN] = min_pages;
 		} else {
 			/*
@@ -5325,7 +5619,7 @@ static void __meminit calculate_zone_inactive_ratio(struct zone *zone)
 	unsigned int gb, ratio;
 
 	/* Zone size in gigabytes */
-	gb = zone->present_pages >> (30 - PAGE_SHIFT);
+	gb = zone->managed_pages >> (30 - PAGE_SHIFT);
 	if (gb)
 		ratio = int_sqrt(10 * gb);
 	else
@@ -5411,7 +5705,7 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
 		return rc;
 
 	for_each_zone(zone)
-		zone->min_unmapped_pages = (zone->present_pages *
+		zone->min_unmapped_pages = (zone->managed_pages *
 				sysctl_min_unmapped_ratio) / 100;
 	return 0;
 }
@@ -5427,7 +5721,7 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
 		return rc;
 
 	for_each_zone(zone)
-		zone->min_slab_pages = (zone->present_pages *
+		zone->min_slab_pages = (zone->managed_pages *
 				sysctl_min_slab_ratio) / 100;
 	return 0;
 }
@@ -5469,7 +5763,7 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
 	for_each_populated_zone(zone) {
 		for_each_possible_cpu(cpu) {
 			unsigned long high;
-			high = zone->present_pages / percpu_pagelist_fraction;
+			high = zone->managed_pages / percpu_pagelist_fraction;
 			setup_pagelist_highmark(
 				per_cpu_ptr(zone->pageset, cpu), high);
 		}
@@ -5604,7 +5898,7 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
 	pfn &= (PAGES_PER_SECTION-1);
 	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
 #else
-	pfn = pfn - zone->zone_start_pfn;
+	pfn = pfn - round_down(zone->zone_start_pfn, pageblock_nr_pages);
 	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
 #endif /* CONFIG_SPARSEMEM */
 }
@@ -5656,8 +5950,7 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
 	pfn = page_to_pfn(page);
 	bitmap = get_pageblock_bitmap(zone, pfn);
 	bitidx = pfn_to_bitidx(zone, pfn);
-	VM_BUG_ON(pfn < zone->zone_start_pfn);
-	VM_BUG_ON(pfn >= zone->zone_start_pfn + zone->spanned_pages);
+	VM_BUG_ON(!zone_spans_pfn(zone, pfn));
 
 	for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
 		if (flags & value)
@@ -5755,8 +6048,7 @@ bool is_pageblock_removable_nolock(struct page *page)
 
 	zone = page_zone(page);
 	pfn = page_to_pfn(page);
-	if (zone->zone_start_pfn > pfn ||
-			zone->zone_start_pfn + zone->spanned_pages <= pfn)
+	if (!zone_spans_pfn(zone, pfn))
 		return false;
 
 	return !has_unmovable_pages(zone, page, 0, true);
@@ -5812,14 +6104,14 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 							&cc->migratepages);
 		cc->nr_migratepages -= nr_reclaimed;
 
-		ret = migrate_pages(&cc->migratepages,
-				    alloc_migrate_target,
-				    0, false, MIGRATE_SYNC,
-				    MR_CMA);
+		ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
+				    0, MIGRATE_SYNC, MR_CMA);
 	}
-
-	putback_movable_pages(&cc->migratepages);
-	return ret > 0 ? 0 : ret;
+	if (ret < 0) {
+		putback_movable_pages(&cc->migratepages);
+		return ret;
+	}
+	return 0;
 }
 
 /**