Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 498 |
1 file changed, 395 insertions, 103 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bc6cc0e913bd..0dade3f18f7d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -58,6 +58,7 @@ | |||
58 | #include <linux/prefetch.h> | 58 | #include <linux/prefetch.h> |
59 | #include <linux/migrate.h> | 59 | #include <linux/migrate.h> |
60 | #include <linux/page-debug-flags.h> | 60 | #include <linux/page-debug-flags.h> |
61 | #include <linux/sched/rt.h> | ||
61 | 62 | ||
62 | #include <asm/tlbflush.h> | 63 | #include <asm/tlbflush.h> |
63 | #include <asm/div64.h> | 64 | #include <asm/div64.h> |
@@ -201,11 +202,18 @@ static unsigned long __meminitdata nr_all_pages; | |||
201 | static unsigned long __meminitdata dma_reserve; | 202 | static unsigned long __meminitdata dma_reserve; |
202 | 203 | ||
203 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | 204 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
205 | /* Movable memory ranges; also used by the memblock subsystem. */ | ||
206 | struct movablemem_map movablemem_map = { | ||
207 | .acpi = false, | ||
208 | .nr_map = 0, | ||
209 | }; | ||
210 | |||
204 | static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; | 211 | static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; |
205 | static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; | 212 | static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; |
206 | static unsigned long __initdata required_kernelcore; | 213 | static unsigned long __initdata required_kernelcore; |
207 | static unsigned long __initdata required_movablecore; | 214 | static unsigned long __initdata required_movablecore; |
208 | static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES]; | 215 | static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES]; |
216 | static unsigned long __meminitdata zone_movable_limit[MAX_NUMNODES]; | ||
209 | 217 | ||
210 | /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ | 218 | /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ |
211 | int movable_zone; | 219 | int movable_zone; |
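The movablemem_map structure instantiated above is declared in include/linux/mm.h by the same patch series. A minimal sketch of the assumed layout, with field meanings inferred from the uses below (the array bound MOVABLEMEM_MAP_MAX is an assumption):

	struct movablemem_entry {
		unsigned long start_pfn;	/* start pfn of the memory segment */
		unsigned long end_pfn;		/* end pfn of the segment (exclusive) */
	};

	struct movablemem_map {
		bool acpi;		/* true: use SRAT info from firmware */
		int nr_map;		/* number of valid entries in map[] */
		struct movablemem_entry map[MOVABLEMEM_MAP_MAX];
	};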
@@ -239,15 +247,20 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page) | |||
239 | int ret = 0; | 247 | int ret = 0; |
240 | unsigned seq; | 248 | unsigned seq; |
241 | unsigned long pfn = page_to_pfn(page); | 249 | unsigned long pfn = page_to_pfn(page); |
250 | unsigned long sp, start_pfn; | ||
242 | 251 | ||
243 | do { | 252 | do { |
244 | seq = zone_span_seqbegin(zone); | 253 | seq = zone_span_seqbegin(zone); |
245 | if (pfn >= zone->zone_start_pfn + zone->spanned_pages) | 254 | start_pfn = zone->zone_start_pfn; |
246 | ret = 1; | 255 | sp = zone->spanned_pages; |
247 | else if (pfn < zone->zone_start_pfn) | 256 | if (!zone_spans_pfn(zone, pfn)) |
248 | ret = 1; | 257 | ret = 1; |
249 | } while (zone_span_seqretry(zone, seq)); | 258 | } while (zone_span_seqretry(zone, seq)); |
250 | 259 | ||
260 | if (ret) | ||
261 | pr_err("page %lu outside zone [ %lu - %lu ]\n", | ||
262 | pfn, start_pfn, start_pfn + sp); | ||
263 | |||
251 | return ret; | 264 | return ret; |
252 | } | 265 | } |
253 | 266 | ||
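zone_spans_pfn() and zone_end_pfn(), used here and in several hunks below, replace the open-coded span comparisons. A sketch of their assumed definitions in include/linux/mmzone.h:

	static inline unsigned long zone_end_pfn(const struct zone *zone)
	{
		/* first pfn past the zone */
		return zone->zone_start_pfn + zone->spanned_pages;
	}

	static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn)
	{
		/* pfn lies in [zone_start_pfn, zone_end_pfn) */
		return zone->zone_start_pfn <= pfn && pfn < zone_end_pfn(zone);
	}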
@@ -287,7 +300,7 @@ static void bad_page(struct page *page) | |||
287 | 300 | ||
288 | /* Don't complain about poisoned pages */ | 301 | /* Don't complain about poisoned pages */ |
289 | if (PageHWPoison(page)) { | 302 | if (PageHWPoison(page)) { |
290 | reset_page_mapcount(page); /* remove PageBuddy */ | 303 | page_mapcount_reset(page); /* remove PageBuddy */ |
291 | return; | 304 | return; |
292 | } | 305 | } |
293 | 306 | ||
@@ -319,8 +332,8 @@ static void bad_page(struct page *page) | |||
319 | dump_stack(); | 332 | dump_stack(); |
320 | out: | 333 | out: |
321 | /* Leave bad fields for debug, except PageBuddy could make trouble */ | 334 | /* Leave bad fields for debug, except PageBuddy could make trouble */ |
322 | reset_page_mapcount(page); /* remove PageBuddy */ | 335 | page_mapcount_reset(page); /* remove PageBuddy */ |
323 | add_taint(TAINT_BAD_PAGE); | 336 | add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); |
324 | } | 337 | } |
325 | 338 | ||
326 | /* | 339 | /* |
@@ -532,6 +545,8 @@ static inline void __free_one_page(struct page *page, | |||
532 | unsigned long uninitialized_var(buddy_idx); | 545 | unsigned long uninitialized_var(buddy_idx); |
533 | struct page *buddy; | 546 | struct page *buddy; |
534 | 547 | ||
548 | VM_BUG_ON(!zone_is_initialized(zone)); | ||
549 | |||
535 | if (unlikely(PageCompound(page))) | 550 | if (unlikely(PageCompound(page))) |
536 | if (unlikely(destroy_compound_page(page, order))) | 551 | if (unlikely(destroy_compound_page(page, order))) |
537 | return; | 552 | return; |
@@ -605,7 +620,7 @@ static inline int free_pages_check(struct page *page) | |||
605 | bad_page(page); | 620 | bad_page(page); |
606 | return 1; | 621 | return 1; |
607 | } | 622 | } |
608 | reset_page_last_nid(page); | 623 | page_nid_reset_last(page); |
609 | if (page->flags & PAGE_FLAGS_CHECK_AT_PREP) | 624 | if (page->flags & PAGE_FLAGS_CHECK_AT_PREP) |
610 | page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; | 625 | page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; |
611 | return 0; | 626 | return 0; |
@@ -665,7 +680,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |||
665 | /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ | 680 | /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ |
666 | __free_one_page(page, zone, 0, mt); | 681 | __free_one_page(page, zone, 0, mt); |
667 | trace_mm_page_pcpu_drain(page, 0, mt); | 682 | trace_mm_page_pcpu_drain(page, 0, mt); |
668 | if (likely(get_pageblock_migratetype(page) != MIGRATE_ISOLATE)) { | 683 | if (likely(!is_migrate_isolate_page(page))) { |
669 | __mod_zone_page_state(zone, NR_FREE_PAGES, 1); | 684 | __mod_zone_page_state(zone, NR_FREE_PAGES, 1); |
670 | if (is_migrate_cma(mt)) | 685 | if (is_migrate_cma(mt)) |
671 | __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1); | 686 | __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1); |
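is_migrate_isolate() and is_migrate_isolate_page() come from include/linux/page-isolation.h; the point of the conversion is that both collapse to constant false when CONFIG_MEMORY_ISOLATION is off, so the isolation check vanishes from this hot path. A sketch of the assumed helpers:

	#ifdef CONFIG_MEMORY_ISOLATION
	static inline bool is_migrate_isolate_page(struct page *page)
	{
		return get_pageblock_migratetype(page) == MIGRATE_ISOLATE;
	}
	static inline bool is_migrate_isolate(int migratetype)
	{
		return migratetype == MIGRATE_ISOLATE;
	}
	#else
	static inline bool is_migrate_isolate_page(struct page *page)
	{
		return false;
	}
	static inline bool is_migrate_isolate(int migratetype)
	{
		return false;
	}
	#endif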
@@ -683,7 +698,7 @@ static void free_one_page(struct zone *zone, struct page *page, int order, | |||
683 | zone->pages_scanned = 0; | 698 | zone->pages_scanned = 0; |
684 | 699 | ||
685 | __free_one_page(page, zone, order, migratetype); | 700 | __free_one_page(page, zone, order, migratetype); |
686 | if (unlikely(migratetype != MIGRATE_ISOLATE)) | 701 | if (unlikely(!is_migrate_isolate(migratetype))) |
687 | __mod_zone_freepage_state(zone, 1 << order, migratetype); | 702 | __mod_zone_freepage_state(zone, 1 << order, migratetype); |
688 | spin_unlock(&zone->lock); | 703 | spin_unlock(&zone->lock); |
689 | } | 704 | } |
@@ -773,6 +788,10 @@ void __init init_cma_reserved_pageblock(struct page *page) | |||
773 | set_pageblock_migratetype(page, MIGRATE_CMA); | 788 | set_pageblock_migratetype(page, MIGRATE_CMA); |
774 | __free_pages(page, pageblock_order); | 789 | __free_pages(page, pageblock_order); |
775 | totalram_pages += pageblock_nr_pages; | 790 | totalram_pages += pageblock_nr_pages; |
791 | #ifdef CONFIG_HIGHMEM | ||
792 | if (PageHighMem(page)) | ||
793 | totalhigh_pages += pageblock_nr_pages; | ||
794 | #endif | ||
776 | } | 795 | } |
777 | #endif | 796 | #endif |
778 | 797 | ||
@@ -911,7 +930,9 @@ static int fallbacks[MIGRATE_TYPES][4] = { | |||
911 | [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, | 930 | [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, |
912 | #endif | 931 | #endif |
913 | [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ | 932 | [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ |
933 | #ifdef CONFIG_MEMORY_ISOLATION | ||
914 | [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */ | 934 | [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */ |
935 | #endif | ||
915 | }; | 936 | }; |
916 | 937 | ||
917 | /* | 938 | /* |
@@ -976,9 +997,9 @@ int move_freepages_block(struct zone *zone, struct page *page, | |||
976 | end_pfn = start_pfn + pageblock_nr_pages - 1; | 997 | end_pfn = start_pfn + pageblock_nr_pages - 1; |
977 | 998 | ||
978 | /* Do not cross zone boundaries */ | 999 | /* Do not cross zone boundaries */ |
979 | if (start_pfn < zone->zone_start_pfn) | 1000 | if (!zone_spans_pfn(zone, start_pfn)) |
980 | start_page = page; | 1001 | start_page = page; |
981 | if (end_pfn >= zone->zone_start_pfn + zone->spanned_pages) | 1002 | if (!zone_spans_pfn(zone, end_pfn)) |
982 | return 0; | 1003 | return 0; |
983 | 1004 | ||
984 | return move_freepages(zone, start_page, end_page, migratetype); | 1005 | return move_freepages(zone, start_page, end_page, migratetype); |
@@ -1137,7 +1158,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |||
1137 | list_add_tail(&page->lru, list); | 1158 | list_add_tail(&page->lru, list); |
1138 | if (IS_ENABLED(CONFIG_CMA)) { | 1159 | if (IS_ENABLED(CONFIG_CMA)) { |
1139 | mt = get_pageblock_migratetype(page); | 1160 | mt = get_pageblock_migratetype(page); |
1140 | if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE) | 1161 | if (!is_migrate_cma(mt) && !is_migrate_isolate(mt)) |
1141 | mt = migratetype; | 1162 | mt = migratetype; |
1142 | } | 1163 | } |
1143 | set_freepage_migratetype(page, mt); | 1164 | set_freepage_migratetype(page, mt); |
@@ -1272,7 +1293,7 @@ void mark_free_pages(struct zone *zone) | |||
1272 | 1293 | ||
1273 | spin_lock_irqsave(&zone->lock, flags); | 1294 | spin_lock_irqsave(&zone->lock, flags); |
1274 | 1295 | ||
1275 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; | 1296 | max_zone_pfn = zone_end_pfn(zone); |
1276 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | 1297 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
1277 | if (pfn_valid(pfn)) { | 1298 | if (pfn_valid(pfn)) { |
1278 | struct page *page = pfn_to_page(pfn); | 1299 | struct page *page = pfn_to_page(pfn); |
@@ -1321,7 +1342,7 @@ void free_hot_cold_page(struct page *page, int cold) | |||
1321 | * excessively into the page allocator | 1342 | * excessively into the page allocator |
1322 | */ | 1343 | */ |
1323 | if (migratetype >= MIGRATE_PCPTYPES) { | 1344 | if (migratetype >= MIGRATE_PCPTYPES) { |
1324 | if (unlikely(migratetype == MIGRATE_ISOLATE)) { | 1345 | if (unlikely(is_migrate_isolate(migratetype))) { |
1325 | free_one_page(zone, page, 0, migratetype); | 1346 | free_one_page(zone, page, 0, migratetype); |
1326 | goto out; | 1347 | goto out; |
1327 | } | 1348 | } |
@@ -1384,14 +1405,8 @@ void split_page(struct page *page, unsigned int order) | |||
1384 | set_page_refcounted(page + i); | 1405 | set_page_refcounted(page + i); |
1385 | } | 1406 | } |
1386 | 1407 | ||
1387 | /* | 1408 | static int __isolate_free_page(struct page *page, unsigned int order) |
1388 | * Similar to the split_page family of functions except that the page | ||
1389 | * required at the given order and being isolated now to prevent races | ||
1390 | * with parallel allocators | ||
1391 | */ | ||
1392 | int capture_free_page(struct page *page, int alloc_order, int migratetype) | ||
1393 | { | 1409 | { |
1394 | unsigned int order; | ||
1395 | unsigned long watermark; | 1410 | unsigned long watermark; |
1396 | struct zone *zone; | 1411 | struct zone *zone; |
1397 | int mt; | 1412 | int mt; |
@@ -1399,16 +1414,15 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) | |||
1399 | BUG_ON(!PageBuddy(page)); | 1414 | BUG_ON(!PageBuddy(page)); |
1400 | 1415 | ||
1401 | zone = page_zone(page); | 1416 | zone = page_zone(page); |
1402 | order = page_order(page); | ||
1403 | mt = get_pageblock_migratetype(page); | 1417 | mt = get_pageblock_migratetype(page); |
1404 | 1418 | ||
1405 | if (mt != MIGRATE_ISOLATE) { | 1419 | if (!is_migrate_isolate(mt)) { |
1406 | /* Obey watermarks as if the page was being allocated */ | 1420 | /* Obey watermarks as if the page was being allocated */ |
1407 | watermark = low_wmark_pages(zone) + (1 << order); | 1421 | watermark = low_wmark_pages(zone) + (1 << order); |
1408 | if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) | 1422 | if (!zone_watermark_ok(zone, 0, watermark, 0, 0)) |
1409 | return 0; | 1423 | return 0; |
1410 | 1424 | ||
1411 | __mod_zone_freepage_state(zone, -(1UL << alloc_order), mt); | 1425 | __mod_zone_freepage_state(zone, -(1UL << order), mt); |
1412 | } | 1426 | } |
1413 | 1427 | ||
1414 | /* Remove page from free list */ | 1428 | /* Remove page from free list */ |
@@ -1416,22 +1430,18 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype) | |||
1416 | zone->free_area[order].nr_free--; | 1430 | zone->free_area[order].nr_free--; |
1417 | rmv_page_order(page); | 1431 | rmv_page_order(page); |
1418 | 1432 | ||
1419 | if (alloc_order != order) | 1433 | /* Set the pageblock if the isolated page is at least a pageblock */ |
1420 | expand(zone, page, alloc_order, order, | ||
1421 | &zone->free_area[order], migratetype); | ||
1422 | |||
1423 | /* Set the pageblock if the captured page is at least a pageblock */ | ||
1424 | if (order >= pageblock_order - 1) { | 1434 | if (order >= pageblock_order - 1) { |
1425 | struct page *endpage = page + (1 << order) - 1; | 1435 | struct page *endpage = page + (1 << order) - 1; |
1426 | for (; page < endpage; page += pageblock_nr_pages) { | 1436 | for (; page < endpage; page += pageblock_nr_pages) { |
1427 | int mt = get_pageblock_migratetype(page); | 1437 | int mt = get_pageblock_migratetype(page); |
1428 | if (mt != MIGRATE_ISOLATE && !is_migrate_cma(mt)) | 1438 | if (!is_migrate_isolate(mt) && !is_migrate_cma(mt)) |
1429 | set_pageblock_migratetype(page, | 1439 | set_pageblock_migratetype(page, |
1430 | MIGRATE_MOVABLE); | 1440 | MIGRATE_MOVABLE); |
1431 | } | 1441 | } |
1432 | } | 1442 | } |
1433 | 1443 | ||
1434 | return 1UL << alloc_order; | 1444 | return 1UL << order; |
1435 | } | 1445 | } |
1436 | 1446 | ||
1437 | /* | 1447 | /* |
@@ -1449,10 +1459,9 @@ int split_free_page(struct page *page) | |||
1449 | unsigned int order; | 1459 | unsigned int order; |
1450 | int nr_pages; | 1460 | int nr_pages; |
1451 | 1461 | ||
1452 | BUG_ON(!PageBuddy(page)); | ||
1453 | order = page_order(page); | 1462 | order = page_order(page); |
1454 | 1463 | ||
1455 | nr_pages = capture_free_page(page, order, 0); | 1464 | nr_pages = __isolate_free_page(page, order); |
1456 | if (!nr_pages) | 1465 | if (!nr_pages) |
1457 | return 0; | 1466 | return 0; |
1458 | 1467 | ||
@@ -2136,8 +2145,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, | |||
2136 | bool *contended_compaction, bool *deferred_compaction, | 2145 | bool *contended_compaction, bool *deferred_compaction, |
2137 | unsigned long *did_some_progress) | 2146 | unsigned long *did_some_progress) |
2138 | { | 2147 | { |
2139 | struct page *page = NULL; | ||
2140 | |||
2141 | if (!order) | 2148 | if (!order) |
2142 | return NULL; | 2149 | return NULL; |
2143 | 2150 | ||
@@ -2149,16 +2156,12 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, | |||
2149 | current->flags |= PF_MEMALLOC; | 2156 | current->flags |= PF_MEMALLOC; |
2150 | *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, | 2157 | *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, |
2151 | nodemask, sync_migration, | 2158 | nodemask, sync_migration, |
2152 | contended_compaction, &page); | 2159 | contended_compaction); |
2153 | current->flags &= ~PF_MEMALLOC; | 2160 | current->flags &= ~PF_MEMALLOC; |
2154 | 2161 | ||
2155 | /* If compaction captured a page, prep and use it */ | ||
2156 | if (page) { | ||
2157 | prep_new_page(page, order, gfp_mask); | ||
2158 | goto got_page; | ||
2159 | } | ||
2160 | |||
2161 | if (*did_some_progress != COMPACT_SKIPPED) { | 2162 | if (*did_some_progress != COMPACT_SKIPPED) { |
2163 | struct page *page; | ||
2164 | |||
2162 | /* Page migration frees to the PCP lists but we want merging */ | 2165 | /* Page migration frees to the PCP lists but we want merging */ |
2163 | drain_pages(get_cpu()); | 2166 | drain_pages(get_cpu()); |
2164 | put_cpu(); | 2167 | put_cpu(); |
@@ -2168,7 +2171,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, | |||
2168 | alloc_flags & ~ALLOC_NO_WATERMARKS, | 2171 | alloc_flags & ~ALLOC_NO_WATERMARKS, |
2169 | preferred_zone, migratetype); | 2172 | preferred_zone, migratetype); |
2170 | if (page) { | 2173 | if (page) { |
2171 | got_page: | ||
2172 | preferred_zone->compact_blockskip_flush = false; | 2174 | preferred_zone->compact_blockskip_flush = false; |
2173 | preferred_zone->compact_considered = 0; | 2175 | preferred_zone->compact_considered = 0; |
2174 | preferred_zone->compact_defer_shift = 0; | 2176 | preferred_zone->compact_defer_shift = 0; |
@@ -2629,10 +2631,17 @@ retry_cpuset: | |||
2629 | page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, | 2631 | page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, |
2630 | zonelist, high_zoneidx, alloc_flags, | 2632 | zonelist, high_zoneidx, alloc_flags, |
2631 | preferred_zone, migratetype); | 2633 | preferred_zone, migratetype); |
2632 | if (unlikely(!page)) | 2634 | if (unlikely(!page)) { |
2635 | /* | ||
2636 | * Runtime PM, block IO and its error handling path | ||
2637 | * can deadlock because I/O on the device might not | ||
2638 | * complete. | ||
2639 | */ | ||
2640 | gfp_mask = memalloc_noio_flags(gfp_mask); | ||
2633 | page = __alloc_pages_slowpath(gfp_mask, order, | 2641 | page = __alloc_pages_slowpath(gfp_mask, order, |
2634 | zonelist, high_zoneidx, nodemask, | 2642 | zonelist, high_zoneidx, nodemask, |
2635 | preferred_zone, migratetype); | 2643 | preferred_zone, migratetype); |
2644 | } | ||
2636 | 2645 | ||
2637 | trace_mm_page_alloc(page, order, gfp_mask, migratetype); | 2646 | trace_mm_page_alloc(page, order, gfp_mask, migratetype); |
2638 | 2647 | ||
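memalloc_noio_flags() strips the I/O-capable GFP bits for tasks that have entered a PF_MEMALLOC_NOIO section (for example around runtime PM resume), so the allocation slowpath cannot recurse into block I/O on a device that is still suspended. A sketch of the assumed helper:

	static inline gfp_t memalloc_noio_flags(gfp_t flags)
	{
		/* callers inside a noio section must not do I/O or fs reclaim */
		if (unlikely(current->flags & PF_MEMALLOC_NOIO))
			flags &= ~(__GFP_IO | __GFP_FS);
		return flags;
	}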
@@ -2804,18 +2813,27 @@ void free_pages_exact(void *virt, size_t size) | |||
2804 | } | 2813 | } |
2805 | EXPORT_SYMBOL(free_pages_exact); | 2814 | EXPORT_SYMBOL(free_pages_exact); |
2806 | 2815 | ||
2807 | static unsigned int nr_free_zone_pages(int offset) | 2816 | /** |
2817 | * nr_free_zone_pages - count number of pages beyond high watermark | ||
2818 | * @offset: The zone index of the highest zone | ||
2819 | * | ||
2820 | * nr_free_zone_pages() counts the number of pages which are beyond the | ||
2821 | * high watermark within all zones at or below a given zone index. For each | ||
2822 | * zone, the number of pages is calculated as: | ||
2823 | * managed_pages - high_pages | ||
2824 | */ | ||
2825 | static unsigned long nr_free_zone_pages(int offset) | ||
2808 | { | 2826 | { |
2809 | struct zoneref *z; | 2827 | struct zoneref *z; |
2810 | struct zone *zone; | 2828 | struct zone *zone; |
2811 | 2829 | ||
2812 | /* Just pick one node, since fallback list is circular */ | 2830 | /* Just pick one node, since fallback list is circular */ |
2813 | unsigned int sum = 0; | 2831 | unsigned long sum = 0; |
2814 | 2832 | ||
2815 | struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL); | 2833 | struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL); |
2816 | 2834 | ||
2817 | for_each_zone_zonelist(zone, z, zonelist, offset) { | 2835 | for_each_zone_zonelist(zone, z, zonelist, offset) { |
2818 | unsigned long size = zone->present_pages; | 2836 | unsigned long size = zone->managed_pages; |
2819 | unsigned long high = high_wmark_pages(zone); | 2837 | unsigned long high = high_wmark_pages(zone); |
2820 | if (size > high) | 2838 | if (size > high) |
2821 | sum += size - high; | 2839 | sum += size - high; |
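Worked example with illustrative numbers: two zones at or below the given index, one with managed_pages = 1,000,000 and a high watermark of 20,000 pages, the other with managed_pages = 250,000 and a high watermark of 5,000, give sum = (1,000,000 - 20,000) + (250,000 - 5,000) = 1,225,000 pages.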
@@ -2824,19 +2842,25 @@ static unsigned int nr_free_zone_pages(int offset) | |||
2824 | return sum; | 2842 | return sum; |
2825 | } | 2843 | } |
2826 | 2844 | ||
2827 | /* | 2845 | /** |
2828 | * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL | 2846 | * nr_free_buffer_pages - count number of pages beyond high watermark |
2847 | * | ||
2848 | * nr_free_buffer_pages() counts the number of pages which are beyond the high | ||
2849 | * watermark within ZONE_DMA and ZONE_NORMAL. | ||
2829 | */ | 2850 | */ |
2830 | unsigned int nr_free_buffer_pages(void) | 2851 | unsigned long nr_free_buffer_pages(void) |
2831 | { | 2852 | { |
2832 | return nr_free_zone_pages(gfp_zone(GFP_USER)); | 2853 | return nr_free_zone_pages(gfp_zone(GFP_USER)); |
2833 | } | 2854 | } |
2834 | EXPORT_SYMBOL_GPL(nr_free_buffer_pages); | 2855 | EXPORT_SYMBOL_GPL(nr_free_buffer_pages); |
2835 | 2856 | ||
2836 | /* | 2857 | /** |
2837 | * Amount of free RAM allocatable within all zones | 2858 | * nr_free_pagecache_pages - count number of pages beyond high watermark |
2859 | * | ||
2860 | * nr_free_pagecache_pages() counts the number of pages which are beyond the | ||
2861 | * high watermark within all zones. | ||
2838 | */ | 2862 | */ |
2839 | unsigned int nr_free_pagecache_pages(void) | 2863 | unsigned long nr_free_pagecache_pages(void) |
2840 | { | 2864 | { |
2841 | return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER_MOVABLE)); | 2865 | return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER_MOVABLE)); |
2842 | } | 2866 | } |
@@ -2868,7 +2892,7 @@ void si_meminfo_node(struct sysinfo *val, int nid) | |||
2868 | val->totalram = pgdat->node_present_pages; | 2892 | val->totalram = pgdat->node_present_pages; |
2869 | val->freeram = node_page_state(nid, NR_FREE_PAGES); | 2893 | val->freeram = node_page_state(nid, NR_FREE_PAGES); |
2870 | #ifdef CONFIG_HIGHMEM | 2894 | #ifdef CONFIG_HIGHMEM |
2871 | val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages; | 2895 | val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages; |
2872 | val->freehigh = zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM], | 2896 | val->freehigh = zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM], |
2873 | NR_FREE_PAGES); | 2897 | NR_FREE_PAGES); |
2874 | #else | 2898 | #else |
@@ -2911,7 +2935,9 @@ static void show_migration_types(unsigned char type) | |||
2911 | #ifdef CONFIG_CMA | 2935 | #ifdef CONFIG_CMA |
2912 | [MIGRATE_CMA] = 'C', | 2936 | [MIGRATE_CMA] = 'C', |
2913 | #endif | 2937 | #endif |
2938 | #ifdef CONFIG_MEMORY_ISOLATION | ||
2914 | [MIGRATE_ISOLATE] = 'I', | 2939 | [MIGRATE_ISOLATE] = 'I', |
2940 | #endif | ||
2915 | }; | 2941 | }; |
2916 | char tmp[MIGRATE_TYPES + 1]; | 2942 | char tmp[MIGRATE_TYPES + 1]; |
2917 | char *p = tmp; | 2943 | char *p = tmp; |
@@ -3250,7 +3276,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask) | |||
3250 | { | 3276 | { |
3251 | int n, val; | 3277 | int n, val; |
3252 | int min_val = INT_MAX; | 3278 | int min_val = INT_MAX; |
3253 | int best_node = -1; | 3279 | int best_node = NUMA_NO_NODE; |
3254 | const struct cpumask *tmp = cpumask_of_node(0); | 3280 | const struct cpumask *tmp = cpumask_of_node(0); |
3255 | 3281 | ||
3256 | /* Use the local node if we haven't already */ | 3282 | /* Use the local node if we haven't already */ |
@@ -3794,7 +3820,7 @@ static void setup_zone_migrate_reserve(struct zone *zone) | |||
3794 | * the block. | 3820 | * the block. |
3795 | */ | 3821 | */ |
3796 | start_pfn = zone->zone_start_pfn; | 3822 | start_pfn = zone->zone_start_pfn; |
3797 | end_pfn = start_pfn + zone->spanned_pages; | 3823 | end_pfn = zone_end_pfn(zone); |
3798 | start_pfn = roundup(start_pfn, pageblock_nr_pages); | 3824 | start_pfn = roundup(start_pfn, pageblock_nr_pages); |
3799 | reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> | 3825 | reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> |
3800 | pageblock_order; | 3826 | pageblock_order; |
@@ -3890,8 +3916,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
3890 | set_page_links(page, zone, nid, pfn); | 3916 | set_page_links(page, zone, nid, pfn); |
3891 | mminit_verify_page_links(page, zone, nid, pfn); | 3917 | mminit_verify_page_links(page, zone, nid, pfn); |
3892 | init_page_count(page); | 3918 | init_page_count(page); |
3893 | reset_page_mapcount(page); | 3919 | page_mapcount_reset(page); |
3894 | reset_page_last_nid(page); | 3920 | page_nid_reset_last(page); |
3895 | SetPageReserved(page); | 3921 | SetPageReserved(page); |
3896 | /* | 3922 | /* |
3897 | * Mark the block movable so that blocks are reserved for | 3923 | * Mark the block movable so that blocks are reserved for |
@@ -3908,7 +3934,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
3908 | * pfn out of zone. | 3934 | * pfn out of zone. |
3909 | */ | 3935 | */ |
3910 | if ((z->zone_start_pfn <= pfn) | 3936 | if ((z->zone_start_pfn <= pfn) |
3911 | && (pfn < z->zone_start_pfn + z->spanned_pages) | 3937 | && (pfn < zone_end_pfn(z)) |
3912 | && !(pfn & (pageblock_nr_pages - 1))) | 3938 | && !(pfn & (pageblock_nr_pages - 1))) |
3913 | set_pageblock_migratetype(page, MIGRATE_MOVABLE); | 3939 | set_pageblock_migratetype(page, MIGRATE_MOVABLE); |
3914 | 3940 | ||
@@ -3946,7 +3972,7 @@ static int __meminit zone_batchsize(struct zone *zone) | |||
3946 | * | 3972 | * |
3947 | * OK, so we don't know how big the cache is. So guess. | 3973 | * OK, so we don't know how big the cache is. So guess. |
3948 | */ | 3974 | */ |
3949 | batch = zone->present_pages / 1024; | 3975 | batch = zone->managed_pages / 1024; |
3950 | if (batch * PAGE_SIZE > 512 * 1024) | 3976 | if (batch * PAGE_SIZE > 512 * 1024) |
3951 | batch = (512 * 1024) / PAGE_SIZE; | 3977 | batch = (512 * 1024) / PAGE_SIZE; |
3952 | batch /= 4; /* We effectively *= 4 below */ | 3978 | batch /= 4; /* We effectively *= 4 below */ |
@@ -4030,7 +4056,7 @@ static void __meminit setup_zone_pageset(struct zone *zone) | |||
4030 | 4056 | ||
4031 | if (percpu_pagelist_fraction) | 4057 | if (percpu_pagelist_fraction) |
4032 | setup_pagelist_highmark(pcp, | 4058 | setup_pagelist_highmark(pcp, |
4033 | (zone->present_pages / | 4059 | (zone->managed_pages / |
4034 | percpu_pagelist_fraction)); | 4060 | percpu_pagelist_fraction)); |
4035 | } | 4061 | } |
4036 | } | 4062 | } |
@@ -4386,6 +4412,77 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid, | |||
4386 | return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); | 4412 | return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); |
4387 | } | 4413 | } |
4388 | 4414 | ||
4415 | /** | ||
4416 | * sanitize_zone_movable_limit - Sanitize the zone_movable_limit array. | ||
4417 | * | ||
4418 | * zone_movable_limit is initialized as 0. This function will try to get | ||
4419 | * the first ZONE_MOVABLE pfn of each node from movablemem_map, and | ||
4420 | * assign them to zone_movable_limit. | ||
4421 | * zone_movable_limit[nid] == 0 means no limit for the node. | ||
4422 | * | ||
4423 | * Note: Each range is represented as [start_pfn, end_pfn) | ||
4424 | */ | ||
4425 | static void __meminit sanitize_zone_movable_limit(void) | ||
4426 | { | ||
4427 | int map_pos = 0, i, nid; | ||
4428 | unsigned long start_pfn, end_pfn; | ||
4429 | |||
4430 | if (!movablemem_map.nr_map) | ||
4431 | return; | ||
4432 | |||
4433 | /* Iterate all ranges from minimum to maximum */ | ||
4434 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { | ||
4435 | /* | ||
4436 | * If we have already found the lowest ZONE_MOVABLE pfn of the node | ||
4437 | * specified by the user, just go on to check the next range. | ||
4438 | */ | ||
4439 | if (zone_movable_limit[nid]) | ||
4440 | continue; | ||
4441 | |||
4442 | #ifdef CONFIG_ZONE_DMA | ||
4443 | /* Skip DMA memory. */ | ||
4444 | if (start_pfn < arch_zone_highest_possible_pfn[ZONE_DMA]) | ||
4445 | start_pfn = arch_zone_highest_possible_pfn[ZONE_DMA]; | ||
4446 | #endif | ||
4447 | |||
4448 | #ifdef CONFIG_ZONE_DMA32 | ||
4449 | /* Skip DMA32 memory. */ | ||
4450 | if (start_pfn < arch_zone_highest_possible_pfn[ZONE_DMA32]) | ||
4451 | start_pfn = arch_zone_highest_possible_pfn[ZONE_DMA32]; | ||
4452 | #endif | ||
4453 | |||
4454 | #ifdef CONFIG_HIGHMEM | ||
4455 | /* Skip lowmem if ZONE_MOVABLE is highmem. */ | ||
4456 | if (zone_movable_is_highmem() && | ||
4457 | start_pfn < arch_zone_lowest_possible_pfn[ZONE_HIGHMEM]) | ||
4458 | start_pfn = arch_zone_lowest_possible_pfn[ZONE_HIGHMEM]; | ||
4459 | #endif | ||
4460 | |||
4461 | if (start_pfn >= end_pfn) | ||
4462 | continue; | ||
4463 | |||
4464 | while (map_pos < movablemem_map.nr_map) { | ||
4465 | if (end_pfn <= movablemem_map.map[map_pos].start_pfn) | ||
4466 | break; | ||
4467 | |||
4468 | if (start_pfn >= movablemem_map.map[map_pos].end_pfn) { | ||
4469 | map_pos++; | ||
4470 | continue; | ||
4471 | } | ||
4472 | |||
4473 | /* | ||
4474 | * The start_pfn of ZONE_MOVABLE is either the minimum | ||
4475 | * pfn specified by movablemem_map, or 0, which means | ||
4476 | * the node has no ZONE_MOVABLE. | ||
4477 | */ | ||
4478 | zone_movable_limit[nid] = max(start_pfn, | ||
4479 | movablemem_map.map[map_pos].start_pfn); | ||
4480 | |||
4481 | break; | ||
4482 | } | ||
4483 | } | ||
4484 | } | ||
4485 | |||
4389 | #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ | 4486 | #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
4390 | static inline unsigned long __meminit zone_spanned_pages_in_node(int nid, | 4487 | static inline unsigned long __meminit zone_spanned_pages_in_node(int nid, |
4391 | unsigned long zone_type, | 4488 | unsigned long zone_type, |
@@ -4403,7 +4500,6 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid, | |||
4403 | 4500 | ||
4404 | return zholes_size[zone_type]; | 4501 | return zholes_size[zone_type]; |
4405 | } | 4502 | } |
4406 | |||
4407 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ | 4503 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
4408 | 4504 | ||
4409 | static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, | 4505 | static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, |
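Worked example for sanitize_zone_movable_limit(), with illustrative numbers: suppose node 0's memory spans pfns [0x0, 0x80000) and the user passed a movablemem_map entry covering [0x40000, 0x80000); ignore the DMA/DMA32 carve-outs for simplicity. Walking node 0's ranges from low to high, the first range that intersects the entry sets zone_movable_limit[0] = max(start_pfn, map[0].start_pfn) = 0x40000, so ZONE_MOVABLE on node 0 begins at pfn 0x40000. Nodes never named in movablemem_map keep zone_movable_limit[nid] == 0, meaning no limit.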
@@ -4435,10 +4531,11 @@ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, | |||
4435 | * round what is now in bits to nearest long in bits, then return it in | 4531 | * round what is now in bits to nearest long in bits, then return it in |
4436 | * bytes. | 4532 | * bytes. |
4437 | */ | 4533 | */ |
4438 | static unsigned long __init usemap_size(unsigned long zonesize) | 4534 | static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned long zonesize) |
4439 | { | 4535 | { |
4440 | unsigned long usemapsize; | 4536 | unsigned long usemapsize; |
4441 | 4537 | ||
4538 | zonesize += zone_start_pfn & (pageblock_nr_pages-1); | ||
4442 | usemapsize = roundup(zonesize, pageblock_nr_pages); | 4539 | usemapsize = roundup(zonesize, pageblock_nr_pages); |
4443 | usemapsize = usemapsize >> pageblock_order; | 4540 | usemapsize = usemapsize >> pageblock_order; |
4444 | usemapsize *= NR_PAGEBLOCK_BITS; | 4541 | usemapsize *= NR_PAGEBLOCK_BITS; |
@@ -4448,17 +4545,19 @@ static unsigned long __init usemap_size(unsigned long zonesize) | |||
4448 | } | 4545 | } |
4449 | 4546 | ||
4450 | static void __init setup_usemap(struct pglist_data *pgdat, | 4547 | static void __init setup_usemap(struct pglist_data *pgdat, |
4451 | struct zone *zone, unsigned long zonesize) | 4548 | struct zone *zone, |
4549 | unsigned long zone_start_pfn, | ||
4550 | unsigned long zonesize) | ||
4452 | { | 4551 | { |
4453 | unsigned long usemapsize = usemap_size(zonesize); | 4552 | unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize); |
4454 | zone->pageblock_flags = NULL; | 4553 | zone->pageblock_flags = NULL; |
4455 | if (usemapsize) | 4554 | if (usemapsize) |
4456 | zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat, | 4555 | zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat, |
4457 | usemapsize); | 4556 | usemapsize); |
4458 | } | 4557 | } |
4459 | #else | 4558 | #else |
4460 | static inline void setup_usemap(struct pglist_data *pgdat, | 4559 | static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone, |
4461 | struct zone *zone, unsigned long zonesize) {} | 4560 | unsigned long zone_start_pfn, unsigned long zonesize) {} |
4462 | #endif /* CONFIG_SPARSEMEM */ | 4561 | #endif /* CONFIG_SPARSEMEM */ |
4463 | 4562 | ||
4464 | #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE | 4563 | #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE |
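The new zone_start_pfn argument matters when a zone does not begin on a pageblock boundary: the usemap must be sized as if the zone started at the previous boundary, matching the round_down() added to pfn_to_bitidx() later in this patch. A worked example with illustrative numbers, assuming pageblock_nr_pages = 512 (pageblock_order = 9):

	/*
	 * zone_start_pfn = 100, zonesize = 1000: the zone covers pfns
	 * 100..1099, which touch pageblocks 0, 1 and 2 once bit indices
	 * are taken relative to round_down(100, 512) = 0.
	 *
	 * Before: roundup(1000, 512) >> 9       = 2 pageblocks (too few)
	 * After:  zonesize += 100 & 511 -> 1100
	 *         roundup(1100, 512) >> 9       = 3 pageblocks (correct)
	 */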
@@ -4584,7 +4683,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, | |||
4584 | nr_all_pages += freesize; | 4683 | nr_all_pages += freesize; |
4585 | 4684 | ||
4586 | zone->spanned_pages = size; | 4685 | zone->spanned_pages = size; |
4587 | zone->present_pages = freesize; | 4686 | zone->present_pages = realsize; |
4588 | /* | 4687 | /* |
4589 | * Set an approximate value for lowmem here, it will be adjusted | 4688 | * Set an approximate value for lowmem here, it will be adjusted |
4590 | * when the bootmem allocator frees pages into the buddy system. | 4689 | * when the bootmem allocator frees pages into the buddy system. |
@@ -4609,7 +4708,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, | |||
4609 | continue; | 4708 | continue; |
4610 | 4709 | ||
4611 | set_pageblock_order(); | 4710 | set_pageblock_order(); |
4612 | setup_usemap(pgdat, zone, size); | 4711 | setup_usemap(pgdat, zone, zone_start_pfn, size); |
4613 | ret = init_currently_empty_zone(zone, zone_start_pfn, | 4712 | ret = init_currently_empty_zone(zone, zone_start_pfn, |
4614 | size, MEMMAP_EARLY); | 4713 | size, MEMMAP_EARLY); |
4615 | BUG_ON(ret); | 4714 | BUG_ON(ret); |
@@ -4636,7 +4735,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat) | |||
4636 | * for the buddy allocator to function correctly. | 4735 | * for the buddy allocator to function correctly. |
4637 | */ | 4736 | */ |
4638 | start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1); | 4737 | start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1); |
4639 | end = pgdat->node_start_pfn + pgdat->node_spanned_pages; | 4738 | end = pgdat_end_pfn(pgdat); |
4640 | end = ALIGN(end, MAX_ORDER_NR_PAGES); | 4739 | end = ALIGN(end, MAX_ORDER_NR_PAGES); |
4641 | size = (end - start) * sizeof(struct page); | 4740 | size = (end - start) * sizeof(struct page); |
4642 | map = alloc_remap(pgdat->node_id, size); | 4741 | map = alloc_remap(pgdat->node_id, size); |
@@ -4842,12 +4941,19 @@ static void __init find_zone_movable_pfns_for_nodes(void) | |||
4842 | required_kernelcore = max(required_kernelcore, corepages); | 4941 | required_kernelcore = max(required_kernelcore, corepages); |
4843 | } | 4942 | } |
4844 | 4943 | ||
4845 | /* If kernelcore was not specified, there is no ZONE_MOVABLE */ | 4944 | /* |
4846 | if (!required_kernelcore) | 4945 | * If neither kernelcore/movablecore nor movablemem_map is specified, |
4946 | * there is no ZONE_MOVABLE. But if movablemem_map is specified, the | ||
4947 | * start pfn of ZONE_MOVABLE has been stored in zone_movable_limit[]. | ||
4948 | */ | ||
4949 | if (!required_kernelcore) { | ||
4950 | if (movablemem_map.nr_map) | ||
4951 | memcpy(zone_movable_pfn, zone_movable_limit, | ||
4952 | sizeof(zone_movable_pfn)); | ||
4847 | goto out; | 4953 | goto out; |
4954 | } | ||
4848 | 4955 | ||
4849 | /* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */ | 4956 | /* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */ |
4850 | find_usable_zone_for_movable(); | ||
4851 | usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone]; | 4957 | usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone]; |
4852 | 4958 | ||
4853 | restart: | 4959 | restart: |
@@ -4875,10 +4981,24 @@ restart: | |||
4875 | for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { | 4981 | for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { |
4876 | unsigned long size_pages; | 4982 | unsigned long size_pages; |
4877 | 4983 | ||
4984 | /* | ||
4985 | * Find more memory for kernelcore in | ||
4986 | * [zone_movable_pfn[nid], zone_movable_limit[nid]). | ||
4987 | */ | ||
4878 | start_pfn = max(start_pfn, zone_movable_pfn[nid]); | 4988 | start_pfn = max(start_pfn, zone_movable_pfn[nid]); |
4879 | if (start_pfn >= end_pfn) | 4989 | if (start_pfn >= end_pfn) |
4880 | continue; | 4990 | continue; |
4881 | 4991 | ||
4992 | if (zone_movable_limit[nid]) { | ||
4993 | end_pfn = min(end_pfn, zone_movable_limit[nid]); | ||
4994 | /* No range left for kernelcore in this node */ | ||
4995 | if (start_pfn >= end_pfn) { | ||
4996 | zone_movable_pfn[nid] = | ||
4997 | zone_movable_limit[nid]; | ||
4998 | break; | ||
4999 | } | ||
5000 | } | ||
5001 | |||
4882 | /* Account for what is only usable for kernelcore */ | 5002 | /* Account for what is only usable for kernelcore */ |
4883 | if (start_pfn < usable_startpfn) { | 5003 | if (start_pfn < usable_startpfn) { |
4884 | unsigned long kernel_pages; | 5004 | unsigned long kernel_pages; |
@@ -4938,12 +5058,12 @@ restart: | |||
4938 | if (usable_nodes && required_kernelcore > usable_nodes) | 5058 | if (usable_nodes && required_kernelcore > usable_nodes) |
4939 | goto restart; | 5059 | goto restart; |
4940 | 5060 | ||
5061 | out: | ||
4941 | /* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */ | 5062 | /* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */ |
4942 | for (nid = 0; nid < MAX_NUMNODES; nid++) | 5063 | for (nid = 0; nid < MAX_NUMNODES; nid++) |
4943 | zone_movable_pfn[nid] = | 5064 | zone_movable_pfn[nid] = |
4944 | roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES); | 5065 | roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES); |
4945 | 5066 | ||
4946 | out: | ||
4947 | /* restore the node_state */ | 5067 | /* restore the node_state */ |
4948 | node_states[N_MEMORY] = saved_node_state; | 5068 | node_states[N_MEMORY] = saved_node_state; |
4949 | } | 5069 | } |
@@ -5006,6 +5126,8 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) | |||
5006 | 5126 | ||
5007 | /* Find the PFNs that ZONE_MOVABLE begins at in each node */ | 5127 | /* Find the PFNs that ZONE_MOVABLE begins at in each node */ |
5008 | memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn)); | 5128 | memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn)); |
5129 | find_usable_zone_for_movable(); | ||
5130 | sanitize_zone_movable_limit(); | ||
5009 | find_zone_movable_pfns_for_nodes(); | 5131 | find_zone_movable_pfns_for_nodes(); |
5010 | 5132 | ||
5011 | /* Print out the zone ranges */ | 5133 | /* Print out the zone ranges */ |
@@ -5089,6 +5211,181 @@ static int __init cmdline_parse_movablecore(char *p) | |||
5089 | early_param("kernelcore", cmdline_parse_kernelcore); | 5211 | early_param("kernelcore", cmdline_parse_kernelcore); |
5090 | early_param("movablecore", cmdline_parse_movablecore); | 5212 | early_param("movablecore", cmdline_parse_movablecore); |
5091 | 5213 | ||
5214 | /** | ||
5215 | * movablemem_map_overlap() - Check if a range overlaps movablemem_map.map[]. | ||
5216 | * @start_pfn: start pfn of the range to be checked | ||
5217 | * @end_pfn: end pfn of the range to be checked (exclusive) | ||
5218 | * | ||
5219 | * This function checks if a given memory range [start_pfn, end_pfn) overlaps | ||
5220 | * the movablemem_map.map[] array. | ||
5221 | * | ||
5222 | * Return: index of the first overlapping element in movablemem_map.map[], | ||
5223 | * or -1 if the range overlaps none of them. | ||
5224 | */ | ||
5225 | int __init movablemem_map_overlap(unsigned long start_pfn, | ||
5226 | unsigned long end_pfn) | ||
5227 | { | ||
5228 | int overlap; | ||
5229 | |||
5230 | if (!movablemem_map.nr_map) | ||
5231 | return -1; | ||
5232 | |||
5233 | for (overlap = 0; overlap < movablemem_map.nr_map; overlap++) | ||
5234 | if (start_pfn < movablemem_map.map[overlap].end_pfn) | ||
5235 | break; | ||
5236 | |||
5237 | if (overlap == movablemem_map.nr_map || | ||
5238 | end_pfn <= movablemem_map.map[overlap].start_pfn) | ||
5239 | return -1; | ||
5240 | |||
5241 | return overlap; | ||
5242 | } | ||
5243 | |||
5244 | /** | ||
5245 | * insert_movablemem_map - Insert a memory range in to movablemem_map.map. | ||
5246 | * @start_pfn: start pfn of the range | ||
5247 | * @end_pfn: end pfn of the range | ||
5248 | * | ||
5249 | * This function also merges overlapping ranges, and keeps the array | ||
5250 | * sorted by start_pfn in increasing order. | ||
5251 | */ | ||
5252 | void __init insert_movablemem_map(unsigned long start_pfn, | ||
5253 | unsigned long end_pfn) | ||
5254 | { | ||
5255 | int pos, overlap; | ||
5256 | |||
5257 | /* | ||
5258 | * pos will be at the first overlapping range, or at the position | ||
5259 | * where the element should be inserted. | ||
5260 | */ | ||
5261 | for (pos = 0; pos < movablemem_map.nr_map; pos++) | ||
5262 | if (start_pfn <= movablemem_map.map[pos].end_pfn) | ||
5263 | break; | ||
5264 | |||
5265 | /* If there is no overlapping range, just insert the element. */ | ||
5266 | if (pos == movablemem_map.nr_map || | ||
5267 | end_pfn < movablemem_map.map[pos].start_pfn) { | ||
5268 | /* | ||
5269 | * If pos is not at the end of the array, we need to move | ||
5270 | * all the following elements backward. | ||
5271 | */ | ||
5272 | if (pos < movablemem_map.nr_map) | ||
5273 | memmove(&movablemem_map.map[pos+1], | ||
5274 | &movablemem_map.map[pos], | ||
5275 | sizeof(struct movablemem_entry) * | ||
5276 | (movablemem_map.nr_map - pos)); | ||
5277 | movablemem_map.map[pos].start_pfn = start_pfn; | ||
5278 | movablemem_map.map[pos].end_pfn = end_pfn; | ||
5279 | movablemem_map.nr_map++; | ||
5280 | return; | ||
5281 | } | ||
5282 | |||
5283 | /* overlap will be at the last overlapping range */ | ||
5284 | for (overlap = pos + 1; overlap < movablemem_map.nr_map; overlap++) | ||
5285 | if (end_pfn < movablemem_map.map[overlap].start_pfn) | ||
5286 | break; | ||
5287 | |||
5288 | /* | ||
5289 | * If more ranges overlap, we need to merge them and move | ||
5290 | * the remaining elements forward. | ||
5291 | */ | ||
5292 | overlap--; | ||
5293 | movablemem_map.map[pos].start_pfn = min(start_pfn, | ||
5294 | movablemem_map.map[pos].start_pfn); | ||
5295 | movablemem_map.map[pos].end_pfn = max(end_pfn, | ||
5296 | movablemem_map.map[overlap].end_pfn); | ||
5297 | |||
5298 | if (pos != overlap && overlap + 1 != movablemem_map.nr_map) | ||
5299 | memmove(&movablemem_map.map[pos+1], | ||
5300 | &movablemem_map.map[overlap+1], | ||
5301 | sizeof(struct movablemem_entry) * | ||
5302 | (movablemem_map.nr_map - overlap - 1)); | ||
5303 | |||
5304 | movablemem_map.nr_map -= overlap - pos; | ||
5305 | } | ||
5306 | |||
5307 | /** | ||
5308 | * movablemem_map_add_region - Add a memory range into movablemem_map. | ||
5309 | * @start: physical start address of the range | ||
5310 | * @size: size of the range in bytes | ||
5311 | * | ||
5312 | * This function transforms the physical addresses into pfns, and then adds | ||
5313 | * the range into movablemem_map by calling insert_movablemem_map(). | ||
5314 | */ | ||
5315 | static void __init movablemem_map_add_region(u64 start, u64 size) | ||
5316 | { | ||
5317 | unsigned long start_pfn, end_pfn; | ||
5318 | |||
5319 | /* In case size == 0 or start + size overflows */ | ||
5320 | if (start + size <= start) | ||
5321 | return; | ||
5322 | |||
5323 | if (movablemem_map.nr_map >= ARRAY_SIZE(movablemem_map.map)) { | ||
5324 | pr_err("movablemem_map: too many entries;" | ||
5325 | " ignoring [mem %#010llx-%#010llx]\n", | ||
5326 | (unsigned long long) start, | ||
5327 | (unsigned long long) (start + size - 1)); | ||
5328 | return; | ||
5329 | } | ||
5330 | |||
5331 | start_pfn = PFN_DOWN(start); | ||
5332 | end_pfn = PFN_UP(start + size); | ||
5333 | insert_movablemem_map(start_pfn, end_pfn); | ||
5334 | } | ||
5335 | |||
5336 | /* | ||
5337 | * cmdline_parse_movablemem_map - Parse boot option movablemem_map. | ||
5338 | * @p: The boot option of the following format: | ||
5339 | * movablemem_map=nn[KMG]@ss[KMG], or movablemem_map=acpi | ||
5340 | * | ||
5341 | * This option sets the memory range [ss, ss+nn) to be used as movable memory. | ||
5342 | * | ||
5343 | * Return: 0 on success or -EINVAL on failure. | ||
5344 | */ | ||
5345 | static int __init cmdline_parse_movablemem_map(char *p) | ||
5346 | { | ||
5347 | char *oldp; | ||
5348 | u64 start_at, mem_size; | ||
5349 | |||
5350 | if (!p) | ||
5351 | goto err; | ||
5352 | |||
5353 | if (!strcmp(p, "acpi")) | ||
5354 | movablemem_map.acpi = true; | ||
5355 | |||
5356 | /* | ||
5357 | * If the user decides to use info from the BIOS, all the other | ||
5358 | * user-specified ranges will be ignored. | ||
5359 | */ | ||
5360 | if (movablemem_map.acpi) { | ||
5361 | if (movablemem_map.nr_map) { | ||
5362 | memset(movablemem_map.map, 0, | ||
5363 | sizeof(struct movablemem_entry) | ||
5364 | * movablemem_map.nr_map); | ||
5365 | movablemem_map.nr_map = 0; | ||
5366 | } | ||
5367 | return 0; | ||
5368 | } | ||
5369 | |||
5370 | oldp = p; | ||
5371 | mem_size = memparse(p, &p); | ||
5372 | if (p == oldp) | ||
5373 | goto err; | ||
5374 | |||
5375 | if (*p == '@') { | ||
5376 | oldp = ++p; | ||
5377 | start_at = memparse(p, &p); | ||
5378 | if (p == oldp || *p != '\0') | ||
5379 | goto err; | ||
5380 | |||
5381 | movablemem_map_add_region(start_at, mem_size); | ||
5382 | return 0; | ||
5383 | } | ||
5384 | err: | ||
5385 | return -EINVAL; | ||
5386 | } | ||
5387 | early_param("movablemem_map", cmdline_parse_movablemem_map); | ||
5388 | |||
5092 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ | 5389 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
5093 | 5390 | ||
5094 | /** | 5391 | /** |
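Usage sketch for the new boot option, with illustrative values. On the kernel command line:

	movablemem_map=4G@8G	(physical [8G, 12G) becomes movable memory)
	movablemem_map=acpi	(drop explicit ranges, use SRAT info instead)

The option may be given several times; each range is converted to pfns with PFN_DOWN()/PFN_UP() and fed to insert_movablemem_map(), which keeps the array sorted and merged. For example, inserting pfn ranges [0x1000, 0x2000) and then [0x1800, 0x3000) leaves a single entry [0x1000, 0x3000).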
@@ -5171,8 +5468,8 @@ static void calculate_totalreserve_pages(void) | |||
5171 | /* we treat the high watermark as reserved pages. */ | 5468 | /* we treat the high watermark as reserved pages. */ |
5172 | max += high_wmark_pages(zone); | 5469 | max += high_wmark_pages(zone); |
5173 | 5470 | ||
5174 | if (max > zone->present_pages) | 5471 | if (max > zone->managed_pages) |
5175 | max = zone->present_pages; | 5472 | max = zone->managed_pages; |
5176 | reserve_pages += max; | 5473 | reserve_pages += max; |
5177 | /* | 5474 | /* |
5178 | * Lowmem reserves are not available to | 5475 | * Lowmem reserves are not available to |
@@ -5204,7 +5501,7 @@ static void setup_per_zone_lowmem_reserve(void) | |||
5204 | for_each_online_pgdat(pgdat) { | 5501 | for_each_online_pgdat(pgdat) { |
5205 | for (j = 0; j < MAX_NR_ZONES; j++) { | 5502 | for (j = 0; j < MAX_NR_ZONES; j++) { |
5206 | struct zone *zone = pgdat->node_zones + j; | 5503 | struct zone *zone = pgdat->node_zones + j; |
5207 | unsigned long present_pages = zone->present_pages; | 5504 | unsigned long managed_pages = zone->managed_pages; |
5208 | 5505 | ||
5209 | zone->lowmem_reserve[j] = 0; | 5506 | zone->lowmem_reserve[j] = 0; |
5210 | 5507 | ||
@@ -5218,9 +5515,9 @@ static void setup_per_zone_lowmem_reserve(void) | |||
5218 | sysctl_lowmem_reserve_ratio[idx] = 1; | 5515 | sysctl_lowmem_reserve_ratio[idx] = 1; |
5219 | 5516 | ||
5220 | lower_zone = pgdat->node_zones + idx; | 5517 | lower_zone = pgdat->node_zones + idx; |
5221 | lower_zone->lowmem_reserve[j] = present_pages / | 5518 | lower_zone->lowmem_reserve[j] = managed_pages / |
5222 | sysctl_lowmem_reserve_ratio[idx]; | 5519 | sysctl_lowmem_reserve_ratio[idx]; |
5223 | present_pages += lower_zone->present_pages; | 5520 | managed_pages += lower_zone->managed_pages; |
5224 | } | 5521 | } |
5225 | } | 5522 | } |
5226 | } | 5523 | } |
@@ -5239,14 +5536,14 @@ static void __setup_per_zone_wmarks(void) | |||
5239 | /* Calculate total number of !ZONE_HIGHMEM pages */ | 5536 | /* Calculate total number of !ZONE_HIGHMEM pages */ |
5240 | for_each_zone(zone) { | 5537 | for_each_zone(zone) { |
5241 | if (!is_highmem(zone)) | 5538 | if (!is_highmem(zone)) |
5242 | lowmem_pages += zone->present_pages; | 5539 | lowmem_pages += zone->managed_pages; |
5243 | } | 5540 | } |
5244 | 5541 | ||
5245 | for_each_zone(zone) { | 5542 | for_each_zone(zone) { |
5246 | u64 tmp; | 5543 | u64 tmp; |
5247 | 5544 | ||
5248 | spin_lock_irqsave(&zone->lock, flags); | 5545 | spin_lock_irqsave(&zone->lock, flags); |
5249 | tmp = (u64)pages_min * zone->present_pages; | 5546 | tmp = (u64)pages_min * zone->managed_pages; |
5250 | do_div(tmp, lowmem_pages); | 5547 | do_div(tmp, lowmem_pages); |
5251 | if (is_highmem(zone)) { | 5548 | if (is_highmem(zone)) { |
5252 | /* | 5549 | /* |
@@ -5258,13 +5555,10 @@ static void __setup_per_zone_wmarks(void) | |||
5258 | * deltas controls asynch page reclaim, and so should | 5555 | * deltas controls asynch page reclaim, and so should |
5259 | * not be capped for highmem. | 5556 | * not be capped for highmem. |
5260 | */ | 5557 | */ |
5261 | int min_pages; | 5558 | unsigned long min_pages; |
5262 | 5559 | ||
5263 | min_pages = zone->present_pages / 1024; | 5560 | min_pages = zone->managed_pages / 1024; |
5264 | if (min_pages < SWAP_CLUSTER_MAX) | 5561 | min_pages = clamp(min_pages, SWAP_CLUSTER_MAX, 128UL); |
5265 | min_pages = SWAP_CLUSTER_MAX; | ||
5266 | if (min_pages > 128) | ||
5267 | min_pages = 128; | ||
5268 | zone->watermark[WMARK_MIN] = min_pages; | 5562 | zone->watermark[WMARK_MIN] = min_pages; |
5269 | } else { | 5563 | } else { |
5270 | /* | 5564 | /* |
@@ -5325,7 +5619,7 @@ static void __meminit calculate_zone_inactive_ratio(struct zone *zone) | |||
5325 | unsigned int gb, ratio; | 5619 | unsigned int gb, ratio; |
5326 | 5620 | ||
5327 | /* Zone size in gigabytes */ | 5621 | /* Zone size in gigabytes */ |
5328 | gb = zone->present_pages >> (30 - PAGE_SHIFT); | 5622 | gb = zone->managed_pages >> (30 - PAGE_SHIFT); |
5329 | if (gb) | 5623 | if (gb) |
5330 | ratio = int_sqrt(10 * gb); | 5624 | ratio = int_sqrt(10 * gb); |
5331 | else | 5625 | else |
@@ -5411,7 +5705,7 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write, | |||
5411 | return rc; | 5705 | return rc; |
5412 | 5706 | ||
5413 | for_each_zone(zone) | 5707 | for_each_zone(zone) |
5414 | zone->min_unmapped_pages = (zone->present_pages * | 5708 | zone->min_unmapped_pages = (zone->managed_pages * |
5415 | sysctl_min_unmapped_ratio) / 100; | 5709 | sysctl_min_unmapped_ratio) / 100; |
5416 | return 0; | 5710 | return 0; |
5417 | } | 5711 | } |
@@ -5427,7 +5721,7 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write, | |||
5427 | return rc; | 5721 | return rc; |
5428 | 5722 | ||
5429 | for_each_zone(zone) | 5723 | for_each_zone(zone) |
5430 | zone->min_slab_pages = (zone->present_pages * | 5724 | zone->min_slab_pages = (zone->managed_pages * |
5431 | sysctl_min_slab_ratio) / 100; | 5725 | sysctl_min_slab_ratio) / 100; |
5432 | return 0; | 5726 | return 0; |
5433 | } | 5727 | } |
@@ -5469,7 +5763,7 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, | |||
5469 | for_each_populated_zone(zone) { | 5763 | for_each_populated_zone(zone) { |
5470 | for_each_possible_cpu(cpu) { | 5764 | for_each_possible_cpu(cpu) { |
5471 | unsigned long high; | 5765 | unsigned long high; |
5472 | high = zone->present_pages / percpu_pagelist_fraction; | 5766 | high = zone->managed_pages / percpu_pagelist_fraction; |
5473 | setup_pagelist_highmark( | 5767 | setup_pagelist_highmark( |
5474 | per_cpu_ptr(zone->pageset, cpu), high); | 5768 | per_cpu_ptr(zone->pageset, cpu), high); |
5475 | } | 5769 | } |
@@ -5604,7 +5898,7 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn) | |||
5604 | pfn &= (PAGES_PER_SECTION-1); | 5898 | pfn &= (PAGES_PER_SECTION-1); |
5605 | return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS; | 5899 | return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS; |
5606 | #else | 5900 | #else |
5607 | pfn = pfn - zone->zone_start_pfn; | 5901 | pfn = pfn - round_down(zone->zone_start_pfn, pageblock_nr_pages); |
5608 | return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS; | 5902 | return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS; |
5609 | #endif /* CONFIG_SPARSEMEM */ | 5903 | #endif /* CONFIG_SPARSEMEM */ |
5610 | } | 5904 | } |
@@ -5656,8 +5950,7 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags, | |||
5656 | pfn = page_to_pfn(page); | 5950 | pfn = page_to_pfn(page); |
5657 | bitmap = get_pageblock_bitmap(zone, pfn); | 5951 | bitmap = get_pageblock_bitmap(zone, pfn); |
5658 | bitidx = pfn_to_bitidx(zone, pfn); | 5952 | bitidx = pfn_to_bitidx(zone, pfn); |
5659 | VM_BUG_ON(pfn < zone->zone_start_pfn); | 5953 | VM_BUG_ON(!zone_spans_pfn(zone, pfn)); |
5660 | VM_BUG_ON(pfn >= zone->zone_start_pfn + zone->spanned_pages); | ||
5661 | 5954 | ||
5662 | for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1) | 5955 | for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1) |
5663 | if (flags & value) | 5956 | if (flags & value) |
@@ -5755,8 +6048,7 @@ bool is_pageblock_removable_nolock(struct page *page) | |||
5755 | 6048 | ||
5756 | zone = page_zone(page); | 6049 | zone = page_zone(page); |
5757 | pfn = page_to_pfn(page); | 6050 | pfn = page_to_pfn(page); |
5758 | if (zone->zone_start_pfn > pfn || | 6051 | if (!zone_spans_pfn(zone, pfn)) |
5759 | zone->zone_start_pfn + zone->spanned_pages <= pfn) | ||
5760 | return false; | 6052 | return false; |
5761 | 6053 | ||
5762 | return !has_unmovable_pages(zone, page, 0, true); | 6054 | return !has_unmovable_pages(zone, page, 0, true); |
@@ -5812,14 +6104,14 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, | |||
5812 | &cc->migratepages); | 6104 | &cc->migratepages); |
5813 | cc->nr_migratepages -= nr_reclaimed; | 6105 | cc->nr_migratepages -= nr_reclaimed; |
5814 | 6106 | ||
5815 | ret = migrate_pages(&cc->migratepages, | 6107 | ret = migrate_pages(&cc->migratepages, alloc_migrate_target, |
5816 | alloc_migrate_target, | 6108 | 0, MIGRATE_SYNC, MR_CMA); |
5817 | 0, false, MIGRATE_SYNC, | ||
5818 | MR_CMA); | ||
5819 | } | 6109 | } |
5820 | 6110 | if (ret < 0) { | |
5821 | putback_movable_pages(&cc->migratepages); | 6111 | putback_movable_pages(&cc->migratepages); |
5822 | return ret > 0 ? 0 : ret; | 6112 | return ret; |
6113 | } | ||
6114 | return 0; | ||
5823 | } | 6115 | } |
5824 | 6116 | ||
5825 | /** | 6117 | /** |