Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  533
1 file changed, 417 insertions, 116 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 838ca8bb64f7..59de90d5d3a3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -223,6 +223,19 @@ static char * const zone_names[MAX_NR_ZONES] = {
223#endif 223#endif
224}; 224};
225 225
226char * const migratetype_names[MIGRATE_TYPES] = {
227 "Unmovable",
228 "Movable",
229 "Reclaimable",
230 "HighAtomic",
231#ifdef CONFIG_CMA
232 "CMA",
233#endif
234#ifdef CONFIG_MEMORY_ISOLATION
235 "Isolate",
236#endif
237};
238
226compound_page_dtor * const compound_page_dtors[] = { 239compound_page_dtor * const compound_page_dtors[] = {
227 NULL, 240 NULL,
228 free_compound_page, 241 free_compound_page,
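The migratetype_names[] array added above gives the free-list migrate types printable names, presumably so that other MM code (vmstat, tracing) can label pageblocks consistently. A minimal sketch of how a consumer might use it together with the usual pageblock helpers; dump_pageblock_type() is a hypothetical debugging helper, not part of this patch:

/* Hypothetical helper (not in this patch): report the migratetype of the
 * pageblock containing @page by name. */
static void dump_pageblock_type(struct page *page)
{
	int mt = get_pageblock_migratetype(page);

	pr_info("pfn %lu sits in a %s pageblock\n",
		page_to_pfn(page), migratetype_names[mt]);
}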
@@ -236,6 +249,7 @@ compound_page_dtor * const compound_page_dtors[] = {
236 249
237int min_free_kbytes = 1024; 250int min_free_kbytes = 1024;
238int user_min_free_kbytes = -1; 251int user_min_free_kbytes = -1;
252int watermark_scale_factor = 10;
239 253
240static unsigned long __meminitdata nr_kernel_pages; 254static unsigned long __meminitdata nr_kernel_pages;
241static unsigned long __meminitdata nr_all_pages; 255static unsigned long __meminitdata nr_all_pages;
@@ -247,6 +261,7 @@ static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
247static unsigned long __initdata required_kernelcore; 261static unsigned long __initdata required_kernelcore;
248static unsigned long __initdata required_movablecore; 262static unsigned long __initdata required_movablecore;
249static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES]; 263static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
264static bool mirrored_kernelcore;
250 265
251/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ 266/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
252int movable_zone; 267int movable_zone;
@@ -293,13 +308,20 @@ static inline bool update_defer_init(pg_data_t *pgdat,
293 unsigned long pfn, unsigned long zone_end, 308 unsigned long pfn, unsigned long zone_end,
294 unsigned long *nr_initialised) 309 unsigned long *nr_initialised)
295{ 310{
311 unsigned long max_initialise;
312
296 /* Always populate low zones for address-contrained allocations */ 313 /* Always populate low zones for address-contrained allocations */
297 if (zone_end < pgdat_end_pfn(pgdat)) 314 if (zone_end < pgdat_end_pfn(pgdat))
298 return true; 315 return true;
316 /*
317 * Initialise at least 2G of a node but also take into account that
318 * two large system hashes that can take up 1GB for 0.25TB/node.
319 */
320 max_initialise = max(2UL << (30 - PAGE_SHIFT),
321 (pgdat->node_spanned_pages >> 8));
299 322
300 /* Initialise at least 2G of the highest zone */
301 (*nr_initialised)++; 323 (*nr_initialised)++;
302 if (*nr_initialised > (2UL << (30 - PAGE_SHIFT)) && 324 if ((*nr_initialised > max_initialise) &&
303 (pfn & (PAGES_PER_SECTION - 1)) == 0) { 325 (pfn & (PAGES_PER_SECTION - 1)) == 0) {
304 pgdat->first_deferred_pfn = pfn; 326 pgdat->first_deferred_pfn = pfn;
305 return false; 327 return false;
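The new bound replaces the flat 2G limit on deferred struct page init with one that also scales with node size, because the large system hashes allocated at boot can consume roughly 1GB per 0.25TB of a node. A standalone sketch of that arithmetic (userspace, assuming 4K pages; the node sizes are illustrative):

#include <stdio.h>

#define PAGE_SHIFT 12UL

static unsigned long max_initialise(unsigned long node_spanned_pages)
{
	unsigned long two_gb = 2UL << (30 - PAGE_SHIFT);	/* 2G worth of pages */
	unsigned long hash_estimate = node_spanned_pages >> 8;	/* ~1G per 0.25TB */

	return two_gb > hash_estimate ? two_gb : hash_estimate;
}

int main(void)
{
	/* 0.25TB node: the 2G floor still wins (524288 pages). */
	printf("%lu\n", max_initialise(1UL << 26));
	/* 4TB node: the hash estimate dominates (4194304 pages, i.e. 16G). */
	printf("%lu\n", max_initialise(1UL << 30));
	return 0;
}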
@@ -416,7 +438,7 @@ static void bad_page(struct page *page, const char *reason,
416 goto out; 438 goto out;
417 } 439 }
418 if (nr_unshown) { 440 if (nr_unshown) {
419 printk(KERN_ALERT 441 pr_alert(
420 "BUG: Bad page state: %lu messages suppressed\n", 442 "BUG: Bad page state: %lu messages suppressed\n",
421 nr_unshown); 443 nr_unshown);
422 nr_unshown = 0; 444 nr_unshown = 0;
@@ -426,9 +448,14 @@ static void bad_page(struct page *page, const char *reason,
426 if (nr_shown++ == 0) 448 if (nr_shown++ == 0)
427 resume = jiffies + 60 * HZ; 449 resume = jiffies + 60 * HZ;
428 450
429 printk(KERN_ALERT "BUG: Bad page state in process %s pfn:%05lx\n", 451 pr_alert("BUG: Bad page state in process %s pfn:%05lx\n",
430 current->comm, page_to_pfn(page)); 452 current->comm, page_to_pfn(page));
431 dump_page_badflags(page, reason, bad_flags); 453 __dump_page(page, reason);
454 bad_flags &= page->flags;
455 if (bad_flags)
456 pr_alert("bad because of flags: %#lx(%pGp)\n",
457 bad_flags, &bad_flags);
458 dump_page_owner(page);
432 459
433 print_modules(); 460 print_modules();
434 dump_stack(); 461 dump_stack();
@@ -477,7 +504,9 @@ void prep_compound_page(struct page *page, unsigned int order)
477 504
478#ifdef CONFIG_DEBUG_PAGEALLOC 505#ifdef CONFIG_DEBUG_PAGEALLOC
479unsigned int _debug_guardpage_minorder; 506unsigned int _debug_guardpage_minorder;
480bool _debug_pagealloc_enabled __read_mostly; 507bool _debug_pagealloc_enabled __read_mostly
508 = IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT);
509EXPORT_SYMBOL(_debug_pagealloc_enabled);
481bool _debug_guardpage_enabled __read_mostly; 510bool _debug_guardpage_enabled __read_mostly;
482 511
483static int __init early_debug_pagealloc(char *buf) 512static int __init early_debug_pagealloc(char *buf)
@@ -488,6 +517,9 @@ static int __init early_debug_pagealloc(char *buf)
488 if (strcmp(buf, "on") == 0) 517 if (strcmp(buf, "on") == 0)
489 _debug_pagealloc_enabled = true; 518 _debug_pagealloc_enabled = true;
490 519
520 if (strcmp(buf, "off") == 0)
521 _debug_pagealloc_enabled = false;
522
491 return 0; 523 return 0;
492} 524}
493early_param("debug_pagealloc", early_debug_pagealloc); 525early_param("debug_pagealloc", early_debug_pagealloc);
@@ -519,11 +551,11 @@ static int __init debug_guardpage_minorder_setup(char *buf)
519 unsigned long res; 551 unsigned long res;
520 552
521 if (kstrtoul(buf, 10, &res) < 0 || res > MAX_ORDER / 2) { 553 if (kstrtoul(buf, 10, &res) < 0 || res > MAX_ORDER / 2) {
522 printk(KERN_ERR "Bad debug_guardpage_minorder value\n"); 554 pr_err("Bad debug_guardpage_minorder value\n");
523 return 0; 555 return 0;
524 } 556 }
525 _debug_guardpage_minorder = res; 557 _debug_guardpage_minorder = res;
526 printk(KERN_INFO "Setting debug_guardpage_minorder to %lu\n", res); 558 pr_info("Setting debug_guardpage_minorder to %lu\n", res);
527 return 0; 559 return 0;
528} 560}
529__setup("debug_guardpage_minorder=", debug_guardpage_minorder_setup); 561__setup("debug_guardpage_minorder=", debug_guardpage_minorder_setup);
@@ -660,34 +692,28 @@ static inline void __free_one_page(struct page *page,
660 unsigned long combined_idx; 692 unsigned long combined_idx;
661 unsigned long uninitialized_var(buddy_idx); 693 unsigned long uninitialized_var(buddy_idx);
662 struct page *buddy; 694 struct page *buddy;
663 unsigned int max_order = MAX_ORDER; 695 unsigned int max_order;
696
697 max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
664 698
665 VM_BUG_ON(!zone_is_initialized(zone)); 699 VM_BUG_ON(!zone_is_initialized(zone));
666 VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page); 700 VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
667 701
668 VM_BUG_ON(migratetype == -1); 702 VM_BUG_ON(migratetype == -1);
669 if (is_migrate_isolate(migratetype)) { 703 if (likely(!is_migrate_isolate(migratetype)))
670 /*
671 * We restrict max order of merging to prevent merge
672 * between freepages on isolate pageblock and normal
673 * pageblock. Without this, pageblock isolation
674 * could cause incorrect freepage accounting.
675 */
676 max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
677 } else {
678 __mod_zone_freepage_state(zone, 1 << order, migratetype); 704 __mod_zone_freepage_state(zone, 1 << order, migratetype);
679 }
680 705
681 page_idx = pfn & ((1 << max_order) - 1); 706 page_idx = pfn & ((1 << MAX_ORDER) - 1);
682 707
683 VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page); 708 VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
684 VM_BUG_ON_PAGE(bad_range(zone, page), page); 709 VM_BUG_ON_PAGE(bad_range(zone, page), page);
685 710
711continue_merging:
686 while (order < max_order - 1) { 712 while (order < max_order - 1) {
687 buddy_idx = __find_buddy_index(page_idx, order); 713 buddy_idx = __find_buddy_index(page_idx, order);
688 buddy = page + (buddy_idx - page_idx); 714 buddy = page + (buddy_idx - page_idx);
689 if (!page_is_buddy(page, buddy, order)) 715 if (!page_is_buddy(page, buddy, order))
690 break; 716 goto done_merging;
691 /* 717 /*
692 * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page, 718 * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
693 * merge with it and move up one order. 719 * merge with it and move up one order.
@@ -704,6 +730,32 @@ static inline void __free_one_page(struct page *page,
704 page_idx = combined_idx; 730 page_idx = combined_idx;
705 order++; 731 order++;
706 } 732 }
733 if (max_order < MAX_ORDER) {
734 /* If we are here, it means order is >= pageblock_order.
735 * We want to prevent merge between freepages on isolate
736 * pageblock and normal pageblock. Without this, pageblock
737 * isolation could cause incorrect freepage or CMA accounting.
738 *
739 * We don't want to hit this code for the more frequent
740 * low-order merging.
741 */
742 if (unlikely(has_isolate_pageblock(zone))) {
743 int buddy_mt;
744
745 buddy_idx = __find_buddy_index(page_idx, order);
746 buddy = page + (buddy_idx - page_idx);
747 buddy_mt = get_pageblock_migratetype(buddy);
748
749 if (migratetype != buddy_mt
750 && (is_migrate_isolate(migratetype) ||
751 is_migrate_isolate(buddy_mt)))
752 goto done_merging;
753 }
754 max_order++;
755 goto continue_merging;
756 }
757
758done_merging:
707 set_page_order(page, order); 759 set_page_order(page, order);
708 760
709 /* 761 /*
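The continue_merging/done_merging split lets the common case merge freely below pageblock_order and only consult pageblock migratetypes once a merge would cross a pageblock boundary. The index arithmetic it relies on is simple: in this kernel __find_buddy_index() amounts to flipping bit 'order' of the page index, and the merged block starts at the lower of the two indices. A standalone illustration with made-up values:

#include <stdio.h>

int main(void)
{
	unsigned long page_idx = 8, order = 3;
	unsigned long buddy_idx = page_idx ^ (1UL << order);	/* 0 */
	unsigned long combined_idx = buddy_idx & page_idx;	/* 0 */

	printf("buddy of %lu at order %lu is %lu, merged block starts at %lu\n",
	       page_idx, order, buddy_idx, combined_idx);
	return 0;
}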
@@ -741,7 +793,7 @@ static inline int free_pages_check(struct page *page)
741 bad_reason = "nonzero mapcount"; 793 bad_reason = "nonzero mapcount";
742 if (unlikely(page->mapping != NULL)) 794 if (unlikely(page->mapping != NULL))
743 bad_reason = "non-NULL mapping"; 795 bad_reason = "non-NULL mapping";
744 if (unlikely(atomic_read(&page->_count) != 0)) 796 if (unlikely(page_ref_count(page) != 0))
745 bad_reason = "nonzero _count"; 797 bad_reason = "nonzero _count";
746 if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_FREE)) { 798 if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_FREE)) {
747 bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set"; 799 bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set";
@@ -1002,6 +1054,7 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
1002 PAGE_SIZE << order); 1054 PAGE_SIZE << order);
1003 } 1055 }
1004 arch_free_page(page, order); 1056 arch_free_page(page, order);
1057 kernel_poison_pages(page, 1 << order, 0);
1005 kernel_map_pages(page, 1 << order, 0); 1058 kernel_map_pages(page, 1 << order, 0);
1006 1059
1007 return true; 1060 return true;
@@ -1104,6 +1157,75 @@ void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
1104 return __free_pages_boot_core(page, pfn, order); 1157 return __free_pages_boot_core(page, pfn, order);
1105} 1158}
1106 1159
1160/*
1161 * Check that the whole (or subset of) a pageblock given by the interval of
1162 * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
1163 * with the migration of free compaction scanner. The scanners then need to
1164 * use only pfn_valid_within() check for arches that allow holes within
1165 * pageblocks.
1166 *
1167 * Return struct page pointer of start_pfn, or NULL if checks were not passed.
1168 *
1169 * It's possible on some configurations to have a setup like node0 node1 node0
1170 * i.e. it's possible that all pages within a zones range of pages do not
1171 * belong to a single zone. We assume that a border between node0 and node1
1172 * can occur within a single pageblock, but not a node0 node1 node0
1173 * interleaving within a single pageblock. It is therefore sufficient to check
1174 * the first and last page of a pageblock and avoid checking each individual
1175 * page in a pageblock.
1176 */
1177struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
1178 unsigned long end_pfn, struct zone *zone)
1179{
1180 struct page *start_page;
1181 struct page *end_page;
1182
1183 /* end_pfn is one past the range we are checking */
1184 end_pfn--;
1185
1186 if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn))
1187 return NULL;
1188
1189 start_page = pfn_to_page(start_pfn);
1190
1191 if (page_zone(start_page) != zone)
1192 return NULL;
1193
1194 end_page = pfn_to_page(end_pfn);
1195
1196 /* This gives a shorter code than deriving page_zone(end_page) */
1197 if (page_zone_id(start_page) != page_zone_id(end_page))
1198 return NULL;
1199
1200 return start_page;
1201}
1202
1203void set_zone_contiguous(struct zone *zone)
1204{
1205 unsigned long block_start_pfn = zone->zone_start_pfn;
1206 unsigned long block_end_pfn;
1207
1208 block_end_pfn = ALIGN(block_start_pfn + 1, pageblock_nr_pages);
1209 for (; block_start_pfn < zone_end_pfn(zone);
1210 block_start_pfn = block_end_pfn,
1211 block_end_pfn += pageblock_nr_pages) {
1212
1213 block_end_pfn = min(block_end_pfn, zone_end_pfn(zone));
1214
1215 if (!__pageblock_pfn_to_page(block_start_pfn,
1216 block_end_pfn, zone))
1217 return;
1218 }
1219
1220 /* We confirm that there is no hole */
1221 zone->contiguous = true;
1222}
1223
1224void clear_zone_contiguous(struct zone *zone)
1225{
1226 zone->contiguous = false;
1227}
1228
1107#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 1229#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
1108static void __init deferred_free_range(struct page *page, 1230static void __init deferred_free_range(struct page *page,
1109 unsigned long pfn, int nr_pages) 1231 unsigned long pfn, int nr_pages)
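The __pageblock_pfn_to_page()/set_zone_contiguous() pair added above does the boundary checking once at init time so that per-pageblock scanners can skip it whenever zone->contiguous is set. A rough sketch of how a scanner might walk a zone with the new helper; the loop is illustrative only (pfn alignment is glossed over) and is not taken from this patch:

static void scan_zone_pageblocks(struct zone *zone)
{
	unsigned long pfn = zone->zone_start_pfn;
	unsigned long end = zone_end_pfn(zone);

	for (; pfn < end; pfn += pageblock_nr_pages) {
		unsigned long block_end = min(pfn + pageblock_nr_pages, end);
		struct page *page;

		/* NULL: the block straddles a hole or a zone/node boundary */
		page = __pageblock_pfn_to_page(pfn, block_end, zone);
		if (!page)
			continue;
		/* ... scan pages [page, page + (block_end - pfn)) ... */
	}
}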
@@ -1254,9 +1376,13 @@ free_range:
1254 pgdat_init_report_one_done(); 1376 pgdat_init_report_one_done();
1255 return 0; 1377 return 0;
1256} 1378}
1379#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
1257 1380
1258void __init page_alloc_init_late(void) 1381void __init page_alloc_init_late(void)
1259{ 1382{
1383 struct zone *zone;
1384
1385#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
1260 int nid; 1386 int nid;
1261 1387
1262 /* There will be num_node_state(N_MEMORY) threads */ 1388 /* There will be num_node_state(N_MEMORY) threads */
@@ -1270,8 +1396,11 @@ void __init page_alloc_init_late(void)
1270 1396
1271 /* Reinit limits that are based on free pages after the kernel is up */ 1397 /* Reinit limits that are based on free pages after the kernel is up */
1272 files_maxfiles_init(); 1398 files_maxfiles_init();
1399#endif
1400
1401 for_each_populated_zone(zone)
1402 set_zone_contiguous(zone);
1273} 1403}
1274#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
1275 1404
1276#ifdef CONFIG_CMA 1405#ifdef CONFIG_CMA
1277/* Free whole pageblock and set its migration type to MIGRATE_CMA. */ 1406/* Free whole pageblock and set its migration type to MIGRATE_CMA. */
@@ -1360,7 +1489,7 @@ static inline int check_new_page(struct page *page)
1360 bad_reason = "nonzero mapcount"; 1489 bad_reason = "nonzero mapcount";
1361 if (unlikely(page->mapping != NULL)) 1490 if (unlikely(page->mapping != NULL))
1362 bad_reason = "non-NULL mapping"; 1491 bad_reason = "non-NULL mapping";
1363 if (unlikely(atomic_read(&page->_count) != 0)) 1492 if (unlikely(page_ref_count(page) != 0))
1364 bad_reason = "nonzero _count"; 1493 bad_reason = "nonzero _count";
1365 if (unlikely(page->flags & __PG_HWPOISON)) { 1494 if (unlikely(page->flags & __PG_HWPOISON)) {
1366 bad_reason = "HWPoisoned (hardware-corrupted)"; 1495 bad_reason = "HWPoisoned (hardware-corrupted)";
@@ -1381,15 +1510,24 @@ static inline int check_new_page(struct page *page)
1381 return 0; 1510 return 0;
1382} 1511}
1383 1512
1513static inline bool free_pages_prezeroed(bool poisoned)
1514{
1515 return IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) &&
1516 page_poisoning_enabled() && poisoned;
1517}
1518
1384static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, 1519static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
1385 int alloc_flags) 1520 int alloc_flags)
1386{ 1521{
1387 int i; 1522 int i;
1523 bool poisoned = true;
1388 1524
1389 for (i = 0; i < (1 << order); i++) { 1525 for (i = 0; i < (1 << order); i++) {
1390 struct page *p = page + i; 1526 struct page *p = page + i;
1391 if (unlikely(check_new_page(p))) 1527 if (unlikely(check_new_page(p)))
1392 return 1; 1528 return 1;
1529 if (poisoned)
1530 poisoned &= page_is_poisoned(p);
1393 } 1531 }
1394 1532
1395 set_page_private(page, 0); 1533 set_page_private(page, 0);
@@ -1397,9 +1535,10 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
1397 1535
1398 arch_alloc_page(page, order); 1536 arch_alloc_page(page, order);
1399 kernel_map_pages(page, 1 << order, 1); 1537 kernel_map_pages(page, 1 << order, 1);
1538 kernel_poison_pages(page, 1 << order, 1);
1400 kasan_alloc_pages(page, order); 1539 kasan_alloc_pages(page, order);
1401 1540
1402 if (gfp_flags & __GFP_ZERO) 1541 if (!free_pages_prezeroed(poisoned) && (gfp_flags & __GFP_ZERO))
1403 for (i = 0; i < (1 << order); i++) 1542 for (i = 0; i < (1 << order); i++)
1404 clear_highpage(page + i); 1543 clear_highpage(page + i);
1405 1544
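free_pages_prezeroed() ties __GFP_ZERO handling to the new page poisoning support: when CONFIG_PAGE_POISONING_ZERO poisons freed pages with zeroes and every page in the block is still poisoned, the clear_highpage() pass is redundant and can be skipped. The same condition, restated as a hypothetical helper purely to spell it out (not in the patch):

/* Hypothetical restatement of the zeroing decision in prep_new_page(). */
static inline bool need_explicit_zero(gfp_t gfp_flags, bool poisoned)
{
	if (!(gfp_flags & __GFP_ZERO))
		return false;		/* caller did not ask for zeroed pages */
	/* zero-poisoning already wrote zeroes and nothing touched them since */
	return !free_pages_prezeroed(poisoned);
}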
@@ -2238,19 +2377,11 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
2238 list_del(&page->lru); 2377 list_del(&page->lru);
2239 pcp->count--; 2378 pcp->count--;
2240 } else { 2379 } else {
2241 if (unlikely(gfp_flags & __GFP_NOFAIL)) { 2380 /*
2242 /* 2381 * We most definitely don't want callers attempting to
2243 * __GFP_NOFAIL is not to be used in new code. 2382 * allocate greater than order-1 page units with __GFP_NOFAIL.
2244 * 2383 */
2245 * All __GFP_NOFAIL callers should be fixed so that they 2384 WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
2246 * properly detect and handle allocation failures.
2247 *
2248 * We most definitely don't want callers attempting to
2249 * allocate greater than order-1 page units with
2250 * __GFP_NOFAIL.
2251 */
2252 WARN_ON_ONCE(order > 1);
2253 }
2254 spin_lock_irqsave(&zone->lock, flags); 2385 spin_lock_irqsave(&zone->lock, flags);
2255 2386
2256 page = NULL; 2387 page = NULL;
@@ -2690,9 +2821,8 @@ void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...)
2690 va_end(args); 2821 va_end(args);
2691 } 2822 }
2692 2823
2693 pr_warn("%s: page allocation failure: order:%u, mode:0x%x\n", 2824 pr_warn("%s: page allocation failure: order:%u, mode:%#x(%pGg)\n",
2694 current->comm, order, gfp_mask); 2825 current->comm, order, gfp_mask, &gfp_mask);
2695
2696 dump_stack(); 2826 dump_stack();
2697 if (!should_suppress_show_mem()) 2827 if (!should_suppress_show_mem())
2698 show_mem(filter); 2828 show_mem(filter);
@@ -2748,8 +2878,12 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
2748 * XXX: Page reclaim didn't yield anything, 2878 * XXX: Page reclaim didn't yield anything,
2749 * and the OOM killer can't be invoked, but 2879 * and the OOM killer can't be invoked, but
2750 * keep looping as per tradition. 2880 * keep looping as per tradition.
2881 *
2882 * But do not keep looping if oom_killer_disable()
2883 * was already called, for the system is trying to
2884 * enter a quiescent state during suspend.
2751 */ 2885 */
2752 *did_some_progress = 1; 2886 *did_some_progress = !oom_killer_disabled;
2753 goto out; 2887 goto out;
2754 } 2888 }
2755 if (pm_suspended_storage()) 2889 if (pm_suspended_storage())
@@ -3008,14 +3142,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
3008 (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM))) 3142 (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
3009 gfp_mask &= ~__GFP_ATOMIC; 3143 gfp_mask &= ~__GFP_ATOMIC;
3010 3144
3011 /*
3012 * If this allocation cannot block and it is for a specific node, then
3013 * fail early. There's no need to wakeup kswapd or retry for a
3014 * speculative node-specific allocation.
3015 */
3016 if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !can_direct_reclaim)
3017 goto nopage;
3018
3019retry: 3145retry:
3020 if (gfp_mask & __GFP_KSWAPD_RECLAIM) 3146 if (gfp_mask & __GFP_KSWAPD_RECLAIM)
3021 wake_all_kswapds(order, ac); 3147 wake_all_kswapds(order, ac);
@@ -3372,7 +3498,7 @@ refill:
3372 /* Even if we own the page, we do not use atomic_set(). 3498 /* Even if we own the page, we do not use atomic_set().
3373 * This would break get_page_unless_zero() users. 3499 * This would break get_page_unless_zero() users.
3374 */ 3500 */
3375 atomic_add(size - 1, &page->_count); 3501 page_ref_add(page, size - 1);
3376 3502
3377 /* reset page count bias and offset to start of new frag */ 3503 /* reset page count bias and offset to start of new frag */
3378 nc->pfmemalloc = page_is_pfmemalloc(page); 3504 nc->pfmemalloc = page_is_pfmemalloc(page);
@@ -3384,7 +3510,7 @@ refill:
3384 if (unlikely(offset < 0)) { 3510 if (unlikely(offset < 0)) {
3385 page = virt_to_page(nc->va); 3511 page = virt_to_page(nc->va);
3386 3512
3387 if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count)) 3513 if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
3388 goto refill; 3514 goto refill;
3389 3515
3390#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) 3516#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
@@ -3392,7 +3518,7 @@ refill:
3392 size = nc->size; 3518 size = nc->size;
3393#endif 3519#endif
3394 /* OK, page count is 0, we can safely set it */ 3520 /* OK, page count is 0, we can safely set it */
3395 atomic_set(&page->_count, size); 3521 set_page_count(page, size);
3396 3522
3397 /* reset page count bias and offset to start of new frag */ 3523 /* reset page count bias and offset to start of new frag */
3398 nc->pagecnt_bias = size; 3524 nc->pagecnt_bias = size;
@@ -3603,6 +3729,49 @@ static inline void show_node(struct zone *zone)
3603 printk("Node %d ", zone_to_nid(zone)); 3729 printk("Node %d ", zone_to_nid(zone));
3604} 3730}
3605 3731
3732long si_mem_available(void)
3733{
3734 long available;
3735 unsigned long pagecache;
3736 unsigned long wmark_low = 0;
3737 unsigned long pages[NR_LRU_LISTS];
3738 struct zone *zone;
3739 int lru;
3740
3741 for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
3742 pages[lru] = global_page_state(NR_LRU_BASE + lru);
3743
3744 for_each_zone(zone)
3745 wmark_low += zone->watermark[WMARK_LOW];
3746
3747 /*
3748 * Estimate the amount of memory available for userspace allocations,
3749 * without causing swapping.
3750 */
3751 available = global_page_state(NR_FREE_PAGES) - totalreserve_pages;
3752
3753 /*
3754 * Not all the page cache can be freed, otherwise the system will
3755 * start swapping. Assume at least half of the page cache, or the
3756 * low watermark worth of cache, needs to stay.
3757 */
3758 pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE];
3759 pagecache -= min(pagecache / 2, wmark_low);
3760 available += pagecache;
3761
3762 /*
3763 * Part of the reclaimable slab consists of items that are in use,
3764 * and cannot be freed. Cap this estimate at the low watermark.
3765 */
3766 available += global_page_state(NR_SLAB_RECLAIMABLE) -
3767 min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low);
3768
3769 if (available < 0)
3770 available = 0;
3771 return available;
3772}
3773EXPORT_SYMBOL_GPL(si_mem_available);
3774
3606void si_meminfo(struct sysinfo *val) 3775void si_meminfo(struct sysinfo *val)
3607{ 3776{
3608 val->totalram = totalram_pages; 3777 val->totalram = totalram_pages;
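si_mem_available() above produces a MemAvailable-style estimate: free pages minus the reserves, plus the portion of page cache and reclaimable slab that could be dropped without pushing the zones below their low watermarks. A standalone walk-through of the arithmetic with made-up values (all in pages):

#include <stdio.h>

int main(void)
{
	long free = 100000, totalreserve = 20000;
	long active_file = 300000, inactive_file = 100000;
	long slab_reclaimable = 80000, wmark_low = 50000;

	long pagecache = active_file + inactive_file;
	long available = free - totalreserve;

	pagecache -= (pagecache / 2 < wmark_low) ? pagecache / 2 : wmark_low;
	available += pagecache;
	available += slab_reclaimable -
		((slab_reclaimable / 2 < wmark_low) ? slab_reclaimable / 2 : wmark_low);

	if (available < 0)
		available = 0;
	printf("estimated available: %ld pages\n", available);	/* 470000 */
	return 0;
}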
@@ -3935,9 +4104,7 @@ static int __parse_numa_zonelist_order(char *s)
3935 } else if (*s == 'z' || *s == 'Z') { 4104 } else if (*s == 'z' || *s == 'Z') {
3936 user_zonelist_order = ZONELIST_ORDER_ZONE; 4105 user_zonelist_order = ZONELIST_ORDER_ZONE;
3937 } else { 4106 } else {
3938 printk(KERN_WARNING 4107 pr_warn("Ignoring invalid numa_zonelist_order value: %s\n", s);
3939 "Ignoring invalid numa_zonelist_order value: "
3940 "%s\n", s);
3941 return -EINVAL; 4108 return -EINVAL;
3942 } 4109 }
3943 return 0; 4110 return 0;
@@ -4401,12 +4568,11 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
4401 else 4568 else
4402 page_group_by_mobility_disabled = 0; 4569 page_group_by_mobility_disabled = 0;
4403 4570
4404 pr_info("Built %i zonelists in %s order, mobility grouping %s. " 4571 pr_info("Built %i zonelists in %s order, mobility grouping %s. Total pages: %ld\n",
4405 "Total pages: %ld\n", 4572 nr_online_nodes,
4406 nr_online_nodes, 4573 zonelist_order_name[current_zonelist_order],
4407 zonelist_order_name[current_zonelist_order], 4574 page_group_by_mobility_disabled ? "off" : "on",
4408 page_group_by_mobility_disabled ? "off" : "on", 4575 vm_total_pages);
4409 vm_total_pages);
4410#ifdef CONFIG_NUMA 4576#ifdef CONFIG_NUMA
4411 pr_info("Policy zone: %s\n", zone_names[policy_zone]); 4577 pr_info("Policy zone: %s\n", zone_names[policy_zone]);
4412#endif 4578#endif
@@ -4491,6 +4657,9 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
4491 pg_data_t *pgdat = NODE_DATA(nid); 4657 pg_data_t *pgdat = NODE_DATA(nid);
4492 unsigned long pfn; 4658 unsigned long pfn;
4493 unsigned long nr_initialised = 0; 4659 unsigned long nr_initialised = 0;
4660#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
4661 struct memblock_region *r = NULL, *tmp;
4662#endif
4494 4663
4495 if (highest_memmap_pfn < end_pfn - 1) 4664 if (highest_memmap_pfn < end_pfn - 1)
4496 highest_memmap_pfn = end_pfn - 1; 4665 highest_memmap_pfn = end_pfn - 1;
@@ -4504,20 +4673,51 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
4504 4673
4505 for (pfn = start_pfn; pfn < end_pfn; pfn++) { 4674 for (pfn = start_pfn; pfn < end_pfn; pfn++) {
4506 /* 4675 /*
4507 * There can be holes in boot-time mem_map[]s 4676 * There can be holes in boot-time mem_map[]s handed to this
4508 * handed to this function. They do not 4677 * function. They do not exist on hotplugged memory.
4509 * exist on hotplugged memory. 4678 */
4679 if (context != MEMMAP_EARLY)
4680 goto not_early;
4681
4682 if (!early_pfn_valid(pfn))
4683 continue;
4684 if (!early_pfn_in_nid(pfn, nid))
4685 continue;
4686 if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))
4687 break;
4688
4689#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
4690 /*
4691 * If not mirrored_kernelcore and ZONE_MOVABLE exists, range
4692 * from zone_movable_pfn[nid] to end of each node should be
4693 * ZONE_MOVABLE not ZONE_NORMAL. skip it.
4510 */ 4694 */
4511 if (context == MEMMAP_EARLY) { 4695 if (!mirrored_kernelcore && zone_movable_pfn[nid])
4512 if (!early_pfn_valid(pfn)) 4696 if (zone == ZONE_NORMAL && pfn >= zone_movable_pfn[nid])
4513 continue; 4697 continue;
4514 if (!early_pfn_in_nid(pfn, nid)) 4698
4699 /*
4700 * Check given memblock attribute by firmware which can affect
4701 * kernel memory layout. If zone==ZONE_MOVABLE but memory is
4702 * mirrored, it's an overlapped memmap init. skip it.
4703 */
4704 if (mirrored_kernelcore && zone == ZONE_MOVABLE) {
4705 if (!r || pfn >= memblock_region_memory_end_pfn(r)) {
4706 for_each_memblock(memory, tmp)
4707 if (pfn < memblock_region_memory_end_pfn(tmp))
4708 break;
4709 r = tmp;
4710 }
4711 if (pfn >= memblock_region_memory_base_pfn(r) &&
4712 memblock_is_mirror(r)) {
4713 /* already initialized as NORMAL */
4714 pfn = memblock_region_memory_end_pfn(r);
4515 continue; 4715 continue;
4516 if (!update_defer_init(pgdat, pfn, end_pfn, 4716 }
4517 &nr_initialised))
4518 break;
4519 } 4717 }
4718#endif
4520 4719
4720not_early:
4521 /* 4721 /*
4522 * Mark the block movable so that blocks are reserved for 4722 * Mark the block movable so that blocks are reserved for
4523 * movable at startup. This will force kernel allocations 4723 * movable at startup. This will force kernel allocations
@@ -4934,11 +5134,6 @@ static void __meminit adjust_zone_range_for_zone_movable(int nid,
4934 *zone_end_pfn = min(node_end_pfn, 5134 *zone_end_pfn = min(node_end_pfn,
4935 arch_zone_highest_possible_pfn[movable_zone]); 5135 arch_zone_highest_possible_pfn[movable_zone]);
4936 5136
4937 /* Adjust for ZONE_MOVABLE starting within this range */
4938 } else if (*zone_start_pfn < zone_movable_pfn[nid] &&
4939 *zone_end_pfn > zone_movable_pfn[nid]) {
4940 *zone_end_pfn = zone_movable_pfn[nid];
4941
4942 /* Check if this whole range is within ZONE_MOVABLE */ 5137 /* Check if this whole range is within ZONE_MOVABLE */
4943 } else if (*zone_start_pfn >= zone_movable_pfn[nid]) 5138 } else if (*zone_start_pfn >= zone_movable_pfn[nid])
4944 *zone_start_pfn = *zone_end_pfn; 5139 *zone_start_pfn = *zone_end_pfn;
@@ -4953,31 +5148,31 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid,
4953 unsigned long zone_type, 5148 unsigned long zone_type,
4954 unsigned long node_start_pfn, 5149 unsigned long node_start_pfn,
4955 unsigned long node_end_pfn, 5150 unsigned long node_end_pfn,
5151 unsigned long *zone_start_pfn,
5152 unsigned long *zone_end_pfn,
4956 unsigned long *ignored) 5153 unsigned long *ignored)
4957{ 5154{
4958 unsigned long zone_start_pfn, zone_end_pfn;
4959
4960 /* When hotadd a new node from cpu_up(), the node should be empty */ 5155 /* When hotadd a new node from cpu_up(), the node should be empty */
4961 if (!node_start_pfn && !node_end_pfn) 5156 if (!node_start_pfn && !node_end_pfn)
4962 return 0; 5157 return 0;
4963 5158
4964 /* Get the start and end of the zone */ 5159 /* Get the start and end of the zone */
4965 zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type]; 5160 *zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
4966 zone_end_pfn = arch_zone_highest_possible_pfn[zone_type]; 5161 *zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
4967 adjust_zone_range_for_zone_movable(nid, zone_type, 5162 adjust_zone_range_for_zone_movable(nid, zone_type,
4968 node_start_pfn, node_end_pfn, 5163 node_start_pfn, node_end_pfn,
4969 &zone_start_pfn, &zone_end_pfn); 5164 zone_start_pfn, zone_end_pfn);
4970 5165
4971 /* Check that this node has pages within the zone's required range */ 5166 /* Check that this node has pages within the zone's required range */
4972 if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn) 5167 if (*zone_end_pfn < node_start_pfn || *zone_start_pfn > node_end_pfn)
4973 return 0; 5168 return 0;
4974 5169
4975 /* Move the zone boundaries inside the node if necessary */ 5170 /* Move the zone boundaries inside the node if necessary */
4976 zone_end_pfn = min(zone_end_pfn, node_end_pfn); 5171 *zone_end_pfn = min(*zone_end_pfn, node_end_pfn);
4977 zone_start_pfn = max(zone_start_pfn, node_start_pfn); 5172 *zone_start_pfn = max(*zone_start_pfn, node_start_pfn);
4978 5173
4979 /* Return the spanned pages */ 5174 /* Return the spanned pages */
4980 return zone_end_pfn - zone_start_pfn; 5175 return *zone_end_pfn - *zone_start_pfn;
4981} 5176}
4982 5177
4983/* 5178/*
@@ -5023,6 +5218,7 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
5023 unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type]; 5218 unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
5024 unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type]; 5219 unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
5025 unsigned long zone_start_pfn, zone_end_pfn; 5220 unsigned long zone_start_pfn, zone_end_pfn;
5221 unsigned long nr_absent;
5026 5222
5027 /* When hotadd a new node from cpu_up(), the node should be empty */ 5223 /* When hotadd a new node from cpu_up(), the node should be empty */
5028 if (!node_start_pfn && !node_end_pfn) 5224 if (!node_start_pfn && !node_end_pfn)
@@ -5034,7 +5230,39 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
5034 adjust_zone_range_for_zone_movable(nid, zone_type, 5230 adjust_zone_range_for_zone_movable(nid, zone_type,
5035 node_start_pfn, node_end_pfn, 5231 node_start_pfn, node_end_pfn,
5036 &zone_start_pfn, &zone_end_pfn); 5232 &zone_start_pfn, &zone_end_pfn);
5037 return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); 5233 nr_absent = __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
5234
5235 /*
5236 * ZONE_MOVABLE handling.
5237 * Treat pages to be ZONE_MOVABLE in ZONE_NORMAL as absent pages
5238 * and vice versa.
5239 */
5240 if (zone_movable_pfn[nid]) {
5241 if (mirrored_kernelcore) {
5242 unsigned long start_pfn, end_pfn;
5243 struct memblock_region *r;
5244
5245 for_each_memblock(memory, r) {
5246 start_pfn = clamp(memblock_region_memory_base_pfn(r),
5247 zone_start_pfn, zone_end_pfn);
5248 end_pfn = clamp(memblock_region_memory_end_pfn(r),
5249 zone_start_pfn, zone_end_pfn);
5250
5251 if (zone_type == ZONE_MOVABLE &&
5252 memblock_is_mirror(r))
5253 nr_absent += end_pfn - start_pfn;
5254
5255 if (zone_type == ZONE_NORMAL &&
5256 !memblock_is_mirror(r))
5257 nr_absent += end_pfn - start_pfn;
5258 }
5259 } else {
5260 if (zone_type == ZONE_NORMAL)
5261 nr_absent += node_end_pfn - zone_movable_pfn[nid];
5262 }
5263 }
5264
5265 return nr_absent;
5038} 5266}
5039 5267
5040#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ 5268#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
@@ -5042,8 +5270,18 @@ static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
5042 unsigned long zone_type, 5270 unsigned long zone_type,
5043 unsigned long node_start_pfn, 5271 unsigned long node_start_pfn,
5044 unsigned long node_end_pfn, 5272 unsigned long node_end_pfn,
5273 unsigned long *zone_start_pfn,
5274 unsigned long *zone_end_pfn,
5045 unsigned long *zones_size) 5275 unsigned long *zones_size)
5046{ 5276{
5277 unsigned int zone;
5278
5279 *zone_start_pfn = node_start_pfn;
5280 for (zone = 0; zone < zone_type; zone++)
5281 *zone_start_pfn += zones_size[zone];
5282
5283 *zone_end_pfn = *zone_start_pfn + zones_size[zone_type];
5284
5047 return zones_size[zone_type]; 5285 return zones_size[zone_type];
5048} 5286}
5049 5287
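In the !CONFIG_HAVE_MEMBLOCK_NODE_MAP variant the zone boundaries are now reported back to the caller as well, derived by stacking the lower zones' sizes on top of node_start_pfn. A standalone illustration with made-up zones_size[] values (in pages):

#include <stdio.h>

int main(void)
{
	unsigned long node_start_pfn = 0x1000;
	unsigned long zones_size[] = { 4096, 225280, 0 };	/* e.g. DMA, NORMAL, MOVABLE */
	unsigned int zone_type, zone;

	for (zone_type = 0; zone_type < 3; zone_type++) {
		unsigned long zone_start_pfn = node_start_pfn;

		for (zone = 0; zone < zone_type; zone++)
			zone_start_pfn += zones_size[zone];

		printf("zone %u: start pfn %lu, spans %lu pages\n",
		       zone_type, zone_start_pfn, zones_size[zone_type]);
	}
	return 0;
}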
@@ -5072,15 +5310,22 @@ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
5072 5310
5073 for (i = 0; i < MAX_NR_ZONES; i++) { 5311 for (i = 0; i < MAX_NR_ZONES; i++) {
5074 struct zone *zone = pgdat->node_zones + i; 5312 struct zone *zone = pgdat->node_zones + i;
5313 unsigned long zone_start_pfn, zone_end_pfn;
5075 unsigned long size, real_size; 5314 unsigned long size, real_size;
5076 5315
5077 size = zone_spanned_pages_in_node(pgdat->node_id, i, 5316 size = zone_spanned_pages_in_node(pgdat->node_id, i,
5078 node_start_pfn, 5317 node_start_pfn,
5079 node_end_pfn, 5318 node_end_pfn,
5319 &zone_start_pfn,
5320 &zone_end_pfn,
5080 zones_size); 5321 zones_size);
5081 real_size = size - zone_absent_pages_in_node(pgdat->node_id, i, 5322 real_size = size - zone_absent_pages_in_node(pgdat->node_id, i,
5082 node_start_pfn, node_end_pfn, 5323 node_start_pfn, node_end_pfn,
5083 zholes_size); 5324 zholes_size);
5325 if (size)
5326 zone->zone_start_pfn = zone_start_pfn;
5327 else
5328 zone->zone_start_pfn = 0;
5084 zone->spanned_pages = size; 5329 zone->spanned_pages = size;
5085 zone->present_pages = real_size; 5330 zone->present_pages = real_size;
5086 5331
@@ -5201,7 +5446,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
5201{ 5446{
5202 enum zone_type j; 5447 enum zone_type j;
5203 int nid = pgdat->node_id; 5448 int nid = pgdat->node_id;
5204 unsigned long zone_start_pfn = pgdat->node_start_pfn;
5205 int ret; 5449 int ret;
5206 5450
5207 pgdat_resize_init(pgdat); 5451 pgdat_resize_init(pgdat);
@@ -5217,11 +5461,15 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
5217#endif 5461#endif
5218 init_waitqueue_head(&pgdat->kswapd_wait); 5462 init_waitqueue_head(&pgdat->kswapd_wait);
5219 init_waitqueue_head(&pgdat->pfmemalloc_wait); 5463 init_waitqueue_head(&pgdat->pfmemalloc_wait);
5464#ifdef CONFIG_COMPACTION
5465 init_waitqueue_head(&pgdat->kcompactd_wait);
5466#endif
5220 pgdat_page_ext_init(pgdat); 5467 pgdat_page_ext_init(pgdat);
5221 5468
5222 for (j = 0; j < MAX_NR_ZONES; j++) { 5469 for (j = 0; j < MAX_NR_ZONES; j++) {
5223 struct zone *zone = pgdat->node_zones + j; 5470 struct zone *zone = pgdat->node_zones + j;
5224 unsigned long size, realsize, freesize, memmap_pages; 5471 unsigned long size, realsize, freesize, memmap_pages;
5472 unsigned long zone_start_pfn = zone->zone_start_pfn;
5225 5473
5226 size = zone->spanned_pages; 5474 size = zone->spanned_pages;
5227 realsize = freesize = zone->present_pages; 5475 realsize = freesize = zone->present_pages;
@@ -5240,8 +5488,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
5240 " %s zone: %lu pages used for memmap\n", 5488 " %s zone: %lu pages used for memmap\n",
5241 zone_names[j], memmap_pages); 5489 zone_names[j], memmap_pages);
5242 } else 5490 } else
5243 printk(KERN_WARNING 5491 pr_warn(" %s zone: %lu pages exceeds freesize %lu\n",
5244 " %s zone: %lu pages exceeds freesize %lu\n",
5245 zone_names[j], memmap_pages, freesize); 5492 zone_names[j], memmap_pages, freesize);
5246 } 5493 }
5247 5494
@@ -5290,7 +5537,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
5290 ret = init_currently_empty_zone(zone, zone_start_pfn, size); 5537 ret = init_currently_empty_zone(zone, zone_start_pfn, size);
5291 BUG_ON(ret); 5538 BUG_ON(ret);
5292 memmap_init(size, nid, j, zone_start_pfn); 5539 memmap_init(size, nid, j, zone_start_pfn);
5293 zone_start_pfn += size;
5294 } 5540 }
5295} 5541}
5296 5542
@@ -5358,6 +5604,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
5358 pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid, 5604 pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid,
5359 (u64)start_pfn << PAGE_SHIFT, 5605 (u64)start_pfn << PAGE_SHIFT,
5360 end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0); 5606 end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0);
5607#else
5608 start_pfn = node_start_pfn;
5361#endif 5609#endif
5362 calculate_node_totalpages(pgdat, start_pfn, end_pfn, 5610 calculate_node_totalpages(pgdat, start_pfn, end_pfn,
5363 zones_size, zholes_size); 5611 zones_size, zholes_size);
@@ -5448,8 +5696,7 @@ static unsigned long __init find_min_pfn_for_node(int nid)
5448 min_pfn = min(min_pfn, start_pfn); 5696 min_pfn = min(min_pfn, start_pfn);
5449 5697
5450 if (min_pfn == ULONG_MAX) { 5698 if (min_pfn == ULONG_MAX) {
5451 printk(KERN_WARNING 5699 pr_warn("Could not find start_pfn for node %d\n", nid);
5452 "Could not find start_pfn for node %d\n", nid);
5453 return 0; 5700 return 0;
5454 } 5701 }
5455 5702
@@ -5529,6 +5776,36 @@ static void __init find_zone_movable_pfns_for_nodes(void)
5529 } 5776 }
5530 5777
5531 /* 5778 /*
5779 * If kernelcore=mirror is specified, ignore movablecore option
5780 */
5781 if (mirrored_kernelcore) {
5782 bool mem_below_4gb_not_mirrored = false;
5783
5784 for_each_memblock(memory, r) {
5785 if (memblock_is_mirror(r))
5786 continue;
5787
5788 nid = r->nid;
5789
5790 usable_startpfn = memblock_region_memory_base_pfn(r);
5791
5792 if (usable_startpfn < 0x100000) {
5793 mem_below_4gb_not_mirrored = true;
5794 continue;
5795 }
5796
5797 zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
5798 min(usable_startpfn, zone_movable_pfn[nid]) :
5799 usable_startpfn;
5800 }
5801
5802 if (mem_below_4gb_not_mirrored)
5803 pr_warn("This configuration results in unmirrored kernel memory.");
5804
5805 goto out2;
5806 }
5807
5808 /*
5532 * If movablecore=nn[KMG] was specified, calculate what size of 5809 * If movablecore=nn[KMG] was specified, calculate what size of
5533 * kernelcore that corresponds so that memory usable for 5810 * kernelcore that corresponds so that memory usable for
5534 * any allocation type is evenly spread. If both kernelcore 5811 * any allocation type is evenly spread. If both kernelcore
@@ -5788,6 +6065,12 @@ static int __init cmdline_parse_core(char *p, unsigned long *core)
5788 */ 6065 */
5789static int __init cmdline_parse_kernelcore(char *p) 6066static int __init cmdline_parse_kernelcore(char *p)
5790{ 6067{
6068 /* parse kernelcore=mirror */
6069 if (parse_option_str(p, "mirror")) {
6070 mirrored_kernelcore = true;
6071 return 0;
6072 }
6073
5791 return cmdline_parse_core(p, &required_kernelcore); 6074 return cmdline_parse_core(p, &required_kernelcore);
5792} 6075}
5793 6076
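With this hook in place, booting with kernelcore=mirror on the kernel command line requests that ZONE_MOVABLE be carved out of the non-mirrored memory reported by firmware (via the memblock mirror attribute) rather than being sized by kernelcore=nn[KMG]; the warning added earlier in find_zone_movable_pfns_for_nodes() fires when memory below 4GB is not mirrored, since that range is always treated as kernel memory.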
@@ -5885,22 +6168,21 @@ void __init mem_init_print_info(const char *str)
5885 6168
5886#undef adj_init_size 6169#undef adj_init_size
5887 6170
5888 pr_info("Memory: %luK/%luK available " 6171 pr_info("Memory: %luK/%luK available (%luK kernel code, %luK rwdata, %luK rodata, %luK init, %luK bss, %luK reserved, %luK cma-reserved"
5889 "(%luK kernel code, %luK rwdata, %luK rodata, "
5890 "%luK init, %luK bss, %luK reserved, %luK cma-reserved"
5891#ifdef CONFIG_HIGHMEM 6172#ifdef CONFIG_HIGHMEM
5892 ", %luK highmem" 6173 ", %luK highmem"
5893#endif 6174#endif
5894 "%s%s)\n", 6175 "%s%s)\n",
5895 nr_free_pages() << (PAGE_SHIFT-10), physpages << (PAGE_SHIFT-10), 6176 nr_free_pages() << (PAGE_SHIFT - 10),
5896 codesize >> 10, datasize >> 10, rosize >> 10, 6177 physpages << (PAGE_SHIFT - 10),
5897 (init_data_size + init_code_size) >> 10, bss_size >> 10, 6178 codesize >> 10, datasize >> 10, rosize >> 10,
5898 (physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT-10), 6179 (init_data_size + init_code_size) >> 10, bss_size >> 10,
5899 totalcma_pages << (PAGE_SHIFT-10), 6180 (physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT - 10),
6181 totalcma_pages << (PAGE_SHIFT - 10),
5900#ifdef CONFIG_HIGHMEM 6182#ifdef CONFIG_HIGHMEM
5901 totalhigh_pages << (PAGE_SHIFT-10), 6183 totalhigh_pages << (PAGE_SHIFT - 10),
5902#endif 6184#endif
5903 str ? ", " : "", str ? str : ""); 6185 str ? ", " : "", str ? str : "");
5904} 6186}
5905 6187
5906/** 6188/**
@@ -6075,8 +6357,17 @@ static void __setup_per_zone_wmarks(void)
6075 zone->watermark[WMARK_MIN] = tmp; 6357 zone->watermark[WMARK_MIN] = tmp;
6076 } 6358 }
6077 6359
6078 zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); 6360 /*
6079 zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); 6361 * Set the kswapd watermarks distance according to the
6362 * scale factor in proportion to available memory, but
6363 * ensure a minimum size on small systems.
6364 */
6365 tmp = max_t(u64, tmp >> 2,
6366 mult_frac(zone->managed_pages,
6367 watermark_scale_factor, 10000));
6368
6369 zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp;
6370 zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2;
6080 6371
6081 __mod_zone_page_state(zone, NR_ALLOC_BATCH, 6372 __mod_zone_page_state(zone, NR_ALLOC_BATCH,
6082 high_wmark_pages(zone) - low_wmark_pages(zone) - 6373 high_wmark_pages(zone) - low_wmark_pages(zone) -
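Instead of spacing the low and high watermarks at fixed fractions of the min watermark, the gap now also scales with zone size: it is the larger of min/4 and managed_pages * watermark_scale_factor / 10000 (0.1% of the zone at the default of 10), so that kswapd keeps a meaningful cushion of free memory on large machines. A standalone illustration with made-up numbers (4K pages):

#include <stdio.h>

int main(void)
{
	unsigned long managed_pages = 4UL << 20;	/* a 16GB zone */
	unsigned long min_wmark = 11584;		/* say, derived from min_free_kbytes */
	unsigned long watermark_scale_factor = 10;

	unsigned long gap = min_wmark >> 2;		/* old behaviour: min/4 = 2896 */
	unsigned long scaled = managed_pages * watermark_scale_factor / 10000;	/* 4194 */

	if (scaled > gap)
		gap = scaled;

	printf("low  = %lu\n", min_wmark + gap);	/* 15778 */
	printf("high = %lu\n", min_wmark + 2 * gap);	/* 19972 */
	return 0;
}

The factor is runtime-tunable: the sysctl handler added in the next hunk rewrites the per-zone watermarks whenever /proc/sys/vm/watermark_scale_factor is changed.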
@@ -6217,6 +6508,21 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
6217 return 0; 6508 return 0;
6218} 6509}
6219 6510
6511int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
6512 void __user *buffer, size_t *length, loff_t *ppos)
6513{
6514 int rc;
6515
6516 rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
6517 if (rc)
6518 return rc;
6519
6520 if (write)
6521 setup_per_zone_wmarks();
6522
6523 return 0;
6524}
6525
6220#ifdef CONFIG_NUMA 6526#ifdef CONFIG_NUMA
6221int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write, 6527int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
6222 void __user *buffer, size_t *length, loff_t *ppos) 6528 void __user *buffer, size_t *length, loff_t *ppos)
@@ -6408,11 +6714,8 @@ void *__init alloc_large_system_hash(const char *tablename,
6408 if (!table) 6714 if (!table)
6409 panic("Failed to allocate %s hash table\n", tablename); 6715 panic("Failed to allocate %s hash table\n", tablename);
6410 6716
6411 printk(KERN_INFO "%s hash table entries: %ld (order: %d, %lu bytes)\n", 6717 pr_info("%s hash table entries: %ld (order: %d, %lu bytes)\n",
6412 tablename, 6718 tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size);
6413 (1UL << log2qty),
6414 ilog2(size) - PAGE_SHIFT,
6415 size);
6416 6719
6417 if (_hash_shift) 6720 if (_hash_shift)
6418 *_hash_shift = log2qty; 6721 *_hash_shift = log2qty;
@@ -6563,7 +6866,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
6563 * This check already skips compound tails of THP 6866 * This check already skips compound tails of THP
6564 * because their page->_count is zero at all time. 6867 * because their page->_count is zero at all time.
6565 */ 6868 */
6566 if (!atomic_read(&page->_count)) { 6869 if (!page_ref_count(page)) {
6567 if (PageBuddy(page)) 6870 if (PageBuddy(page))
6568 iter += (1 << page_order(page)) - 1; 6871 iter += (1 << page_order(page)) - 1;
6569 continue; 6872 continue;
@@ -6913,8 +7216,8 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
6913 BUG_ON(!PageBuddy(page)); 7216 BUG_ON(!PageBuddy(page));
6914 order = page_order(page); 7217 order = page_order(page);
6915#ifdef CONFIG_DEBUG_VM 7218#ifdef CONFIG_DEBUG_VM
6916 printk(KERN_INFO "remove from free list %lx %d %lx\n", 7219 pr_info("remove from free list %lx %d %lx\n",
6917 pfn, 1 << order, end_pfn); 7220 pfn, 1 << order, end_pfn);
6918#endif 7221#endif
6919 list_del(&page->lru); 7222 list_del(&page->lru);
6920 rmv_page_order(page); 7223 rmv_page_order(page);
@@ -6927,7 +7230,6 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
6927} 7230}
6928#endif 7231#endif
6929 7232
6930#ifdef CONFIG_MEMORY_FAILURE
6931bool is_free_buddy_page(struct page *page) 7233bool is_free_buddy_page(struct page *page)
6932{ 7234{
6933 struct zone *zone = page_zone(page); 7235 struct zone *zone = page_zone(page);
@@ -6946,4 +7248,3 @@ bool is_free_buddy_page(struct page *page)
6946 7248
6947 return order < MAX_ORDER; 7249 return order < MAX_ORDER;
6948} 7250}
6949#endif