 mm/internal.h   |   6 ++
 mm/mm_init.c    |   1 +
 mm/page_alloc.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 mm/vmscan.c     |   6 +++-
 4 files changed, 130 insertions(+), 6 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index 88ac7be741ca..71d160437205 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -400,9 +400,15 @@ static inline void mminit_verify_zonelist(void)
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 #define __defermem_init __meminit
 #define __defer_init __meminit
+
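+/* Initialise the deferred struct pages on node @nid; runs from kswapd context */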
+void deferred_init_memmap(int nid);
 #else
 #define __defermem_init
 #define __defer_init __init
+
+static inline void deferred_init_memmap(int nid)
+{
+}
 #endif
 
 /* mminit_validate_memmodel_limits is independent of CONFIG_DEBUG_MEMORY_INIT */
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 5f420f7fafa1..28fbf87b20aa 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -11,6 +11,7 @@
 #include <linux/export.h>
 #include <linux/memory.h>
 #include <linux/notifier.h>
+#include <linux/sched.h>
 #include "internal.h"
 
 #ifdef CONFIG_DEBUG_MEMORY_INIT
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7af45b2e8870..c30f5a0535fd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -252,6 +252,14 @@ static inline bool __defermem_init early_page_uninitialised(unsigned long pfn)
 	return false;
 }
 
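+/* Assumes @pfn belongs to @nid; true if its struct page init was deferred */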
+static inline bool early_page_nid_uninitialised(unsigned long pfn, int nid)
+{
+	if (pfn >= NODE_DATA(nid)->first_deferred_pfn)
+		return true;
+
+	return false;
+}
+
 /*
  * Returns false when the remaining initialisation should be deferred until
  * later in the boot cycle when it can be parallelised.
@@ -284,6 +292,11 @@ static inline bool early_page_uninitialised(unsigned long pfn)
 	return false;
 }
 
+static inline bool early_page_nid_uninitialised(unsigned long pfn, int nid)
+{
+	return false;
+}
+
 static inline bool update_defer_init(pg_data_t *pgdat,
 				unsigned long pfn, unsigned long zone_end,
 				unsigned long *nr_initialised)
@@ -866,20 +879,51 @@ static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone,
 	return __init_single_page(pfn_to_page(pfn), pfn, zone, nid);
 }
 
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
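+/*
+ * Bootmem reservations may cover pfns whose memmap initialisation was
+ * deferred; initialise such a struct page before it is marked reserved.
+ */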
+static void init_reserved_page(unsigned long pfn)
+{
+	pg_data_t *pgdat;
+	int nid, zid;
+
+	if (!early_page_uninitialised(pfn))
+		return;
+
+	nid = early_pfn_to_nid(pfn);
+	pgdat = NODE_DATA(nid);
+
+	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+		struct zone *zone = &pgdat->node_zones[zid];
+
+		if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone))
+			break;
+	}
+	__init_single_pfn(pfn, zid, nid);
+}
+#else
+static inline void init_reserved_page(unsigned long pfn)
+{
+}
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
 /*
  * Initialised pages do not have PageReserved set. This function is
  * called for each range allocated by the bootmem allocator and
  * marks the pages PageReserved. The remaining valid pages are later
  * sent to the buddy page allocator.
  */
-void reserve_bootmem_region(unsigned long start, unsigned long end)
+void __meminit reserve_bootmem_region(unsigned long start, unsigned long end)
 {
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long end_pfn = PFN_UP(end);
 
-	for (; start_pfn < end_pfn; start_pfn++)
-		if (pfn_valid(start_pfn))
-			SetPageReserved(pfn_to_page(start_pfn));
+	for (; start_pfn < end_pfn; start_pfn++) {
+		if (pfn_valid(start_pfn)) {
+			struct page *page = pfn_to_page(start_pfn);
+
+			init_reserved_page(start_pfn);
+			SetPageReserved(page);
+		}
+	}
 }
 
 static bool free_pages_prepare(struct page *page, unsigned int order)
884 928
885static bool free_pages_prepare(struct page *page, unsigned int order) 929static bool free_pages_prepare(struct page *page, unsigned int order)
@@ -1017,6 +1061,74 @@ void __defer_init __free_pages_bootmem(struct page *page, unsigned long pfn,
 	return __free_pages_boot_core(page, pfn, order);
 }
 
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+/* Initialise remaining memory on a node */
+void __defermem_init deferred_init_memmap(int nid)
+{
+	struct mminit_pfnnid_cache nid_init_state = { };
+	unsigned long start = jiffies;
+	unsigned long nr_pages = 0;
+	unsigned long walk_start, walk_end;
+	int i, zid;
+	struct zone *zone;
+	pg_data_t *pgdat = NODE_DATA(nid);
+	unsigned long first_init_pfn = pgdat->first_deferred_pfn;
+
+	if (first_init_pfn == ULONG_MAX)
+		return;
+
+	/* Sanity check boundaries */
+	BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn);
+	BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat));
+	pgdat->first_deferred_pfn = ULONG_MAX;
+
+	/* Only the highest zone is deferred so find it */
+	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+		zone = pgdat->node_zones + zid;
+		if (first_init_pfn < zone_end_pfn(zone))
+			break;
+	}
+
+	for_each_mem_pfn_range(i, nid, &walk_start, &walk_end, NULL) {
+		unsigned long pfn, end_pfn;
+
+		end_pfn = min(walk_end, zone_end_pfn(zone));
+		pfn = first_init_pfn;
+		if (pfn < walk_start)
+			pfn = walk_start;
+		if (pfn < zone->zone_start_pfn)
+			pfn = zone->zone_start_pfn;
+
+		for (; pfn < end_pfn; pfn++) {
+			struct page *page;
+
+			if (!pfn_valid(pfn))
+				continue;
+
+			if (!meminit_pfn_in_nid(pfn, nid, &nid_init_state))
+				continue;
+
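+			/*
+			 * Non-zero flags mean the page was already initialised
+			 * during early boot (e.g. by init_reserved_page).
+			 */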
+			page = pfn_to_page(pfn);
+			if (page->flags) {
+				VM_BUG_ON(page_zone(page) != zone);
+				continue;
+			}
+
+			__init_single_page(page, pfn, zid, nid);
+			__free_pages_boot_core(page, pfn, 0);
+			nr_pages++;
+			cond_resched();
+		}
+		first_init_pfn = max(end_pfn, first_init_pfn);
+	}
+
+	/* Sanity check that the next zone really is unpopulated */
+	WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
+
+	pr_info("kswapd %d initialised %lu pages in %ums\n", nid, nr_pages,
+					jiffies_to_msecs(jiffies - start));
+}
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
 #ifdef CONFIG_CMA
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
 void __init init_cma_reserved_pageblock(struct page *page)
@@ -4329,6 +4441,9 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	zone->nr_migrate_reserve_block = reserve;
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
+		if (early_page_nid_uninitialised(pfn, zone_to_nid(zone)))
+			return;
+
 		if (!pfn_valid(pfn))
 			continue;
 		page = pfn_to_page(pfn);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e61445dce04e..f4a487110764 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3386,7 +3386,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
  * If there are applications that are active memory-allocators
  * (most normal use), this basically shouldn't matter.
  */
-static int kswapd(void *p)
+static int __defermem_init kswapd(void *p)
 {
 	unsigned long order, new_order;
 	unsigned balanced_order;
@@ -3421,6 +3421,8 @@ static int kswapd(void *p)
 	tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
 	set_freezable();
 
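+	/* Complete deferred struct page init for this node before reclaim runs */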
+	deferred_init_memmap(pgdat->node_id);
+
 	order = new_order = 0;
 	balanced_order = 0;
 	classzone_idx = new_classzone_idx = pgdat->nr_zones - 1;
@@ -3576,7 +3578,7 @@ static int cpu_callback(struct notifier_block *nfb, unsigned long action,
  * This kswapd start function will be called by init and node-hot-add.
  * On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
  */
-int kswapd_run(int nid)
+int __defermem_init kswapd_run(int nid)
 {
 	pg_data_t *pgdat = NODE_DATA(nid);
 	int ret = 0;