author		Mel Gorman <mgorman@suse.de>	2015-06-30 17:57:02 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-06-30 22:44:56 -0400
commit		3a80a7fa7989fbb6aa56bb6ad31811b62cf99e60 (patch)
tree		cfcd9fb093f891b4218618d63347cfa63be7e38f /mm
parent		75a592a47129dcfc1aec40e7d3cdf239a767d441 (diff)
mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set
This patch initialises all low memory struct pages and 2G of the highest
zone on each node during memory initialisation if
CONFIG_DEFERRED_STRUCT_PAGE_INIT is set. That config option cannot be set
yet but will be made available by a later patch. Parallel initialisation of
struct page depends on some features from memory hotplug and it is
necessary to alter section annotations.
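
As a worked example of the threshold used below: assuming 4 KiB pages
(PAGE_SHIFT == 12; the value is architecture-specific), "2G of the highest
zone" works out to 2UL << (30 - PAGE_SHIFT) == 524288 struct pages
initialised eagerly per node; pages beyond that point in the highest zone
are left for the deferred pass.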
Signed-off-by: Mel Gorman <mgorman@suse.de>
Tested-by: Nate Zimmer <nzimmer@sgi.com>
Tested-by: Waiman Long <waiman.long@hp.com>
Tested-by: Daniel J Blueman <daniel@numascale.com>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Nate Zimmer <nzimmer@sgi.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Waiman Long <waiman.long@hp.com>
Cc: Scott Norton <scott.norton@hp.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--	mm/Kconfig	| 18
-rw-r--r--	mm/internal.h	| 18
-rw-r--r--	mm/page_alloc.c	| 78
3 files changed, 111 insertions(+), 3 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index c180af880ed5..e79de2bd12cd 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -636,3 +636,21 @@ config MAX_STACK_SIZE_MB
 	  changed to a smaller value in which case that is used.
 
 	  A sane initial value is 80 MB.
+
+# For architectures that support deferred memory initialisation
+config ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
+	bool
+
+config DEFERRED_STRUCT_PAGE_INIT
+	bool "Defer initialisation of struct pages to kswapd"
+	default n
+	depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
+	depends on MEMORY_HOTPLUG
+	help
+	  Ordinarily all struct pages are initialised during early boot in a
+	  single thread. On very large machines this can take a considerable
+	  amount of time. If this option is set, large machines will bring up
+	  a subset of memmap at boot and then initialise the rest in parallel
+	  when kswapd starts. This has a potential performance impact on
+	  processes running early in the lifetime of the system until kswapd
+	  finishes the initialisation.
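
For context, an architecture opts in by selecting the gate symbol from its
own Kconfig. A sketch of what that opt-in could look like (hypothetical
placement; the actual per-arch enablement arrives in a later patch of this
series, with x86-64 first):

	config X86
		...
		select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT if X86_64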
diff --git a/mm/internal.h b/mm/internal.h
index 58e9022e3757..88ac7be741ca 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -387,6 +387,24 @@ static inline void mminit_verify_zonelist(void)
 }
 #endif /* CONFIG_DEBUG_MEMORY_INIT */
 
+/*
+ * Deferred struct page initialisation requires init functions that are freed
+ * before kswapd is available. Reuse the memory hotplug section annotation
+ * to mark the required code.
+ *
+ * __defermem_init is code that always exists but is annotated __meminit to
+ * avoid section warnings.
+ * __defer_init code gets marked __meminit when deferring struct page
+ * initialisation but is otherwise in the init section.
+ */
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+#define __defermem_init __meminit
+#define __defer_init __meminit
+#else
+#define __defermem_init
+#define __defer_init __init
+#endif
+
 /* mminit_validate_memmodel_limits is independent of CONFIG_DEBUG_MEMORY_INIT */
 #if defined(CONFIG_SPARSEMEM)
 extern void mminit_validate_memmodel_limits(unsigned long *start_pfn,
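
Reusing __meminit works because DEFERRED_STRUCT_PAGE_INIT depends on
MEMORY_HOTPLUG: with hotplug enabled, __meminit code is kept resident after
boot instead of being discarded with the init sections, so the deferred
initialisation helpers still exist by the time kswapd runs them.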
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 12a81870815f..7af45b2e8870 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -235,6 +235,64 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 int page_group_by_mobility_disabled __read_mostly;
 
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static inline void reset_deferred_meminit(pg_data_t *pgdat)
+{
+	pgdat->first_deferred_pfn = ULONG_MAX;
+}
+
+/* Returns true if the struct page for the pfn is uninitialised */
+static inline bool __defermem_init early_page_uninitialised(unsigned long pfn)
+{
+	int nid = early_pfn_to_nid(pfn);
+
+	if (pfn >= NODE_DATA(nid)->first_deferred_pfn)
+		return true;
+
+	return false;
+}
+
+/*
+ * Returns false when the remaining initialisation should be deferred until
+ * later in the boot cycle when it can be parallelised.
+ */
+static inline bool update_defer_init(pg_data_t *pgdat,
+				unsigned long pfn, unsigned long zone_end,
+				unsigned long *nr_initialised)
+{
+	/* Always populate low zones for address-constrained allocations */
+	if (zone_end < pgdat_end_pfn(pgdat))
+		return true;
+
+	/* Initialise at least 2G of the highest zone */
+	(*nr_initialised)++;
+	if (*nr_initialised > (2UL << (30 - PAGE_SHIFT)) &&
+	    (pfn & (PAGES_PER_SECTION - 1)) == 0) {
+		pgdat->first_deferred_pfn = pfn;
+		return false;
+	}
+
+	return true;
+}
+#else
+static inline void reset_deferred_meminit(pg_data_t *pgdat)
+{
+}
+
+static inline bool early_page_uninitialised(unsigned long pfn)
+{
+	return false;
+}
+
+static inline bool update_defer_init(pg_data_t *pgdat,
+				unsigned long pfn, unsigned long zone_end,
+				unsigned long *nr_initialised)
+{
+	return true;
+}
+#endif
+
+
 void set_pageblock_migratetype(struct page *page, int migratetype)
 {
 	if (unlikely(page_group_by_mobility_disabled &&
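
To make the deferral arithmetic above concrete, here is a minimal
userspace sketch of update_defer_init()'s decision. The PAGE_SHIFT and
PAGES_PER_SECTION values are assumptions (typical x86-64 SPARSEMEM
values), not taken from this patch:

	#include <stdio.h>

	#define PAGE_SHIFT		12		/* assumed: 4 KiB pages */
	#define PAGES_PER_SECTION	(1UL << 15)	/* assumed: x86-64 SPARSEMEM */

	int main(void)
	{
		unsigned long threshold = 2UL << (30 - PAGE_SHIFT); /* 2G in pages */
		unsigned long zone_pages = 4UL << (30 - PAGE_SHIFT); /* toy 4G zone */
		unsigned long nr_initialised = 0;
		unsigned long pfn;

		/* Walk a hypothetical highest zone starting at pfn 0. */
		for (pfn = 0; pfn < zone_pages; pfn++) {
			nr_initialised++;
			/* Defer only past 2G and only on a section boundary. */
			if (nr_initialised > threshold &&
			    (pfn & (PAGES_PER_SECTION - 1)) == 0) {
				printf("deferring from pfn %lu (%lu pages eager)\n",
				       pfn, pfn);
				return 0;
			}
		}
		printf("zone fully initialised eagerly\n");
		return 0;
	}

Under these assumptions it prints "deferring from pfn 524288 (524288 pages
eager)", i.e. exactly 2G of the toy zone is initialised at boot.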
@@ -878,8 +936,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	local_irq_restore(flags);
 }
 
-void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
-							unsigned int order)
+static void __defer_init __free_pages_boot_core(struct page *page,
+					unsigned long pfn, unsigned int order)
 {
 	unsigned int nr_pages = 1 << order;
 	struct page *p = page;
@@ -951,6 +1009,14 @@ static inline bool __meminit meminit_pfn_in_nid(unsigned long pfn, int node,
 #endif
 
 
+void __defer_init __free_pages_bootmem(struct page *page, unsigned long pfn,
+							unsigned int order)
+{
+	if (early_page_uninitialised(pfn))
+		return;
+	return __free_pages_boot_core(page, pfn, order);
+}
+
 #ifdef CONFIG_CMA
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
 void __init init_cma_reserved_pageblock(struct page *page)
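
Note that the __free_pages_bootmem() wrapper returns early for pfns whose
struct page is still uninitialised; those ranges are expected to be freed
once the deferred initialisation, added later in this series, has run.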
@@ -4325,14 +4391,16 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		unsigned long start_pfn, enum memmap_context context)
 {
+	pg_data_t *pgdat = NODE_DATA(nid);
 	unsigned long end_pfn = start_pfn + size;
 	unsigned long pfn;
 	struct zone *z;
+	unsigned long nr_initialised = 0;
 
 	if (highest_memmap_pfn < end_pfn - 1)
 		highest_memmap_pfn = end_pfn - 1;
 
-	z = &NODE_DATA(nid)->node_zones[zone];
+	z = &pgdat->node_zones[zone];
 	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
 		/*
 		 * There can be holes in boot-time mem_map[]s
@@ -4344,6 +4412,9 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 				continue;
 			if (!early_pfn_in_nid(pfn, nid))
 				continue;
+			if (!update_defer_init(pgdat, pfn, end_pfn,
+						&nr_initialised))
+				break;
 		}
 		__init_single_pfn(pfn, zone, nid);
 	}
@@ -5144,6 +5215,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 	/* pg_data_t should be reset to zero when it's allocated */
 	WARN_ON(pgdat->nr_zones || pgdat->classzone_idx);
 
+	reset_deferred_meminit(pgdat);
 	pgdat->node_id = nid;
 	pgdat->node_start_pfn = node_start_pfn;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP