author     Mel Gorman <mgorman@suse.de>                    2015-06-30 17:57:02 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2015-06-30 22:44:56 -0400
commit     3a80a7fa7989fbb6aa56bb6ad31811b62cf99e60 (patch)
tree       cfcd9fb093f891b4218618d63347cfa63be7e38f /mm
parent     75a592a47129dcfc1aec40e7d3cdf239a767d441 (diff)
mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set
This patch initialises all low memory struct pages and 2G of the highest zone
on each node during memory initialisation if CONFIG_DEFERRED_STRUCT_PAGE_INIT
is set. That config option cannot be set yet but will be made available in a
later patch. Parallel initialisation of struct page depends on some features
from memory hotplug and it is necessary to alter section annotations.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Tested-by: Nate Zimmer <nzimmer@sgi.com>
Tested-by: Waiman Long <waiman.long@hp.com>
Tested-by: Daniel J Blueman <daniel@numascale.com>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Nate Zimmer <nzimmer@sgi.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Waiman Long <waiman.long@hp.com>
Cc: Scott Norton <scott.norton@hp.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--   mm/Kconfig       18
-rw-r--r--   mm/internal.h    18
-rw-r--r--   mm/page_alloc.c  78
3 files changed, 111 insertions(+), 3 deletions(-)
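For reference, the "2G of the highest zone" threshold works out to 524288 struct pages per node with 4K pages (2UL << (30 - PAGE_SHIFT)). The following is a minimal userspace sketch of the decision rule that the patch adds as update_defer_init() in mm/page_alloc.c below; the node layout, PAGE_SHIFT and section size are assumed values chosen for illustration, not taken from the patch.

/*
 * Standalone sketch of the deferral rule: low zones are always initialised
 * in full; only the first 2G of the highest zone on a node is initialised
 * at boot, and the cut-off is made on a section boundary. All values here
 * are illustrative, not taken from real hardware.
 */
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT		12			/* assume 4K pages */
#define PAGES_PER_SECTION	(1UL << 15)		/* e.g. 128M sections */

struct node_sim {
	unsigned long node_end_pfn;	/* stands in for pgdat_end_pfn() */
	unsigned long first_deferred_pfn;
	unsigned long nr_initialised;
};

/* Mirrors the logic of update_defer_init() in the patch. */
static bool init_now(struct node_sim *node, unsigned long pfn,
		     unsigned long zone_end)
{
	/* Zones below the top of the node are populated unconditionally. */
	if (zone_end < node->node_end_pfn)
		return true;

	/* Highest zone: stop after 2G worth of pages, section-aligned. */
	node->nr_initialised++;
	if (node->nr_initialised > (2UL << (30 - PAGE_SHIFT)) &&
	    (pfn & (PAGES_PER_SECTION - 1)) == 0) {
		node->first_deferred_pfn = pfn;
		return false;
	}
	return true;
}

int main(void)
{
	/* Hypothetical node whose highest zone spans pfn 0x100000..0x400000. */
	struct node_sim node = { .node_end_pfn = 0x400000,
				 .first_deferred_pfn = ~0UL };
	unsigned long pfn;

	for (pfn = 0x100000; pfn < 0x400000; pfn++)
		if (!init_now(&node, pfn, 0x400000))
			break;

	printf("deferred from pfn %#lx (%lu pages initialised)\n",
	       node.first_deferred_pfn, node.nr_initialised - 1);
	return 0;
}

Compiled with a plain C compiler this reports a cut-off at pfn 0x180000, i.e. exactly 2G of the hypothetical highest zone initialised before deferral starts.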
diff --git a/mm/Kconfig b/mm/Kconfig
index c180af880ed5..e79de2bd12cd 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -636,3 +636,21 @@ config MAX_STACK_SIZE_MB
 	  changed to a smaller value in which case that is used.
 
 	  A sane initial value is 80 MB.
+
+# For architectures that support deferred memory initialisation
+config ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
+	bool
+
+config DEFERRED_STRUCT_PAGE_INIT
+	bool "Defer initialisation of struct pages to kswapd"
+	default n
+	depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
+	depends on MEMORY_HOTPLUG
+	help
+	  Ordinarily all struct pages are initialised during early boot in a
+	  single thread. On very large machines this can take a considerable
+	  amount of time. If this option is set, large machines will bring up
+	  a subset of memmap at boot and then initialise the rest in parallel
+	  when kswapd starts. This has a potential performance impact on
+	  processes running early in the lifetime of the system until kswapd
+	  finishes the initialisation.
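To put the help text in numbers, here is a rough sketch of how much memmap initialisation deferral moves out of early boot for one large node. The 4K page size and 64-byte struct page are assumptions for illustration, not values taken from this patch.

/*
 * Back-of-the-envelope view of what deferral saves at boot. Assumes 4K
 * pages and a 64-byte struct page (typical for x86_64); the 1 TiB node is
 * invented for the example.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long node_mem    = 1ULL << 40;	/* 1 TiB node */
	unsigned long long page_size   = 4096;
	unsigned long long struct_page = 64;

	unsigned long long nr_pages = node_mem / page_size;
	unsigned long long memmap   = nr_pages * struct_page;
	unsigned long long early    = (2ULL << 30) / page_size * struct_page;

	printf("struct pages per node : %llu\n", nr_pages);
	printf("memmap to initialise  : %llu MiB total\n", memmap >> 20);
	printf("initialised at boot   : %llu MiB (first 2G of highest zone\n"
	       "                        plus all low zones)\n", early >> 20);
	return 0;
}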
diff --git a/mm/internal.h b/mm/internal.h
index 58e9022e3757..88ac7be741ca 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -387,6 +387,24 @@ static inline void mminit_verify_zonelist(void)
 }
 #endif /* CONFIG_DEBUG_MEMORY_INIT */
 
+/*
+ * Deferred struct page initialisation requires init functions that are freed
+ * before kswapd is available. Reuse the memory hotplug section annotation
+ * to mark the required code.
+ *
+ * __defermem_init is code that always exists but is annotated __meminit to
+ * avoid section warnings.
+ * __defer_init code gets marked __meminit when deferring struct page
+ * initialisation but is otherwise in the init section.
+ */
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+#define __defermem_init __meminit
+#define __defer_init __meminit
+#else
+#define __defermem_init
+#define __defer_init __init
+#endif
+
 /* mminit_validate_memmodel_limits is independent of CONFIG_DEBUG_MEMORY_INIT */
 #if defined(CONFIG_SPARSEMEM)
 extern void mminit_validate_memmodel_limits(unsigned long *start_pfn,
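A small userspace mock of how the two annotations above behave. The #ifdef structure is copied from the hunk; the section attributes are replaced by strings naming the sections conventionally used by the kernel linker script, which is an assumption for illustration (the real placement is decided by __init/__meminit and vmlinux.lds.h).

/*
 * Mock of the annotation plumbing. .init.text is discarded after boot;
 * .meminit.text is kept whenever memory hotplug is enabled, which
 * DEFERRED_STRUCT_PAGE_INIT depends on.
 */
#include <stdio.h>

#define CONFIG_DEFERRED_STRUCT_PAGE_INIT 1	/* set to 0 to compare */

#if CONFIG_DEFERRED_STRUCT_PAGE_INIT
# define __defermem_init "__meminit -> .meminit.text (kept)"
# define __defer_init    "__meminit -> .meminit.text (kept)"
#else
# define __defermem_init "(no annotation) -> regular .text"
# define __defer_init    "__init -> .init.text (freed after boot)"
#endif

int main(void)
{
	printf("__defermem_init: %s\n", __defermem_init);
	printf("__defer_init:    %s\n", __defer_init);
	return 0;
}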
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 12a81870815f..7af45b2e8870 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -235,6 +235,64 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 int page_group_by_mobility_disabled __read_mostly;
 
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static inline void reset_deferred_meminit(pg_data_t *pgdat)
+{
+	pgdat->first_deferred_pfn = ULONG_MAX;
+}
+
+/* Returns true if the struct page for the pfn is uninitialised */
+static inline bool __defermem_init early_page_uninitialised(unsigned long pfn)
+{
+	int nid = early_pfn_to_nid(pfn);
+
+	if (pfn >= NODE_DATA(nid)->first_deferred_pfn)
+		return true;
+
+	return false;
+}
+
+/*
+ * Returns false when the remaining initialisation should be deferred until
+ * later in the boot cycle when it can be parallelised.
+ */
+static inline bool update_defer_init(pg_data_t *pgdat,
+				unsigned long pfn, unsigned long zone_end,
+				unsigned long *nr_initialised)
+{
+	/* Always populate low zones for address-constrained allocations */
+	if (zone_end < pgdat_end_pfn(pgdat))
+		return true;
+
+	/* Initialise at least 2G of the highest zone */
+	(*nr_initialised)++;
+	if (*nr_initialised > (2UL << (30 - PAGE_SHIFT)) &&
+	    (pfn & (PAGES_PER_SECTION - 1)) == 0) {
+		pgdat->first_deferred_pfn = pfn;
+		return false;
+	}
+
+	return true;
+}
+#else
+static inline void reset_deferred_meminit(pg_data_t *pgdat)
+{
+}
+
+static inline bool early_page_uninitialised(unsigned long pfn)
+{
+	return false;
+}
+
+static inline bool update_defer_init(pg_data_t *pgdat,
+				unsigned long pfn, unsigned long zone_end,
+				unsigned long *nr_initialised)
+{
+	return true;
+}
+#endif
+
+
 void set_pageblock_migratetype(struct page *page, int migratetype)
 {
 	if (unlikely(page_group_by_mobility_disabled &&
@@ -878,8 +936,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	local_irq_restore(flags);
 }
 
-void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
-							unsigned int order)
+static void __defer_init __free_pages_boot_core(struct page *page,
+					unsigned long pfn, unsigned int order)
 {
 	unsigned int nr_pages = 1 << order;
 	struct page *p = page;
@@ -951,6 +1009,14 @@ static inline bool __meminit meminit_pfn_in_nid(unsigned long pfn, int node,
 #endif
 
 
+void __defer_init __free_pages_bootmem(struct page *page, unsigned long pfn,
+							unsigned int order)
+{
+	if (early_page_uninitialised(pfn))
+		return;
+	return __free_pages_boot_core(page, pfn, order);
+}
+
 #ifdef CONFIG_CMA
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
 void __init init_cma_reserved_pageblock(struct page *page)
@@ -4325,14 +4391,16 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		unsigned long start_pfn, enum memmap_context context)
 {
+	pg_data_t *pgdat = NODE_DATA(nid);
 	unsigned long end_pfn = start_pfn + size;
 	unsigned long pfn;
 	struct zone *z;
+	unsigned long nr_initialised = 0;
 
 	if (highest_memmap_pfn < end_pfn - 1)
 		highest_memmap_pfn = end_pfn - 1;
 
-	z = &NODE_DATA(nid)->node_zones[zone];
+	z = &pgdat->node_zones[zone];
 	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
 		/*
 		 * There can be holes in boot-time mem_map[]s
@@ -4344,6 +4412,9 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 				continue;
 			if (!early_pfn_in_nid(pfn, nid))
 				continue;
+			if (!update_defer_init(pgdat, pfn, end_pfn,
+						&nr_initialised))
+				break;
 		}
 		__init_single_pfn(pfn, zone, nid);
 	}
@@ -5144,6 +5215,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 	/* pg_data_t should be reset to zero when it's allocated */
 	WARN_ON(pgdat->nr_zones || pgdat->classzone_idx);
 
+	reset_deferred_meminit(pgdat);
 	pgdat->node_id = nid;
 	pgdat->node_start_pfn = node_start_pfn;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
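Finally, a sketch of the effect of the early_page_uninitialised() check that now gates __free_pages_bootmem(): boot-time frees for pfns at or beyond a node's first_deferred_pfn are skipped, because their struct pages have not been initialised yet and are picked up later in the series when kswapd finishes initialisation. The node layout and pfn values below are invented for the example.

/*
 * Userspace sketch of the gating added to __free_pages_bootmem(): frees
 * for pfns in the deferred range are silently skipped at boot.
 */
#include <stdbool.h>
#include <stdio.h>

struct pgdat_sim {
	unsigned long first_deferred_pfn;
};

static struct pgdat_sim node0 = { .first_deferred_pfn = 0x180000 };

/* Mirrors early_page_uninitialised(): true if the struct page isn't set up. */
static bool early_page_uninitialised(unsigned long pfn)
{
	return pfn >= node0.first_deferred_pfn;
}

/* Mirrors the new __free_pages_bootmem() wrapper. */
static void free_pages_bootmem(unsigned long pfn, unsigned int order)
{
	if (early_page_uninitialised(pfn)) {
		printf("pfn %#lx: deferred, skipping boot-time free\n", pfn);
		return;
	}
	printf("pfn %#lx: freeing 2^%u pages to the allocator\n", pfn, order);
}

int main(void)
{
	free_pages_bootmem(0x100000, 9);	/* below the cut-off: freed  */
	free_pages_bootmem(0x200000, 9);	/* beyond it: left for later */
	return 0;
}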