path: root/mm/page_alloc.c
author     Pavel Tatashin <pasha.tatashin@oracle.com>   2018-10-26 18:09:37 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2018-10-26 19:26:35 -0400
commit     d3035be4ce2345d98633a45f93a74e526e94b802 (patch)
tree       4f845e0a87133583d176f3449c1a41e775b09348 /mm/page_alloc.c
parent     dfb3ccd00a06d71171961022019bb0f210d2cdc1 (diff)
mm: calculate deferred pages after skipping mirrored memory
update_defer_init() should be called only when a struct page is about to be initialized: it counts the number of initialized struct pages, but we may skip struct pages when there is mirrored memory. So move update_defer_init() after the check for mirrored memory.

Also, rename update_defer_init() to defer_init() and invert the returned boolean to emphasize that this is a boolean function which tells whether the rest of memmap initialization should be deferred. Make this function self-contained: do not pass in the number of already initialized pages in this zone; use static counters instead.

I found this bug by reading the code. The effect is that fewer struct pages than expected are initialized early in boot, and in some corner cases we may fail to boot when mirrored pages are used. The deferred-on-demand code should somewhat mitigate this, but it still causes inconsistencies compared to booting without mirrored pages, so it is better to fix it.

[pasha.tatashin@oracle.com: add comment about defer_init's lack of locking]
  Link: http://lkml.kernel.org/r/20180726193509.3326-3-pasha.tatashin@oracle.com
[akpm@linux-foundation.org: make defer_init non-inline, __meminit]
Link: http://lkml.kernel.org/r/20180724235520.10200-3-pasha.tatashin@oracle.com
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Steven Sistare <steven.sistare@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
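For readers skimming the change, the following is a minimal, self-contained C sketch of the static-counter scheme that defer_init() adopts, not the kernel code itself. The names STATIC_INIT_PGCNT and SECTION_MASK are illustrative stand-ins for pgdat->static_init_pgcnt and PAGES_PER_SECTION - 1, and the first_deferred_pfn bookkeeping is omitted.

#include <stdbool.h>

#define STATIC_INIT_PGCNT   2048UL          /* stand-in for pgdat->static_init_pgcnt */
#define SECTION_MASK        (32768UL - 1)   /* stand-in for PAGES_PER_SECTION - 1 */

/* Sketch of the new scheme: called once per pfn while the memmap is initialised. */
static bool defer_init_sketch(unsigned long pfn, unsigned long zone_end_pfn,
                              unsigned long node_end_pfn)
{
        /*
         * Per-zone state kept in static variables; safe without locking
         * only because this runs single-threaded, early in boot, before
         * smp_init().
         */
        static unsigned long prev_end_pfn, nr_initialised;

        /* A new zone (different end pfn) resets the counter. */
        if (prev_end_pfn != zone_end_pfn) {
                prev_end_pfn = zone_end_pfn;
                nr_initialised = 0;
        }

        /* Low zones (ending before the node end) are always fully populated. */
        if (zone_end_pfn < node_end_pfn)
                return false;

        /*
         * After enough pages have been initialised, defer the rest of the
         * zone from the next section-aligned pfn onwards.
         */
        nr_initialised++;
        if (nr_initialised > STATIC_INIT_PGCNT &&
            (pfn & SECTION_MASK) == 0)
                return true;

        return false;
}

The caller in memmap_init_zone() simply breaks out of its pfn loop when defer_init() returns true, which is why the check can sit after the mirrored-memory skip instead of before it.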
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  45
1 file changed, 25 insertions(+), 20 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 94725aea672f..db1ff4ac0cc6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -307,24 +307,33 @@ static inline bool __meminit early_page_uninitialised(unsigned long pfn)
 }
 
 /*
- * Returns false when the remaining initialisation should be deferred until
+ * Returns true when the remaining initialisation should be deferred until
  * later in the boot cycle when it can be parallelised.
  */
-static inline bool update_defer_init(pg_data_t *pgdat,
-				unsigned long pfn, unsigned long zone_end,
-				unsigned long *nr_initialised)
+static bool __meminit
+defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 {
+	static unsigned long prev_end_pfn, nr_initialised;
+
+	/*
+	 * prev_end_pfn static that contains the end of previous zone
+	 * No need to protect because called very early in boot before smp_init.
+	 */
+	if (prev_end_pfn != end_pfn) {
+		prev_end_pfn = end_pfn;
+		nr_initialised = 0;
+	}
+
 	/* Always populate low zones for address-constrained allocations */
-	if (zone_end < pgdat_end_pfn(pgdat))
-		return true;
-	(*nr_initialised)++;
-	if ((*nr_initialised > pgdat->static_init_pgcnt) &&
+	if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
+		return false;
+	nr_initialised++;
+	if ((nr_initialised > NODE_DATA(nid)->static_init_pgcnt) &&
 	    (pfn & (PAGES_PER_SECTION - 1)) == 0) {
-		pgdat->first_deferred_pfn = pfn;
-		return false;
+		NODE_DATA(nid)->first_deferred_pfn = pfn;
+		return true;
 	}
-
-	return true;
+	return false;
 }
 #else
 static inline bool early_page_uninitialised(unsigned long pfn)
@@ -332,11 +341,9 @@ static inline bool early_page_uninitialised(unsigned long pfn)
 	return false;
 }
 
-static inline bool update_defer_init(pg_data_t *pgdat,
-				unsigned long pfn, unsigned long zone_end,
-				unsigned long *nr_initialised)
+static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 {
-	return true;
+	return false;
 }
 #endif
 
@@ -5453,9 +5460,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		struct vmem_altmap *altmap)
 {
 	unsigned long end_pfn = start_pfn + size;
-	pg_data_t *pgdat = NODE_DATA(nid);
 	unsigned long pfn;
-	unsigned long nr_initialised = 0;
 	struct page *page;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 	struct memblock_region *r = NULL, *tmp;
@@ -5494,8 +5499,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 			continue;
 		if (!early_pfn_in_nid(pfn, nid))
 			continue;
-		if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))
-			break;
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 		/*
@@ -5518,6 +5521,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 			}
 		}
 #endif
+		if (defer_init(nid, pfn, end_pfn))
+			break;
 
 not_early:
 		page = pfn_to_page(pfn);