aboutsummaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
authorPavel Tatashin <pasha.tatashin@oracle.com>2017-11-15 20:38:41 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-15 21:21:07 -0500
commitd135e5750205a21a212a19dbb05aeb339e2cbea7 (patch)
treecc96e583458549e9a9385b6e3a8557cf0eddca4f /mm/page_alloc.c
parent400e22499dd92613821374c8c6c88c7225359980 (diff)
mm/page_alloc.c: broken deferred calculation
In reset_deferred_meminit() we determine the number of pages that must not be deferred. We initialize pages for at least 2G of memory, but also pages for reserved memory in this node. The reserved memory is determined in this function: memblock_reserved_memory_within(), which operates over physical addresses, and returns size in bytes. However, reset_deferred_meminit() assumes that this function operates with pfns, and returns page count. The result is that in the best case the machine boots slower than expected due to initializing more pages than needed in a single thread, and in the worst case it panics because fewer than needed pages are initialized early. Link: http://lkml.kernel.org/r/20171021011707.15191-1-pasha.tatashin@oracle.com Fixes: 864b9a393dcb ("mm: consider memblock reservations for deferred memory initialization sizing") Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com> Acked-by: Michal Hocko <mhocko@suse.com> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c27
1 files changed, 18 insertions, 9 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bd1a686e40fe..8f2b9ad2e23f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -291,28 +291,37 @@ EXPORT_SYMBOL(nr_online_nodes);
291int page_group_by_mobility_disabled __read_mostly; 291int page_group_by_mobility_disabled __read_mostly;
292 292
293#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 293#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
294
295/*
296 * Determine how many pages need to be initialized during early boot
297 * (non-deferred initialization).
298 * The value of first_deferred_pfn will be set later, once non-deferred pages
299 * are initialized, but for now set it ULONG_MAX.
300 */
294static inline void reset_deferred_meminit(pg_data_t *pgdat) 301static inline void reset_deferred_meminit(pg_data_t *pgdat)
295{ 302{
296 unsigned long max_initialise; 303 phys_addr_t start_addr, end_addr;
297 unsigned long reserved_lowmem; 304 unsigned long max_pgcnt;
305 unsigned long reserved;
298 306
299 /* 307 /*
300 * Initialise at least 2G of a node but also take into account that 308 * Initialise at least 2G of a node but also take into account that
301 * two large system hashes that can take up 1GB for 0.25TB/node. 309 * two large system hashes that can take up 1GB for 0.25TB/node.
302 */ 310 */
303 max_initialise = max(2UL << (30 - PAGE_SHIFT), 311 max_pgcnt = max(2UL << (30 - PAGE_SHIFT),
304 (pgdat->node_spanned_pages >> 8)); 312 (pgdat->node_spanned_pages >> 8));
305 313
306 /* 314 /*
307 * Compensate for all the memblock reservations (e.g. crash kernel) 315 * Compensate for all the memblock reservations (e.g. crash kernel)
308 * from the initial estimation to make sure we will initialize enough 316 * from the initial estimation to make sure we will initialize enough
309 * memory to boot. 317 * memory to boot.
310 */ 318 */
311 reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn, 319 start_addr = PFN_PHYS(pgdat->node_start_pfn);
312 pgdat->node_start_pfn + max_initialise); 320 end_addr = PFN_PHYS(pgdat->node_start_pfn + max_pgcnt);
313 max_initialise += reserved_lowmem; 321 reserved = memblock_reserved_memory_within(start_addr, end_addr);
322 max_pgcnt += PHYS_PFN(reserved);
314 323
315 pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages); 324 pgdat->static_init_pgcnt = min(max_pgcnt, pgdat->node_spanned_pages);
316 pgdat->first_deferred_pfn = ULONG_MAX; 325 pgdat->first_deferred_pfn = ULONG_MAX;
317} 326}
318 327
@@ -339,7 +348,7 @@ static inline bool update_defer_init(pg_data_t *pgdat,
339 if (zone_end < pgdat_end_pfn(pgdat)) 348 if (zone_end < pgdat_end_pfn(pgdat))
340 return true; 349 return true;
341 (*nr_initialised)++; 350 (*nr_initialised)++;
342 if ((*nr_initialised > pgdat->static_init_size) && 351 if ((*nr_initialised > pgdat->static_init_pgcnt) &&
343 (pfn & (PAGES_PER_SECTION - 1)) == 0) { 352 (pfn & (PAGES_PER_SECTION - 1)) == 0) {
344 pgdat->first_deferred_pfn = pfn; 353 pgdat->first_deferred_pfn = pfn;
345 return false; 354 return false;