Diffstat (limited to 'mm/page_alloc.c')
 mm/page_alloc.c | 183 +++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 144 insertions(+), 39 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cf5555df78bd..3183eb2f579c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -292,40 +292,6 @@ EXPORT_SYMBOL(nr_online_nodes);
 int page_group_by_mobility_disabled __read_mostly;
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-
-/*
- * Determine how many pages need to be initialized during early boot
- * (non-deferred initialization).
- * The value of first_deferred_pfn will be set later, once non-deferred pages
- * are initialized, but for now set it ULONG_MAX.
- */
-static inline void reset_deferred_meminit(pg_data_t *pgdat)
-{
-	phys_addr_t start_addr, end_addr;
-	unsigned long max_pgcnt;
-	unsigned long reserved;
-
-	/*
-	 * Initialise at least 2G of a node but also take into account that
-	 * two large system hashes that can take up 1GB for 0.25TB/node.
-	 */
-	max_pgcnt = max(2UL << (30 - PAGE_SHIFT),
-			(pgdat->node_spanned_pages >> 8));
-
-	/*
-	 * Compensate the all the memblock reservations (e.g. crash kernel)
-	 * from the initial estimation to make sure we will initialize enough
-	 * memory to boot.
-	 */
-	start_addr = PFN_PHYS(pgdat->node_start_pfn);
-	end_addr = PFN_PHYS(pgdat->node_start_pfn + max_pgcnt);
-	reserved = memblock_reserved_memory_within(start_addr, end_addr);
-	max_pgcnt += PHYS_PFN(reserved);
-
-	pgdat->static_init_pgcnt = min(max_pgcnt, pgdat->node_spanned_pages);
-	pgdat->first_deferred_pfn = ULONG_MAX;
-}
-
 /* Returns true if the struct page for the pfn is uninitialised */
 static inline bool __meminit early_page_uninitialised(unsigned long pfn)
 {
@@ -361,10 +327,6 @@ static inline bool update_defer_init(pg_data_t *pgdat,
 	return true;
 }
 #else
-static inline void reset_deferred_meminit(pg_data_t *pgdat)
-{
-}
-
 static inline bool early_page_uninitialised(unsigned long pfn)
 {
 	return false;
@@ -1611,6 +1573,117 @@ static int __init deferred_init_memmap(void *data)
 	pgdat_init_report_one_done();
 	return 0;
 }
+
+/*
+ * During boot we initialize deferred pages on-demand, as needed, but once
+ * page_alloc_init_late() has finished, the deferred pages are all initialized,
+ * and we can permanently disable that path.
+ */
+static DEFINE_STATIC_KEY_TRUE(deferred_pages);
+
+/*
+ * If this zone has deferred pages, try to grow it by initializing enough
+ * deferred pages to satisfy the allocation specified by order, rounded up to
+ * the nearest PAGES_PER_SECTION boundary. So we're adding memory in increments
+ * of SECTION_SIZE bytes by initializing struct pages in increments of
+ * PAGES_PER_SECTION * sizeof(struct page) bytes.
+ *
+ * Return true when zone was grown, otherwise return false. We return true even
+ * when we grow less than requested, to let the caller decide if there are
+ * enough pages to satisfy the allocation.
+ *
+ * Note: We use noinline because this function is needed only during boot, and
+ * it is called from a __ref function _deferred_grow_zone. This way we are
+ * making sure that it is not inlined into permanent text section.
+ */
+static noinline bool __init
+deferred_grow_zone(struct zone *zone, unsigned int order)
+{
+	int zid = zone_idx(zone);
+	int nid = zone_to_nid(zone);
+	pg_data_t *pgdat = NODE_DATA(nid);
+	unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
+	unsigned long nr_pages = 0;
+	unsigned long first_init_pfn, spfn, epfn, t, flags;
+	unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
+	phys_addr_t spa, epa;
+	u64 i;
+
+	/* Only the last zone may have deferred pages */
+	if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
+		return false;
+
+	pgdat_resize_lock(pgdat, &flags);
+
+	/*
+	 * If deferred pages have been initialized while we were waiting for
+	 * the lock, return true, as the zone was grown. The caller will retry
+	 * this zone. We won't return to this function since the caller also
+	 * has this static branch.
+	 */
+	if (!static_branch_unlikely(&deferred_pages)) {
+		pgdat_resize_unlock(pgdat, &flags);
+		return true;
+	}
+
+	/*
+	 * If someone grew this zone while we were waiting for spinlock, return
+	 * true, as there might be enough pages already.
+	 */
+	if (first_deferred_pfn != pgdat->first_deferred_pfn) {
+		pgdat_resize_unlock(pgdat, &flags);
+		return true;
+	}
+
+	first_init_pfn = max(zone->zone_start_pfn, first_deferred_pfn);
+
+	if (first_init_pfn >= pgdat_end_pfn(pgdat)) {
+		pgdat_resize_unlock(pgdat, &flags);
+		return false;
+	}
+
+	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
+		spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
+		epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
+
+		while (spfn < epfn && nr_pages < nr_pages_needed) {
+			t = ALIGN(spfn + PAGES_PER_SECTION, PAGES_PER_SECTION);
+			first_deferred_pfn = min(t, epfn);
+			nr_pages += deferred_init_pages(nid, zid, spfn,
+							first_deferred_pfn);
+			spfn = first_deferred_pfn;
+		}
+
+		if (nr_pages >= nr_pages_needed)
+			break;
+	}
+
+	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
+		spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
+		epfn = min_t(unsigned long, first_deferred_pfn, PFN_DOWN(epa));
+		deferred_free_pages(nid, zid, spfn, epfn);
+
+		if (first_deferred_pfn == epfn)
+			break;
+	}
+	pgdat->first_deferred_pfn = first_deferred_pfn;
+	pgdat_resize_unlock(pgdat, &flags);
+
+	return nr_pages > 0;
+}
+
+/*
+ * deferred_grow_zone() is __init, but it is called from
+ * get_page_from_freelist() during early boot until deferred_pages permanently
+ * disables this call. This is why we have refdata wrapper to avoid warning,
+ * and to ensure that the function body gets unloaded.
+ */
+static bool __ref
+_deferred_grow_zone(struct zone *zone, unsigned int order)
+{
+	return deferred_grow_zone(zone, order);
+}
+
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 void __init page_alloc_init_late(void)
@@ -1629,6 +1702,12 @@ void __init page_alloc_init_late(void)
 	/* Block until all are initialised */
 	wait_for_completion(&pgdat_init_all_done_comp);
 
+	/*
+	 * We initialized the rest of the deferred pages. Permanently disable
+	 * on-demand struct page initialization.
+	 */
+	static_branch_disable(&deferred_pages);
+
 	/* Reinit limits that are based on free pages after the kernel is up */
 	files_maxfiles_init();
 #endif
@@ -3208,6 +3287,16 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 				       ac_classzone_idx(ac), alloc_flags)) {
 			int ret;
 
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+			/*
+			 * Watermark failed for this zone, but see if we can
+			 * grow this zone if it contains deferred pages.
+			 */
+			if (static_branch_unlikely(&deferred_pages)) {
+				if (_deferred_grow_zone(zone, order))
+					goto try_this_zone;
+			}
+#endif
 			/* Checked here to keep the fast path fast */
 			BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
 			if (alloc_flags & ALLOC_NO_WATERMARKS)
@@ -3249,6 +3338,14 @@ try_this_zone:
 				reserve_highatomic_pageblock(page, zone, order);
 
 			return page;
+		} else {
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+			/* Try again if zone has deferred pages */
+			if (static_branch_unlikely(&deferred_pages)) {
+				if (_deferred_grow_zone(zone, order))
+					goto try_this_zone;
+			}
+#endif
 		}
 	}
 
@@ -6244,7 +6341,15 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 
 	alloc_node_mem_map(pgdat);
 
-	reset_deferred_meminit(pgdat);
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+	/*
+	 * We start only with one section of pages, more pages are added as
+	 * needed until the rest of deferred pages are initialized.
+	 */
+	pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
+					 pgdat->node_spanned_pages);
+	pgdat->first_deferred_pfn = ULONG_MAX;
+#endif
 	free_area_init_core(pgdat);
 }
 
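
For illustration only, a minimal standalone C sketch of the pattern this diff introduces: a boot-time flag gates an on-demand "grow the zone by one section, then retry" step when the watermark check fails, and the flag is cleared once late init has finished. Every name below (fake_zone, watermark_ok, grow_zone_by_section, deferred_pages_enabled, SECTION_PAGES) is a hypothetical stand-in, a plain bool models the deferred_pages static key, and none of the locking or memblock walking above is mirrored; this is not kernel code.

/*
 * Hypothetical stand-ins only; not kernel code. A plain bool models the
 * deferred_pages static key, and one "section" of pages is added per grow.
 */
#include <stdbool.h>
#include <stdio.h>

#define SECTION_PAGES 32768UL			/* stand-in for PAGES_PER_SECTION */

struct fake_zone {
	unsigned long managed_pages;		/* struct pages already initialized */
	unsigned long spanned_pages;		/* total pages the zone spans */
};

static bool deferred_pages_enabled = true;	/* models DEFINE_STATIC_KEY_TRUE(deferred_pages) */

static bool watermark_ok(const struct fake_zone *z, unsigned int order)
{
	return z->managed_pages >= (1UL << order) + 1024;	/* toy watermark */
}

/* Roughly what deferred_grow_zone() does: initialize one more section, if any is left. */
static bool grow_zone_by_section(struct fake_zone *z)
{
	if (!deferred_pages_enabled || z->managed_pages >= z->spanned_pages)
		return false;
	z->managed_pages += SECTION_PAGES;
	if (z->managed_pages > z->spanned_pages)
		z->managed_pages = z->spanned_pages;
	return true;
}

/* Mirrors the get_page_from_freelist() hunks: failed watermark -> grow -> retry. */
static bool try_alloc(struct fake_zone *z, unsigned int order)
{
	while (!watermark_ok(z, order)) {
		if (!grow_zone_by_section(z))
			return false;		/* the real code falls back to reclaim/other zones */
	}
	return true;				/* allocate from this zone */
}

int main(void)
{
	struct fake_zone z = { .managed_pages = 0, .spanned_pages = 1UL << 20 };

	printf("order-9 allocation: %s\n", try_alloc(&z, 9) ? "ok" : "failed");
	printf("pages initialized so far: %lu\n", z.managed_pages);

	/* Models static_branch_disable(&deferred_pages) in page_alloc_init_late(). */
	deferred_pages_enabled = false;
	return 0;
}

The difference in the real patch is that the static key patches the check out of the hot path entirely once boot finishes, whereas this sketch keeps paying for a bool test on every allocation.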