Diffstat (limited to 'mm/page_alloc.c')

 mm/page_alloc.c | 183 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 144 insertions(+), 39 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cf5555df78bd..3183eb2f579c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -292,40 +292,6 @@ EXPORT_SYMBOL(nr_online_nodes);
 int page_group_by_mobility_disabled __read_mostly;

 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-
-/*
- * Determine how many pages need to be initialized during early boot
- * (non-deferred initialization).
- * The value of first_deferred_pfn will be set later, once non-deferred pages
- * are initialized, but for now set it ULONG_MAX.
- */
-static inline void reset_deferred_meminit(pg_data_t *pgdat)
-{
-        phys_addr_t start_addr, end_addr;
-        unsigned long max_pgcnt;
-        unsigned long reserved;
-
-        /*
-         * Initialise at least 2G of a node but also take into account that
-         * two large system hashes that can take up 1GB for 0.25TB/node.
-         */
-        max_pgcnt = max(2UL << (30 - PAGE_SHIFT),
-                        (pgdat->node_spanned_pages >> 8));
-
-        /*
-         * Compensate the all the memblock reservations (e.g. crash kernel)
-         * from the initial estimation to make sure we will initialize enough
-         * memory to boot.
-         */
-        start_addr = PFN_PHYS(pgdat->node_start_pfn);
-        end_addr = PFN_PHYS(pgdat->node_start_pfn + max_pgcnt);
-        reserved = memblock_reserved_memory_within(start_addr, end_addr);
-        max_pgcnt += PHYS_PFN(reserved);
-
-        pgdat->static_init_pgcnt = min(max_pgcnt, pgdat->node_spanned_pages);
-        pgdat->first_deferred_pfn = ULONG_MAX;
-}
-
 /* Returns true if the struct page for the pfn is uninitialised */
 static inline bool __meminit early_page_uninitialised(unsigned long pfn)
 {
@@ -361,10 +327,6 @@ static inline bool update_defer_init(pg_data_t *pgdat,
         return true;
 }
 #else
-static inline void reset_deferred_meminit(pg_data_t *pgdat)
-{
-}
-
 static inline bool early_page_uninitialised(unsigned long pfn)
 {
         return false;
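As a sense of scale for the heuristic being removed: a standalone sketch of the same arithmetic (PAGE_SHIFT and the node size below are illustrative assumptions, not values from the patch):

/* Sketch of the removed sizing heuristic; 4 KiB pages and a 0.25 TiB
 * node are assumed for illustration, not taken from the patch. */
#include <stdio.h>

#define PAGE_SHIFT 12UL                         /* assume 4 KiB pages */

int main(void)
{
        unsigned long spanned = (256UL << 30) >> PAGE_SHIFT; /* 0.25 TiB node */
        unsigned long floor   = 2UL << (30 - PAGE_SHIFT);    /* 2 GiB minimum */
        unsigned long scaled  = spanned >> 8;                /* 1/256 of node */
        unsigned long max_pgcnt = scaled > floor ? scaled : floor;

        /* For a 0.25 TiB node the 1/256 term covers only 1 GiB, so the
         * 2 GiB floor wins; the scaled term dominates past 512 GiB/node. */
        printf("early-init pages: %lu (%lu MiB of memory)\n",
               max_pgcnt, (max_pgcnt << PAGE_SHIFT) >> 20);
        return 0;
}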
@@ -1611,6 +1573,117 @@ static int __init deferred_init_memmap(void *data)
         pgdat_init_report_one_done();
         return 0;
 }
+
+/*
+ * During boot we initialize deferred pages on-demand, as needed, but once
+ * page_alloc_init_late() has finished, the deferred pages are all initialized,
+ * and we can permanently disable that path.
+ */
+static DEFINE_STATIC_KEY_TRUE(deferred_pages);
+
+/*
+ * If this zone has deferred pages, try to grow it by initializing enough
+ * deferred pages to satisfy the allocation specified by order, rounded up to
+ * the nearest PAGES_PER_SECTION boundary. So we're adding memory in increments
+ * of SECTION_SIZE bytes by initializing struct pages in increments of
+ * PAGES_PER_SECTION * sizeof(struct page) bytes.
+ *
+ * Return true when zone was grown, otherwise return false. We return true even
+ * when we grow less than requested, to let the caller decide if there are
+ * enough pages to satisfy the allocation.
+ *
+ * Note: We use noinline because this function is needed only during boot, and
+ * it is called from a __ref function _deferred_grow_zone. This way we are
+ * making sure that it is not inlined into permanent text section.
+ */
+static noinline bool __init
+deferred_grow_zone(struct zone *zone, unsigned int order)
+{
+        int zid = zone_idx(zone);
+        int nid = zone_to_nid(zone);
+        pg_data_t *pgdat = NODE_DATA(nid);
+        unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
+        unsigned long nr_pages = 0;
+        unsigned long first_init_pfn, spfn, epfn, t, flags;
+        unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
+        phys_addr_t spa, epa;
+        u64 i;
+
+        /* Only the last zone may have deferred pages */
+        if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
+                return false;
+
+        pgdat_resize_lock(pgdat, &flags);
+
+        /*
+         * If deferred pages have been initialized while we were waiting for
+         * the lock, return true, as the zone was grown. The caller will retry
+         * this zone. We won't return to this function since the caller also
+         * has this static branch.
+         */
+        if (!static_branch_unlikely(&deferred_pages)) {
+                pgdat_resize_unlock(pgdat, &flags);
+                return true;
+        }
+
+        /*
+         * If someone grew this zone while we were waiting for spinlock, return
+         * true, as there might be enough pages already.
+         */
+        if (first_deferred_pfn != pgdat->first_deferred_pfn) {
+                pgdat_resize_unlock(pgdat, &flags);
+                return true;
+        }
+
+        first_init_pfn = max(zone->zone_start_pfn, first_deferred_pfn);
+
+        if (first_init_pfn >= pgdat_end_pfn(pgdat)) {
+                pgdat_resize_unlock(pgdat, &flags);
+                return false;
+        }
+
+        for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
+                spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
+                epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
+
+                while (spfn < epfn && nr_pages < nr_pages_needed) {
+                        t = ALIGN(spfn + PAGES_PER_SECTION, PAGES_PER_SECTION);
+                        first_deferred_pfn = min(t, epfn);
+                        nr_pages += deferred_init_pages(nid, zid, spfn,
+                                                        first_deferred_pfn);
+                        spfn = first_deferred_pfn;
+                }
+
+                if (nr_pages >= nr_pages_needed)
+                        break;
+        }
+
+        for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
+                spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
+                epfn = min_t(unsigned long, first_deferred_pfn, PFN_DOWN(epa));
+                deferred_free_pages(nid, zid, spfn, epfn);
+
+                if (first_deferred_pfn == epfn)
+                        break;
+        }
+        pgdat->first_deferred_pfn = first_deferred_pfn;
+        pgdat_resize_unlock(pgdat, &flags);
+
+        return nr_pages > 0;
+}
+
+/*
+ * deferred_grow_zone() is __init, but it is called from
+ * get_page_from_freelist() during early boot until deferred_pages permanently
+ * disables this call. This is why we have refdata wrapper to avoid warning,
+ * and to ensure that the function body gets unloaded.
+ */
+static bool __ref
+_deferred_grow_zone(struct zone *zone, unsigned int order)
+{
+        return deferred_grow_zone(zone, order);
+}
+
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */

 void __init page_alloc_init_late(void)
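The section-granular rounding in deferred_grow_zone() is easy to check in isolation; a standalone sketch, assuming x86_64-typical values (32768 pages per 128 MiB section) and a local reimplementation of the kernel's ALIGN() macro:

/* Standalone sketch of the ALIGN() rounding used by deferred_grow_zone();
 * the values are illustrative assumptions, not taken from the patch. */
#include <stdio.h>

#define PAGES_PER_SECTION 32768UL       /* x86_64: 128 MiB / 4 KiB */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        unsigned int order = 3;         /* an order-3 (8-page) allocation */
        unsigned long needed = ALIGN(1UL << order, PAGES_PER_SECTION);

        /* Even an 8-page request grows the zone by a whole section. */
        printf("request %lu pages -> init %lu pages\n", 1UL << order, needed);

        /* The inner loop advances at least one full section per step:
         * from pfn 100000 it stops next at 163840, the first section
         * boundary no less than one section ahead. */
        unsigned long spfn = 100000;
        printf("next stop after %lu: %lu\n", spfn,
               ALIGN(spfn + PAGES_PER_SECTION, PAGES_PER_SECTION));
        return 0;
}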
@@ -1629,6 +1702,12 @@ void __init page_alloc_init_late(void)
         /* Block until all are initialised */
         wait_for_completion(&pgdat_init_all_done_comp);

+        /*
+         * We initialized the rest of the deferred pages. Permanently disable
+         * on-demand struct page initialization.
+         */
+        static_branch_disable(&deferred_pages);
+
         /* Reinit limits that are based on free pages after the kernel is up */
         files_maxfiles_init();
 #endif
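The deferred_pages key follows the standard static-branch lifecycle: defined true, tested on the allocation path, disabled exactly once here. A minimal userspace analogue of that pattern, with a plain bool standing in for the runtime-patched jump label:

/* Userspace sketch of the static-branch pattern used by deferred_pages;
 * the real kernel API patches branch instructions in place at runtime. */
#include <stdbool.h>
#include <stdio.h>

static bool deferred_pages = true;      /* DEFINE_STATIC_KEY_TRUE() */

static bool grow_zone(void)
{
        /* Stand-in for _deferred_grow_zone(): pretend we added pages. */
        printf("growing zone on demand\n");
        return true;
}

static void alloc_attempt(void)
{
        /* static_branch_unlikely(&deferred_pages): in the kernel this
         * costs nothing once disabled, as the branch is patched out. */
        if (deferred_pages)
                grow_zone();
        else
                printf("fast path: no deferred pages left\n");
}

int main(void)
{
        alloc_attempt();                /* early boot: slow path taken */
        deferred_pages = false;         /* static_branch_disable() in
                                         * page_alloc_init_late() */
        alloc_attempt();                /* afterwards: fast path only */
        return 0;
}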
@@ -3208,6 +3287,16 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
                                ac_classzone_idx(ac), alloc_flags)) {
                         int ret;

+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+                        /*
+                         * Watermark failed for this zone, but see if we can
+                         * grow this zone if it contains deferred pages.
+                         */
+                        if (static_branch_unlikely(&deferred_pages)) {
+                                if (_deferred_grow_zone(zone, order))
+                                        goto try_this_zone;
+                        }
+#endif
                         /* Checked here to keep the fast path fast */
                         BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
                         if (alloc_flags & ALLOC_NO_WATERMARKS)
@@ -3249,6 +3338,14 @@ try_this_zone:
                                 reserve_highatomic_pageblock(page, zone, order);

                         return page;
+                } else {
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+                        /* Try again if zone has deferred pages */
+                        if (static_branch_unlikely(&deferred_pages)) {
+                                if (_deferred_grow_zone(zone, order))
+                                        goto try_this_zone;
+                        }
+#endif
                 }
         }

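Taken together, the two hooks give get_page_from_freelist() a grow-and-retry loop around the existing watermark and rmqueue checks. A condensed, self-contained sketch of that control flow (helper names here are stand-ins, not the real mm/ internals):

/* Condensed control-flow sketch of the two hook sites above;
 * all helpers are hypothetical stand-ins for illustration. */
#include <stdbool.h>
#include <stdio.h>

static int deferred_left = 2;           /* pretend-growable sections */

static bool watermark_ok(void) { return deferred_left < 2; }
static bool rmqueue_ok(void)   { return deferred_left < 1; }

static bool grow_zone(void)
{
        if (deferred_left == 0)
                return false;           /* nothing left to initialize */
        deferred_left--;
        return true;                    /* grew: caller retries the zone */
}

static bool get_page_from_freelist(void)
{
retry:
        if (!watermark_ok()) {
                if (grow_zone())
                        goto retry;     /* first hook: watermark failed */
                return false;
        }
        if (!rmqueue_ok()) {
                if (grow_zone())
                        goto retry;     /* second hook: rmqueue failed */
                return false;
        }
        return true;
}

int main(void)
{
        printf("allocated: %d\n", get_page_from_freelist());
        return 0;
}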
@@ -6244,7 +6341,15 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,

         alloc_node_mem_map(pgdat);

-        reset_deferred_meminit(pgdat);
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+        /*
+         * We start only with one section of pages, more pages are added as
+         * needed until the rest of deferred pages are initialized.
+         */
+        pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
+                                         pgdat->node_spanned_pages);
+        pgdat->first_deferred_pfn = ULONG_MAX;
+#endif
         free_area_init_core(pgdat);
 }

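For scale: with the same illustrative x86_64 values as above (4 KiB pages, 128 MiB sections, a 64-byte struct page), the new one-section start is a small fraction of the old 2 GiB-per-node floor:

/* Back-of-envelope comparison of old vs. new early-init footprint;
 * PAGE_SHIFT, section size and sizeof(struct page) are assumed
 * x86_64-typical values, not taken from the patch. */
#include <stdio.h>

#define PAGE_SHIFT        12UL          /* 4 KiB pages */
#define PAGES_PER_SECTION 32768UL       /* 128 MiB sections */
#define STRUCT_PAGE_SIZE  64UL          /* common sizeof(struct page) */

int main(void)
{
        unsigned long old_pages = 2UL << (30 - PAGE_SHIFT);  /* 2 GiB floor */
        unsigned long new_pages = PAGES_PER_SECTION;         /* one section */

        printf("old: init %lu struct pages (%lu MiB of memmap)\n",
               old_pages, old_pages * STRUCT_PAGE_SIZE >> 20);
        printf("new: init %lu struct pages (%lu KiB of memmap)\n",
               new_pages, new_pages * STRUCT_PAGE_SIZE >> 10);
        return 0;
}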