diff options
author | Alexander Duyck <alexander.h.duyck@linux.intel.com> | 2019-05-13 20:21:17 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-14 12:47:49 -0400 |
commit | 837566e7e08e3f89444166444836a8a49b9f9322 (patch) | |
tree | 189436a67634ff14180bba2925e1a8ef4fcda877 | |
parent | 56ec43d8b02719402c9fcf984feb52ec2300f8a5 (diff) |
mm: implement new zone specific memblock iterator
Introduce a new iterator for_each_free_mem_pfn_range_in_zone.
This iterator will take care of making sure a given memory range provided
is in fact contained within a zone. It takes care of all the bounds
checking we were doing in deferred_grow_zone, and deferred_init_memmap.
In addition it should help to speed up the search a bit by iterating until
the end of a range is greater than the start of the zone pfn range, and
will exit completely if the start is beyond the end of the zone.
Link: http://lkml.kernel.org/r/20190405221225.12227.22573.stgit@localhost.localdomain
Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Reviewed-by: Pavel Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Khalid Aziz <khalid.aziz@oracle.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Pavel Tatashin <pavel.tatashin@microsoft.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <yi.z.zhang@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/memblock.h | 25 | ||||
-rw-r--r-- | mm/memblock.c | 64 | ||||
-rw-r--r-- | mm/page_alloc.c | 31 |
3 files changed, 101 insertions, 19 deletions
diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 294d5d80e150..f8b78892b977 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h | |||
@@ -240,6 +240,31 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, | |||
240 | i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) | 240 | i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid)) |
241 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ | 241 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
242 | 242 | ||
243 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | ||
244 | void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, | ||
245 | unsigned long *out_spfn, | ||
246 | unsigned long *out_epfn); | ||
247 | /** | ||
248 | * for_each_free_mem_pfn_range_in_zone - iterate through zone specific free | ||
249 | * memblock areas | ||
250 | * @i: u64 used as loop variable | ||
251 | * @zone: zone in which all of the memory blocks reside | ||
252 | * @p_start: ptr to ulong for start pfn of the range, can be %NULL | ||
253 | * @p_end: ptr to ulong for end pfn of the range, can be %NULL | ||
254 | * | ||
255 | * Walks over free (memory && !reserved) areas of memblock in a specific | ||
256 | * zone. Available once memblock and an empty zone is initialized. The main | ||
257 | * assumption is that the zone start, end, and pgdat have been associated. | ||
258 | * This way we can use the zone to determine NUMA node, and if a given part | ||
259 | * of the memblock is valid for the zone. | ||
260 | */ | ||
261 | #define for_each_free_mem_pfn_range_in_zone(i, zone, p_start, p_end) \ | ||
262 | for (i = 0, \ | ||
263 | __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end); \ | ||
264 | i != U64_MAX; \ | ||
265 | __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end)) | ||
266 | #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ | ||
267 | |||
243 | /** | 268 | /** |
244 | * for_each_free_mem_range - iterate through free memblock areas | 269 | * for_each_free_mem_range - iterate through free memblock areas |
245 | * @i: u64 used as loop variable | 270 | * @i: u64 used as loop variable |
diff --git a/mm/memblock.c b/mm/memblock.c index a48f520c2d01..f315eca9f4a1 100644 --- a/mm/memblock.c +++ b/mm/memblock.c | |||
@@ -1255,6 +1255,70 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, | |||
1255 | return 0; | 1255 | return 0; |
1256 | } | 1256 | } |
1257 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ | 1257 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
1258 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | ||
1259 | /** | ||
1260 | * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone() | ||
1261 | * | ||
1262 | * @idx: pointer to u64 loop variable | ||
1263 | * @zone: zone in which all of the memory blocks reside | ||
1264 | * @out_spfn: ptr to ulong for start pfn of the range, can be %NULL | ||
1265 | * @out_epfn: ptr to ulong for end pfn of the range, can be %NULL | ||
1266 | * | ||
1267 | * This function is meant to be a zone/pfn specific wrapper for the | ||
1268 | * for_each_mem_range type iterators. Specifically they are used in the | ||
1269 | * deferred memory init routines and as such we were duplicating much of | ||
1270 | * this logic throughout the code. So instead of having it in multiple | ||
1271 | * locations it seemed like it would make more sense to centralize this to | ||
1272 | * one new iterator that does everything they need. | ||
1273 | */ | ||
1274 | void __init_memblock | ||
1275 | __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, | ||
1276 | unsigned long *out_spfn, unsigned long *out_epfn) | ||
1277 | { | ||
1278 | int zone_nid = zone_to_nid(zone); | ||
1279 | phys_addr_t spa, epa; | ||
1280 | int nid; | ||
1281 | |||
1282 | __next_mem_range(idx, zone_nid, MEMBLOCK_NONE, | ||
1283 | &memblock.memory, &memblock.reserved, | ||
1284 | &spa, &epa, &nid); | ||
1285 | |||
1286 | while (*idx != U64_MAX) { | ||
1287 | unsigned long epfn = PFN_DOWN(epa); | ||
1288 | unsigned long spfn = PFN_UP(spa); | ||
1289 | |||
1290 | /* | ||
1291 | * Verify the end is at least past the start of the zone and | ||
1292 | * that we have at least one PFN to initialize. | ||
1293 | */ | ||
1294 | if (zone->zone_start_pfn < epfn && spfn < epfn) { | ||
1295 | /* if we went too far just stop searching */ | ||
1296 | if (zone_end_pfn(zone) <= spfn) { | ||
1297 | *idx = U64_MAX; | ||
1298 | break; | ||
1299 | } | ||
1300 | |||
1301 | if (out_spfn) | ||
1302 | *out_spfn = max(zone->zone_start_pfn, spfn); | ||
1303 | if (out_epfn) | ||
1304 | *out_epfn = min(zone_end_pfn(zone), epfn); | ||
1305 | |||
1306 | return; | ||
1307 | } | ||
1308 | |||
1309 | __next_mem_range(idx, zone_nid, MEMBLOCK_NONE, | ||
1310 | &memblock.memory, &memblock.reserved, | ||
1311 | &spa, &epa, &nid); | ||
1312 | } | ||
1313 | |||
1314 | /* signal end of iteration */ | ||
1315 | if (out_spfn) | ||
1316 | *out_spfn = ULONG_MAX; | ||
1317 | if (out_epfn) | ||
1318 | *out_epfn = 0; | ||
1319 | } | ||
1320 | |||
1321 | #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ | ||
1258 | 1322 | ||
1259 | /** | 1323 | /** |
1260 | * memblock_alloc_range_nid - allocate boot memory block | 1324 | * memblock_alloc_range_nid - allocate boot memory block |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 25b82be438d7..fd42321c02f0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -1631,11 +1631,9 @@ static unsigned long __init deferred_init_pages(struct zone *zone, | |||
1631 | static int __init deferred_init_memmap(void *data) | 1631 | static int __init deferred_init_memmap(void *data) |
1632 | { | 1632 | { |
1633 | pg_data_t *pgdat = data; | 1633 | pg_data_t *pgdat = data; |
1634 | int nid = pgdat->node_id; | ||
1635 | unsigned long start = jiffies; | 1634 | unsigned long start = jiffies; |
1636 | unsigned long nr_pages = 0; | 1635 | unsigned long nr_pages = 0; |
1637 | unsigned long spfn, epfn, first_init_pfn, flags; | 1636 | unsigned long spfn, epfn, first_init_pfn, flags; |
1638 | phys_addr_t spa, epa; | ||
1639 | int zid; | 1637 | int zid; |
1640 | struct zone *zone; | 1638 | struct zone *zone; |
1641 | const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); | 1639 | const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); |
@@ -1672,14 +1670,12 @@ static int __init deferred_init_memmap(void *data) | |||
1672 | * freeing pages we can access pages that are ahead (computing buddy | 1670 | * freeing pages we can access pages that are ahead (computing buddy |
1673 | * page in __free_one_page()). | 1671 | * page in __free_one_page()). |
1674 | */ | 1672 | */ |
1675 | for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) { | 1673 | for_each_free_mem_pfn_range_in_zone(i, zone, &spfn, &epfn) { |
1676 | spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa)); | 1674 | spfn = max_t(unsigned long, first_init_pfn, spfn); |
1677 | epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa)); | ||
1678 | nr_pages += deferred_init_pages(zone, spfn, epfn); | 1675 | nr_pages += deferred_init_pages(zone, spfn, epfn); |
1679 | } | 1676 | } |
1680 | for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) { | 1677 | for_each_free_mem_pfn_range_in_zone(i, zone, &spfn, &epfn) { |
1681 | spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa)); | 1678 | spfn = max_t(unsigned long, first_init_pfn, spfn); |
1682 | epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa)); | ||
1683 | deferred_free_pages(spfn, epfn); | 1679 | deferred_free_pages(spfn, epfn); |
1684 | } | 1680 | } |
1685 | pgdat_resize_unlock(pgdat, &flags); | 1681 | pgdat_resize_unlock(pgdat, &flags); |
@@ -1687,8 +1683,8 @@ static int __init deferred_init_memmap(void *data) | |||
1687 | /* Sanity check that the next zone really is unpopulated */ | 1683 | /* Sanity check that the next zone really is unpopulated */ |
1688 | WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone)); | 1684 | WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone)); |
1689 | 1685 | ||
1690 | pr_info("node %d initialised, %lu pages in %ums\n", nid, nr_pages, | 1686 | pr_info("node %d initialised, %lu pages in %ums\n", |
1691 | jiffies_to_msecs(jiffies - start)); | 1687 | pgdat->node_id, nr_pages, jiffies_to_msecs(jiffies - start)); |
1692 | 1688 | ||
1693 | pgdat_init_report_one_done(); | 1689 | pgdat_init_report_one_done(); |
1694 | return 0; | 1690 | return 0; |
@@ -1712,13 +1708,11 @@ static int __init deferred_init_memmap(void *data) | |||
1712 | static noinline bool __init | 1708 | static noinline bool __init |
1713 | deferred_grow_zone(struct zone *zone, unsigned int order) | 1709 | deferred_grow_zone(struct zone *zone, unsigned int order) |
1714 | { | 1710 | { |
1715 | int nid = zone_to_nid(zone); | ||
1716 | pg_data_t *pgdat = NODE_DATA(nid); | ||
1717 | unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION); | 1711 | unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION); |
1712 | pg_data_t *pgdat = zone->zone_pgdat; | ||
1718 | unsigned long nr_pages = 0; | 1713 | unsigned long nr_pages = 0; |
1719 | unsigned long first_init_pfn, spfn, epfn, t, flags; | 1714 | unsigned long first_init_pfn, spfn, epfn, t, flags; |
1720 | unsigned long first_deferred_pfn = pgdat->first_deferred_pfn; | 1715 | unsigned long first_deferred_pfn = pgdat->first_deferred_pfn; |
1721 | phys_addr_t spa, epa; | ||
1722 | u64 i; | 1716 | u64 i; |
1723 | 1717 | ||
1724 | /* Only the last zone may have deferred pages */ | 1718 | /* Only the last zone may have deferred pages */ |
@@ -1754,9 +1748,8 @@ deferred_grow_zone(struct zone *zone, unsigned int order) | |||
1754 | return false; | 1748 | return false; |
1755 | } | 1749 | } |
1756 | 1750 | ||
1757 | for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) { | 1751 | for_each_free_mem_pfn_range_in_zone(i, zone, &spfn, &epfn) { |
1758 | spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa)); | 1752 | spfn = max_t(unsigned long, first_init_pfn, spfn); |
1759 | epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa)); | ||
1760 | 1753 | ||
1761 | while (spfn < epfn && nr_pages < nr_pages_needed) { | 1754 | while (spfn < epfn && nr_pages < nr_pages_needed) { |
1762 | t = ALIGN(spfn + PAGES_PER_SECTION, PAGES_PER_SECTION); | 1755 | t = ALIGN(spfn + PAGES_PER_SECTION, PAGES_PER_SECTION); |
@@ -1770,9 +1763,9 @@ deferred_grow_zone(struct zone *zone, unsigned int order) | |||
1770 | break; | 1763 | break; |
1771 | } | 1764 | } |
1772 | 1765 | ||
1773 | for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) { | 1766 | for_each_free_mem_pfn_range_in_zone(i, zone, &spfn, &epfn) { |
1774 | spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa)); | 1767 | spfn = max_t(unsigned long, first_init_pfn, spfn); |
1775 | epfn = min_t(unsigned long, first_deferred_pfn, PFN_DOWN(epa)); | 1768 | epfn = min_t(unsigned long, first_deferred_pfn, epfn); |
1776 | deferred_free_pages(spfn, epfn); | 1769 | deferred_free_pages(spfn, epfn); |
1777 | 1770 | ||
1778 | if (first_deferred_pfn == epfn) | 1771 | if (first_deferred_pfn == epfn) |