author     Alexander Duyck <alexander.h.duyck@linux.intel.com>   2019-05-13 20:21:17 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>        2019-05-14 12:47:49 -0400
commit     837566e7e08e3f89444166444836a8a49b9f9322 (patch)
tree       189436a67634ff14180bba2925e1a8ef4fcda877
parent     56ec43d8b02719402c9fcf984feb52ec2300f8a5 (diff)
mm: implement new zone specific memblock iterator
Introduce a new iterator, for_each_free_mem_pfn_range_in_zone. This
iterator makes sure a given memory range is in fact contained within a
zone. It takes care of all the bounds checking we were doing in
deferred_grow_zone and deferred_init_memmap. In addition it should help
to speed up the search a bit by iterating until the end of a range is
greater than the start of the zone pfn range, and will exit completely
if the start is beyond the end of the zone.

Link: http://lkml.kernel.org/r/20190405221225.12227.22573.stgit@localhost.localdomain
Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Reviewed-by: Pavel Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Khalid Aziz <khalid.aziz@oracle.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Pavel Tatashin <pavel.tatashin@microsoft.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <yi.z.zhang@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
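A minimal usage sketch of the new iterator (an editorial illustration, not
part of the patch; the zone pointer and the pfn-counting loop body are
placeholders, whereas the real callers in mm/page_alloc.c below pair the
iterator with deferred_init_pages() and deferred_free_pages()). Each
(spfn, epfn) pair comes back already clamped to the zone:

        unsigned long spfn, epfn, nr_pages = 0;
        u64 i;

        /* walk free (memory && !reserved) pfn ranges clipped to @zone */
        for_each_free_mem_pfn_range_in_zone(i, zone, &spfn, &epfn)
                nr_pages += epfn - spfn;  /* e.g. count initializable pfns */

Iteration ends when __next_mem_pfn_range_in_zone() sets i to U64_MAX; a
caller interested in only one of the two bounds may pass NULL for the other.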
-rw-r--r--   include/linux/memblock.h   25
-rw-r--r--   mm/memblock.c              64
-rw-r--r--   mm/page_alloc.c            31
3 files changed, 101 insertions, 19 deletions
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 294d5d80e150..f8b78892b977 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -240,6 +240,31 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
 	     i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
+				  unsigned long *out_spfn,
+				  unsigned long *out_epfn);
+/**
+ * for_each_free_mem_pfn_range_in_zone - iterate through zone specific
+ * free memblock areas
+ * @i: u64 used as loop variable
+ * @zone: zone in which all of the memory blocks reside
+ * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ *
+ * Walks over free (memory && !reserved) areas of memblock in a specific
+ * zone. Available once memblock and an empty zone is initialized. The main
+ * assumption is that the zone start, end, and pgdat have been associated.
+ * This way we can use the zone to determine NUMA node, and if a given part
+ * of the memblock is valid for the zone.
+ */
+#define for_each_free_mem_pfn_range_in_zone(i, zone, p_start, p_end)	\
+	for (i = 0,							\
+	     __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end);	\
+	     i != U64_MAX;						\
+	     __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end))
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
 /**
  * for_each_free_mem_range - iterate through free memblock areas
  * @i: u64 used as loop variable
diff --git a/mm/memblock.c b/mm/memblock.c
index a48f520c2d01..f315eca9f4a1 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1255,6 +1255,70 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
 	return 0;
 }
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+/**
+ * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone()
+ *
+ * @idx: pointer to u64 loop variable
+ * @zone: zone in which all of the memory blocks reside
+ * @out_spfn: ptr to ulong for start pfn of the range, can be %NULL
+ * @out_epfn: ptr to ulong for end pfn of the range, can be %NULL
+ *
+ * This function is meant to be a zone/pfn specific wrapper for the
+ * for_each_mem_range type iterators. Specifically they are used in the
+ * deferred memory init routines and as such we were duplicating much of
+ * this logic throughout the code. So instead of having it in multiple
+ * locations it seemed like it would make more sense to centralize this to
+ * one new iterator that does everything they need.
+ */
+void __init_memblock
+__next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
+			     unsigned long *out_spfn, unsigned long *out_epfn)
+{
+	int zone_nid = zone_to_nid(zone);
+	phys_addr_t spa, epa;
+	int nid;
+
+	__next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
+			 &memblock.memory, &memblock.reserved,
+			 &spa, &epa, &nid);
+
+	while (*idx != U64_MAX) {
+		unsigned long epfn = PFN_DOWN(epa);
+		unsigned long spfn = PFN_UP(spa);
+
+		/*
+		 * Verify the end is at least past the start of the zone and
+		 * that we have at least one PFN to initialize.
+		 */
+		if (zone->zone_start_pfn < epfn && spfn < epfn) {
+			/* if we went too far just stop searching */
+			if (zone_end_pfn(zone) <= spfn) {
+				*idx = U64_MAX;
+				break;
+			}
+
+			if (out_spfn)
+				*out_spfn = max(zone->zone_start_pfn, spfn);
+			if (out_epfn)
+				*out_epfn = min(zone_end_pfn(zone), epfn);
+
+			return;
+		}
+
+		__next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
+				 &memblock.memory, &memblock.reserved,
+				 &spa, &epa, &nid);
+	}
+
+	/* signal end of iteration */
+	if (out_spfn)
+		*out_spfn = ULONG_MAX;
+	if (out_epfn)
+		*out_epfn = 0;
+}
+
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 /**
  * memblock_alloc_range_nid - allocate boot memory block
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 25b82be438d7..fd42321c02f0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1631,11 +1631,9 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
 static int __init deferred_init_memmap(void *data)
 {
 	pg_data_t *pgdat = data;
-	int nid = pgdat->node_id;
 	unsigned long start = jiffies;
 	unsigned long nr_pages = 0;
 	unsigned long spfn, epfn, first_init_pfn, flags;
-	phys_addr_t spa, epa;
 	int zid;
 	struct zone *zone;
 	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
@@ -1672,14 +1670,12 @@ static int __init deferred_init_memmap(void *data)
 	 * freeing pages we can access pages that are ahead (computing buddy
 	 * page in __free_one_page()).
 	 */
-	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
-		spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
-		epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
+	for_each_free_mem_pfn_range_in_zone(i, zone, &spfn, &epfn) {
+		spfn = max_t(unsigned long, first_init_pfn, spfn);
 		nr_pages += deferred_init_pages(zone, spfn, epfn);
 	}
-	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
-		spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
-		epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
+	for_each_free_mem_pfn_range_in_zone(i, zone, &spfn, &epfn) {
+		spfn = max_t(unsigned long, first_init_pfn, spfn);
 		deferred_free_pages(spfn, epfn);
 	}
 	pgdat_resize_unlock(pgdat, &flags);
@@ -1687,8 +1683,8 @@ static int __init deferred_init_memmap(void *data)
 	/* Sanity check that the next zone really is unpopulated */
 	WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
 
-	pr_info("node %d initialised, %lu pages in %ums\n", nid, nr_pages,
-		jiffies_to_msecs(jiffies - start));
+	pr_info("node %d initialised, %lu pages in %ums\n",
+		pgdat->node_id, nr_pages, jiffies_to_msecs(jiffies - start));
 
 	pgdat_init_report_one_done();
 	return 0;
@@ -1712,13 +1708,11 @@ static int __init deferred_init_memmap(void *data)
 static noinline bool __init
 deferred_grow_zone(struct zone *zone, unsigned int order)
 {
-	int nid = zone_to_nid(zone);
-	pg_data_t *pgdat = NODE_DATA(nid);
 	unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
+	pg_data_t *pgdat = zone->zone_pgdat;
 	unsigned long nr_pages = 0;
 	unsigned long first_init_pfn, spfn, epfn, t, flags;
 	unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
-	phys_addr_t spa, epa;
 	u64 i;
 
 	/* Only the last zone may have deferred pages */
@@ -1754,9 +1748,8 @@ deferred_grow_zone(struct zone *zone, unsigned int order)
 		return false;
 	}
 
-	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
-		spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
-		epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
+	for_each_free_mem_pfn_range_in_zone(i, zone, &spfn, &epfn) {
+		spfn = max_t(unsigned long, first_init_pfn, spfn);
 
 		while (spfn < epfn && nr_pages < nr_pages_needed) {
 			t = ALIGN(spfn + PAGES_PER_SECTION, PAGES_PER_SECTION);
@@ -1770,9 +1763,9 @@ deferred_grow_zone(struct zone *zone, unsigned int order)
 			break;
 	}
 
-	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
-		spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
-		epfn = min_t(unsigned long, first_deferred_pfn, PFN_DOWN(epa));
+	for_each_free_mem_pfn_range_in_zone(i, zone, &spfn, &epfn) {
+		spfn = max_t(unsigned long, first_init_pfn, spfn);
+		epfn = min_t(unsigned long, first_deferred_pfn, epfn);
 		deferred_free_pages(spfn, epfn);
 
 		if (first_deferred_pfn == epfn)