author	Pavel Tatashin <pasha.tatashin@oracle.com>	2018-01-31 19:16:30 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-01-31 20:18:36 -0500
commit	80b1f41c0957a9da3bab4fb9ae76dc886753a59b (patch)
tree	4c2889b2809d41826aca835fb3c0225b97cce14d /mm/page_alloc.c
parent	9092c71bb724dba2ecba849eae69e5c9d39bd3d2 (diff)
mm: split deferred_init_range into initializing and freeing parts
In deferred_init_range() we initialize struct pages, and also free them
to the buddy allocator. We do it in separate loops, because the buddy page
is computed ahead, so we do not want to access a struct page that has not
been initialized yet.

There is still, however, a corner case where it is potentially possible to
access an uninitialized struct page: when the buddy page is from the next
memblock range.

This patch fixes the problem by splitting deferred_init_range() into two
functions: one to initialize struct pages, and another to free them.

In addition, this patch brings the following improvements:

- Gets rid of the __def_free() helper function and simplifies the loop
  logic by adding a new pfn validity check function: deferred_pfn_valid().
- Reduces the number of variables that we track, so there is a higher
  chance that we will avoid using the stack to store/load variables inside
  hot loops.
- Enables future multi-threading of these functions: do initialization in
  multiple threads, wait for all threads to finish, then do the freeing
  part in multiple threads.

Tested on x86 with 1T of memory to make sure no regressions are
introduced.

[akpm@linux-foundation.org: fix spello in comment]
Link: http://lkml.kernel.org/r/20171107150446.32055-2-pasha.tatashin@oracle.com
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Steven Sistare <steven.sistare@oracle.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
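The corner case is easiest to see from how the buddy pfn is computed. The
sketch below is plain C for illustration only (the helper name buddy_pfn()
is ours; compare the kernel's __find_buddy_pfn()): the buddy of an order-N
block differs from it only in bit N of the pfn, so the buddy may lie ahead
of the pfn being freed, possibly in the next memblock range.

	/*
	 * Illustrative only: the buddy of an order-N block is found by
	 * flipping bit N of its pfn, so the result can be larger than
	 * the pfn being freed. Freeing pfn 0 at order 3, for example,
	 * touches the struct page of pfn 8.
	 */
	static unsigned long buddy_pfn(unsigned long pfn, unsigned int order)
	{
		return pfn ^ (1UL << order);
	}

Because __free_one_page() reads the buddy's struct page as soon as a page
is freed, every struct page it can reach must already be initialized, which
is why freeing must not begin until initialization has finished everywhere.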
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c | 146 ++++++++++----------
1 file changed, 76 insertions(+), 70 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 76c9688b6a0a..a73cffe287a5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1457,92 +1457,87 @@ static inline void __init pgdat_init_report_one_done(void)
 }
 
 /*
- * Helper for deferred_init_range, free the given range, reset the counters, and
- * return number of pages freed.
+ * Returns true if page needs to be initialized or freed to buddy allocator.
+ *
+ * First we check if pfn is valid on architectures where it is possible to have
+ * holes within pageblock_nr_pages. On systems where it is not possible, this
+ * function is optimized out.
+ *
+ * Then, we check if a current large page is valid by only checking the validity
+ * of the head pfn.
+ *
+ * Finally, meminit_pfn_in_nid is checked on systems where pfns can interleave
+ * within a node: a pfn is between start and end of a node, but does not belong
+ * to this memory node.
  */
-static inline unsigned long __init __def_free(unsigned long *nr_free,
-					      unsigned long *free_base_pfn,
-					      struct page **page)
+static inline bool __init
+deferred_pfn_valid(int nid, unsigned long pfn,
+		   struct mminit_pfnnid_cache *nid_init_state)
 {
-	unsigned long nr = *nr_free;
+	if (!pfn_valid_within(pfn))
+		return false;
+	if (!(pfn & (pageblock_nr_pages - 1)) && !pfn_valid(pfn))
+		return false;
+	if (!meminit_pfn_in_nid(pfn, nid, nid_init_state))
+		return false;
+	return true;
+}
 
-	deferred_free_range(*free_base_pfn, nr);
-	*free_base_pfn = 0;
-	*nr_free = 0;
-	*page = NULL;
+/*
+ * Free pages to buddy allocator. Try to free aligned pages in
+ * pageblock_nr_pages sizes.
+ */
+static void __init deferred_free_pages(int nid, int zid, unsigned long pfn,
+				       unsigned long end_pfn)
+{
+	struct mminit_pfnnid_cache nid_init_state = { };
+	unsigned long nr_pgmask = pageblock_nr_pages - 1;
+	unsigned long nr_free = 0;
 
-	return nr;
+	for (; pfn < end_pfn; pfn++) {
+		if (!deferred_pfn_valid(nid, pfn, &nid_init_state)) {
+			deferred_free_range(pfn - nr_free, nr_free);
+			nr_free = 0;
+		} else if (!(pfn & nr_pgmask)) {
+			deferred_free_range(pfn - nr_free, nr_free);
+			nr_free = 1;
+			cond_resched();
+		} else {
+			nr_free++;
+		}
+	}
+	/* Free the last block of pages to allocator */
+	deferred_free_range(pfn - nr_free, nr_free);
 }
 
-static unsigned long __init deferred_init_range(int nid, int zid,
-						unsigned long start_pfn,
-						unsigned long end_pfn)
+/*
+ * Initialize struct pages. We minimize pfn page lookups and scheduler checks
+ * by performing it only once every pageblock_nr_pages.
+ * Return number of pages initialized.
+ */
+static unsigned long __init deferred_init_pages(int nid, int zid,
+						unsigned long pfn,
+						unsigned long end_pfn)
 {
 	struct mminit_pfnnid_cache nid_init_state = { };
 	unsigned long nr_pgmask = pageblock_nr_pages - 1;
-	unsigned long free_base_pfn = 0;
 	unsigned long nr_pages = 0;
-	unsigned long nr_free = 0;
 	struct page *page = NULL;
-	unsigned long pfn;
 
-	/*
-	 * First we check if pfn is valid on architectures where it is possible
-	 * to have holes within pageblock_nr_pages. On systems where it is not
-	 * possible, this function is optimized out.
-	 *
-	 * Then, we check if a current large page is valid by only checking the
-	 * validity of the head pfn.
-	 *
-	 * meminit_pfn_in_nid is checked on systems where pfns can interleave
-	 * within a node: a pfn is between start and end of a node, but does not
-	 * belong to this memory node.
-	 *
-	 * Finally, we minimize pfn page lookups and scheduler checks by
-	 * performing it only once every pageblock_nr_pages.
-	 *
-	 * We do it in two loops: first we initialize struct page, than free to
-	 * buddy allocator, becuse while we are freeing pages we can access
-	 * pages that are ahead (computing buddy page in __free_one_page()).
-	 */
-	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
-		if (!pfn_valid_within(pfn))
+	for (; pfn < end_pfn; pfn++) {
+		if (!deferred_pfn_valid(nid, pfn, &nid_init_state)) {
+			page = NULL;
 			continue;
-		if ((pfn & nr_pgmask) || pfn_valid(pfn)) {
-			if (meminit_pfn_in_nid(pfn, nid, &nid_init_state)) {
-				if (page && (pfn & nr_pgmask))
-					page++;
-				else
-					page = pfn_to_page(pfn);
-				__init_single_page(page, pfn, zid, nid);
-				cond_resched();
-			}
-		}
-	}
-
-	page = NULL;
-	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
-		if (!pfn_valid_within(pfn)) {
-			nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
-		} else if (!(pfn & nr_pgmask) && !pfn_valid(pfn)) {
-			nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
-		} else if (!meminit_pfn_in_nid(pfn, nid, &nid_init_state)) {
-			nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
-		} else if (page && (pfn & nr_pgmask)) {
-			page++;
-			nr_free++;
-		} else {
-			nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
+		} else if (!page || !(pfn & nr_pgmask)) {
 			page = pfn_to_page(pfn);
-			free_base_pfn = pfn;
-			nr_free = 1;
 			cond_resched();
+		} else {
+			page++;
 		}
+		__init_single_page(page, pfn, zid, nid);
+		nr_pages++;
 	}
-	/* Free the last block of pages to allocator */
-	nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
-
-	return nr_pages;
+	return (nr_pages);
 }
 
 /* Initialise remaining memory on a node */
@@ -1582,10 +1577,21 @@ static int __init deferred_init_memmap(void *data)
 	}
 	first_init_pfn = max(zone->zone_start_pfn, first_init_pfn);
 
+	/*
+	 * Initialize and free pages. We do it in two loops: first we initialize
+	 * struct page, than free to buddy allocator, because while we are
+	 * freeing pages we can access pages that are ahead (computing buddy
+	 * page in __free_one_page()).
+	 */
+	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
+		spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
+		epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
+		nr_pages += deferred_init_pages(nid, zid, spfn, epfn);
+	}
 	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
 		spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
 		epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
-		nr_pages += deferred_init_range(nid, zid, spfn, epfn);
+		deferred_free_pages(nid, zid, spfn, epfn);
 	}
 
 	/* Sanity check that the next zone really is unpopulated */
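The subtle part of the new deferred_free_pages() is the batching: nr_free
counts a run of contiguous valid pfns, the base of the run is recovered as
pfn - nr_free, and the run is flushed either at a hole (restarting the count
at zero) or at a pageblock head (restarting at one, since the head pfn itself
is valid). The following standalone userspace sketch mimics that loop shape;
the toy_* names, the pageblock size of 8, and the fake hole are ours, purely
for illustration (cond_resched() omitted):

	#include <stdio.h>
	#include <stdbool.h>

	#define TOY_PGMASK 7UL	/* pretend pageblock_nr_pages == 8 */

	/* Fake validity map: pretend pfns 20..23 are a hole. */
	static bool toy_pfn_valid(unsigned long pfn)
	{
		return pfn < 20 || pfn >= 24;
	}

	/* Stand-in for deferred_free_range(): just report the batch. */
	static void toy_free_range(unsigned long base, unsigned long count)
	{
		if (count)
			printf("free [%lu, %lu)\n", base, base + count);
	}

	int main(void)
	{
		unsigned long pfn, nr_free = 0;

		for (pfn = 16; pfn < 32; pfn++) {
			if (!toy_pfn_valid(pfn)) {
				toy_free_range(pfn - nr_free, nr_free);
				nr_free = 0;	/* hole: flush, restart empty */
			} else if (!(pfn & TOY_PGMASK)) {
				toy_free_range(pfn - nr_free, nr_free);
				nr_free = 1;	/* pageblock head: flush, keep head */
			} else {
				nr_free++;	/* extend the current run */
			}
		}
		toy_free_range(pfn - nr_free, nr_free);	/* flush the trailing run */
		return 0;
	}

Run, this prints "free [16, 20)" and "free [24, 32)": the hole splits the
range, and the run starting at the pageblock head 24 is flushed by the final
call after the loop, mirroring the "Free the last block of pages to
allocator" step above.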