author		Pavel Tatashin <pasha.tatashin@oracle.com>	2018-01-31 19:16:30 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-01-31 20:18:36 -0500
commit		80b1f41c0957a9da3bab4fb9ae76dc886753a59b (patch)
tree		4c2889b2809d41826aca835fb3c0225b97cce14d /mm/page_alloc.c
parent		9092c71bb724dba2ecba849eae69e5c9d39bd3d2 (diff)
mm: split deferred_init_range into initializing and freeing parts
In deferred_init_range() we initialize struct pages and also free them to the
buddy allocator. We do it in separate loops, because the buddy page is computed
ahead, so we do not want to access a struct page that has not been initialized
yet.
There is still, however, a corner case where it is potentially possible to
access an uninitialized struct page: when the buddy page comes from the next
memblock range.
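To make the corner case concrete, here is a minimal illustrative sketch (not
part of the patch; the helper name is hypothetical) of how the buddy pfn is
derived in the free path, mirroring what __free_one_page() does via
__find_buddy_pfn():

	/*
	 * Hypothetical helper, for illustration only: the buddy pfn is the
	 * current pfn with a single order bit flipped, so it can point past
	 * the end of the memblock range that is currently being freed.
	 */
	static unsigned long buddy_pfn_of(unsigned long pfn, unsigned int order)
	{
		return pfn ^ (1UL << order);
	}

For example, freeing pfn 0x1000 at order 3 gives buddy pfn 0x1008; if the
current memblock range ends at 0x1008, that buddy's struct page belongs to the
next range and may not be initialized yet.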
This patch fixes this problem by splitting deferred_init_range() into
two functions: one to initialize struct pages, and another to free them.
In addition, this patch brings the following improvements:
- Gets rid of the __def_free() helper function and simplifies the loop logic
  by adding a new pfn validity check function: deferred_pfn_valid().
- Reduces the number of variables that we track, so there is a higher chance
  that we will avoid using the stack to store/load variables inside hot loops.
- Enables future multi-threading of these functions: do the initialization in
  multiple threads, wait for all threads to finish, then do the freeing part
  in multiple threads.
Tested on x86 with 1T of memory to make sure no regressions are
introduced.
[akpm@linux-foundation.org: fix spello in comment]
Link: http://lkml.kernel.org/r/20171107150446.32055-2-pasha.tatashin@oracle.com
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Steven Sistare <steven.sistare@oracle.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	146
1 file changed, 76 insertions(+), 70 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 76c9688b6a0a..a73cffe287a5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1457,92 +1457,87 @@ static inline void __init pgdat_init_report_one_done(void)
 }
 
 /*
- * Helper for deferred_init_range, free the given range, reset the counters, and
- * return number of pages freed.
+ * Returns true if page needs to be initialized or freed to buddy allocator.
+ *
+ * First we check if pfn is valid on architectures where it is possible to have
+ * holes within pageblock_nr_pages. On systems where it is not possible, this
+ * function is optimized out.
+ *
+ * Then, we check if a current large page is valid by only checking the validity
+ * of the head pfn.
+ *
+ * Finally, meminit_pfn_in_nid is checked on systems where pfns can interleave
+ * within a node: a pfn is between start and end of a node, but does not belong
+ * to this memory node.
  */
-static inline unsigned long __init __def_free(unsigned long *nr_free,
-					      unsigned long *free_base_pfn,
-					      struct page **page)
+static inline bool __init
+deferred_pfn_valid(int nid, unsigned long pfn,
+		   struct mminit_pfnnid_cache *nid_init_state)
 {
-	unsigned long nr = *nr_free;
+	if (!pfn_valid_within(pfn))
+		return false;
+	if (!(pfn & (pageblock_nr_pages - 1)) && !pfn_valid(pfn))
+		return false;
+	if (!meminit_pfn_in_nid(pfn, nid, nid_init_state))
+		return false;
+	return true;
+}
 
-	deferred_free_range(*free_base_pfn, nr);
-	*free_base_pfn = 0;
-	*nr_free = 0;
-	*page = NULL;
+/*
+ * Free pages to buddy allocator. Try to free aligned pages in
+ * pageblock_nr_pages sizes.
+ */
+static void __init deferred_free_pages(int nid, int zid, unsigned long pfn,
+				       unsigned long end_pfn)
+{
+	struct mminit_pfnnid_cache nid_init_state = { };
+	unsigned long nr_pgmask = pageblock_nr_pages - 1;
+	unsigned long nr_free = 0;
 
-	return nr;
+	for (; pfn < end_pfn; pfn++) {
+		if (!deferred_pfn_valid(nid, pfn, &nid_init_state)) {
+			deferred_free_range(pfn - nr_free, nr_free);
+			nr_free = 0;
+		} else if (!(pfn & nr_pgmask)) {
+			deferred_free_range(pfn - nr_free, nr_free);
+			nr_free = 1;
+			cond_resched();
+		} else {
+			nr_free++;
+		}
+	}
+	/* Free the last block of pages to allocator */
+	deferred_free_range(pfn - nr_free, nr_free);
 }
 
-static unsigned long __init deferred_init_range(int nid, int zid,
-						unsigned long start_pfn,
-						unsigned long end_pfn)
+/*
+ * Initialize struct pages. We minimize pfn page lookups and scheduler checks
+ * by performing it only once every pageblock_nr_pages.
+ * Return number of pages initialized.
+ */
+static unsigned long __init deferred_init_pages(int nid, int zid,
+						unsigned long pfn,
+						unsigned long end_pfn)
 {
 	struct mminit_pfnnid_cache nid_init_state = { };
 	unsigned long nr_pgmask = pageblock_nr_pages - 1;
-	unsigned long free_base_pfn = 0;
 	unsigned long nr_pages = 0;
-	unsigned long nr_free = 0;
 	struct page *page = NULL;
-	unsigned long pfn;
 
-	/*
-	 * First we check if pfn is valid on architectures where it is possible
-	 * to have holes within pageblock_nr_pages. On systems where it is not
-	 * possible, this function is optimized out.
-	 *
-	 * Then, we check if a current large page is valid by only checking the
-	 * validity of the head pfn.
-	 *
-	 * meminit_pfn_in_nid is checked on systems where pfns can interleave
-	 * within a node: a pfn is between start and end of a node, but does not
-	 * belong to this memory node.
-	 *
-	 * Finally, we minimize pfn page lookups and scheduler checks by
-	 * performing it only once every pageblock_nr_pages.
-	 *
-	 * We do it in two loops: first we initialize struct page, than free to
-	 * buddy allocator, becuse while we are freeing pages we can access
-	 * pages that are ahead (computing buddy page in __free_one_page()).
-	 */
-	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
-		if (!pfn_valid_within(pfn))
+	for (; pfn < end_pfn; pfn++) {
+		if (!deferred_pfn_valid(nid, pfn, &nid_init_state)) {
+			page = NULL;
 			continue;
-		if ((pfn & nr_pgmask) || pfn_valid(pfn)) {
-			if (meminit_pfn_in_nid(pfn, nid, &nid_init_state)) {
-				if (page && (pfn & nr_pgmask))
-					page++;
-				else
-					page = pfn_to_page(pfn);
-				__init_single_page(page, pfn, zid, nid);
-				cond_resched();
-			}
-		}
-	}
-
-	page = NULL;
-	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
-		if (!pfn_valid_within(pfn)) {
-			nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
-		} else if (!(pfn & nr_pgmask) && !pfn_valid(pfn)) {
-			nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
-		} else if (!meminit_pfn_in_nid(pfn, nid, &nid_init_state)) {
-			nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
-		} else if (page && (pfn & nr_pgmask)) {
-			page++;
-			nr_free++;
-		} else {
-			nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
+		} else if (!page || !(pfn & nr_pgmask)) {
 			page = pfn_to_page(pfn);
-			free_base_pfn = pfn;
-			nr_free = 1;
 			cond_resched();
+		} else {
+			page++;
 		}
+		__init_single_page(page, pfn, zid, nid);
+		nr_pages++;
 	}
-	/* Free the last block of pages to allocator */
-	nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
-
-	return nr_pages;
+	return (nr_pages);
 }
 
 /* Initialise remaining memory on a node */
@@ -1582,10 +1577,21 @@ static int __init deferred_init_memmap(void *data)
 	}
 	first_init_pfn = max(zone->zone_start_pfn, first_init_pfn);
 
+	/*
+	 * Initialize and free pages. We do it in two loops: first we initialize
+	 * struct page, than free to buddy allocator, because while we are
+	 * freeing pages we can access pages that are ahead (computing buddy
+	 * page in __free_one_page()).
+	 */
+	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
+		spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
+		epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
+		nr_pages += deferred_init_pages(nid, zid, spfn, epfn);
+	}
 	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
 		spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
 		epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
-		nr_pages += deferred_init_range(nid, zid, spfn, epfn);
+		deferred_free_pages(nid, zid, spfn, epfn);
 	}
 
 	/* Sanity check that the next zone really is unpopulated */