Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 174
1 files changed, 93 insertions, 81 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 76c9688b6a0a..c7dd9c86e353 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -293,7 +293,7 @@ int page_group_by_mobility_disabled __read_mostly;
293 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | 293 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
294 | 294 | ||
295 | /* | 295 | /* |
296 | * Determine how many pages need to be initialized durig early boot | 296 | * Determine how many pages need to be initialized during early boot |
297 | * (non-deferred initialization). | 297 | * (non-deferred initialization). |
298 | * The value of first_deferred_pfn will be set later, once non-deferred pages | 298 | * The value of first_deferred_pfn will be set later, once non-deferred pages |
299 | * are initialized, but for now set it ULONG_MAX. | 299 | * are initialized, but for now set it ULONG_MAX. |
@@ -344,7 +344,7 @@ static inline bool update_defer_init(pg_data_t *pgdat,
344 | unsigned long pfn, unsigned long zone_end, | 344 | unsigned long pfn, unsigned long zone_end, |
345 | unsigned long *nr_initialised) | 345 | unsigned long *nr_initialised) |
346 | { | 346 | { |
347 | /* Always populate low zones for address-contrained allocations */ | 347 | /* Always populate low zones for address-constrained allocations */ |
348 | if (zone_end < pgdat_end_pfn(pgdat)) | 348 | if (zone_end < pgdat_end_pfn(pgdat)) |
349 | return true; | 349 | return true; |
350 | (*nr_initialised)++; | 350 | (*nr_initialised)++; |
@@ -1177,9 +1177,10 @@ static void free_one_page(struct zone *zone,
1177 | } | 1177 | } |
1178 | 1178 | ||
1179 | static void __meminit __init_single_page(struct page *page, unsigned long pfn, | 1179 | static void __meminit __init_single_page(struct page *page, unsigned long pfn, |
1180 | unsigned long zone, int nid) | 1180 | unsigned long zone, int nid, bool zero) |
1181 | { | 1181 | { |
1182 | mm_zero_struct_page(page); | 1182 | if (zero) |
1183 | mm_zero_struct_page(page); | ||
1183 | set_page_links(page, zone, nid, pfn); | 1184 | set_page_links(page, zone, nid, pfn); |
1184 | init_page_count(page); | 1185 | init_page_count(page); |
1185 | page_mapcount_reset(page); | 1186 | page_mapcount_reset(page); |
@@ -1194,9 +1195,9 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,
1194 | } | 1195 | } |
1195 | 1196 | ||
1196 | static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone, | 1197 | static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone, |
1197 | int nid) | 1198 | int nid, bool zero) |
1198 | { | 1199 | { |
1199 | return __init_single_page(pfn_to_page(pfn), pfn, zone, nid); | 1200 | return __init_single_page(pfn_to_page(pfn), pfn, zone, nid, zero); |
1200 | } | 1201 | } |
1201 | 1202 | ||
1202 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | 1203 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
@@ -1217,7 +1218,7 @@ static void __meminit init_reserved_page(unsigned long pfn)
1217 | if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone)) | 1218 | if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone)) |
1218 | break; | 1219 | break; |
1219 | } | 1220 | } |
1220 | __init_single_pfn(pfn, zid, nid); | 1221 | __init_single_pfn(pfn, zid, nid, true); |
1221 | } | 1222 | } |
1222 | #else | 1223 | #else |
1223 | static inline void init_reserved_page(unsigned long pfn) | 1224 | static inline void init_reserved_page(unsigned long pfn) |
@@ -1457,92 +1458,87 @@ static inline void __init pgdat_init_report_one_done(void)
1457 | } | 1458 | } |
1458 | 1459 | ||
1459 | /* | 1460 | /* |
1460 | * Helper for deferred_init_range, free the given range, reset the counters, and | 1461 | * Returns true if page needs to be initialized or freed to buddy allocator. |
1461 | * return number of pages freed. | 1462 | * |
1463 | * First we check if pfn is valid on architectures where it is possible to have | ||
1464 | * holes within pageblock_nr_pages. On systems where it is not possible, this | ||
1465 | * function is optimized out. | ||
1466 | * | ||
1467 | * Then, we check if a current large page is valid by only checking the validity | ||
1468 | * of the head pfn. | ||
1469 | * | ||
1470 | * Finally, meminit_pfn_in_nid is checked on systems where pfns can interleave | ||
1471 | * within a node: a pfn is between start and end of a node, but does not belong | ||
1472 | * to this memory node. | ||
1462 | */ | 1473 | */ |
1463 | static inline unsigned long __init __def_free(unsigned long *nr_free, | 1474 | static inline bool __init |
1464 | unsigned long *free_base_pfn, | 1475 | deferred_pfn_valid(int nid, unsigned long pfn, |
1465 | struct page **page) | 1476 | struct mminit_pfnnid_cache *nid_init_state) |
1466 | { | 1477 | { |
1467 | unsigned long nr = *nr_free; | 1478 | if (!pfn_valid_within(pfn)) |
1479 | return false; | ||
1480 | if (!(pfn & (pageblock_nr_pages - 1)) && !pfn_valid(pfn)) | ||
1481 | return false; | ||
1482 | if (!meminit_pfn_in_nid(pfn, nid, nid_init_state)) | ||
1483 | return false; | ||
1484 | return true; | ||
1485 | } | ||
1468 | 1486 | ||
1469 | deferred_free_range(*free_base_pfn, nr); | 1487 | /* |
1470 | *free_base_pfn = 0; | 1488 | * Free pages to buddy allocator. Try to free aligned pages in |
1471 | *nr_free = 0; | 1489 | * pageblock_nr_pages sizes. |
1472 | *page = NULL; | 1490 | */ |
1491 | static void __init deferred_free_pages(int nid, int zid, unsigned long pfn, | ||
1492 | unsigned long end_pfn) | ||
1493 | { | ||
1494 | struct mminit_pfnnid_cache nid_init_state = { }; | ||
1495 | unsigned long nr_pgmask = pageblock_nr_pages - 1; | ||
1496 | unsigned long nr_free = 0; | ||
1473 | 1497 | ||
1474 | return nr; | 1498 | for (; pfn < end_pfn; pfn++) { |
1499 | if (!deferred_pfn_valid(nid, pfn, &nid_init_state)) { | ||
1500 | deferred_free_range(pfn - nr_free, nr_free); | ||
1501 | nr_free = 0; | ||
1502 | } else if (!(pfn & nr_pgmask)) { | ||
1503 | deferred_free_range(pfn - nr_free, nr_free); | ||
1504 | nr_free = 1; | ||
1505 | cond_resched(); | ||
1506 | } else { | ||
1507 | nr_free++; | ||
1508 | } | ||
1509 | } | ||
1510 | /* Free the last block of pages to allocator */ | ||
1511 | deferred_free_range(pfn - nr_free, nr_free); | ||
1475 | } | 1512 | } |
1476 | 1513 | ||
1477 | static unsigned long __init deferred_init_range(int nid, int zid, | 1514 | /* |
1478 | unsigned long start_pfn, | 1515 | * Initialize struct pages. We minimize pfn page lookups and scheduler checks |
1479 | unsigned long end_pfn) | 1516 | * by performing it only once every pageblock_nr_pages. |
1517 | * Return number of pages initialized. | ||
1518 | */ | ||
1519 | static unsigned long __init deferred_init_pages(int nid, int zid, | ||
1520 | unsigned long pfn, | ||
1521 | unsigned long end_pfn) | ||
1480 | { | 1522 | { |
1481 | struct mminit_pfnnid_cache nid_init_state = { }; | 1523 | struct mminit_pfnnid_cache nid_init_state = { }; |
1482 | unsigned long nr_pgmask = pageblock_nr_pages - 1; | 1524 | unsigned long nr_pgmask = pageblock_nr_pages - 1; |
1483 | unsigned long free_base_pfn = 0; | ||
1484 | unsigned long nr_pages = 0; | 1525 | unsigned long nr_pages = 0; |
1485 | unsigned long nr_free = 0; | ||
1486 | struct page *page = NULL; | 1526 | struct page *page = NULL; |
1487 | unsigned long pfn; | ||
1488 | 1527 | ||
1489 | /* | 1528 | for (; pfn < end_pfn; pfn++) { |
1490 | * First we check if pfn is valid on architectures where it is possible | 1529 | if (!deferred_pfn_valid(nid, pfn, &nid_init_state)) { |
1491 | * to have holes within pageblock_nr_pages. On systems where it is not | 1530 | page = NULL; |
1492 | * possible, this function is optimized out. | ||
1493 | * | ||
1494 | * Then, we check if a current large page is valid by only checking the | ||
1495 | * validity of the head pfn. | ||
1496 | * | ||
1497 | * meminit_pfn_in_nid is checked on systems where pfns can interleave | ||
1498 | * within a node: a pfn is between start and end of a node, but does not | ||
1499 | * belong to this memory node. | ||
1500 | * | ||
1501 | * Finally, we minimize pfn page lookups and scheduler checks by | ||
1502 | * performing it only once every pageblock_nr_pages. | ||
1503 | * | ||
1504 | * We do it in two loops: first we initialize struct page, than free to | ||
1505 | * buddy allocator, becuse while we are freeing pages we can access | ||
1506 | * pages that are ahead (computing buddy page in __free_one_page()). | ||
1507 | */ | ||
1508 | for (pfn = start_pfn; pfn < end_pfn; pfn++) { | ||
1509 | if (!pfn_valid_within(pfn)) | ||
1510 | continue; | 1531 | continue; |
1511 | if ((pfn & nr_pgmask) || pfn_valid(pfn)) { | 1532 | } else if (!page || !(pfn & nr_pgmask)) { |
1512 | if (meminit_pfn_in_nid(pfn, nid, &nid_init_state)) { | ||
1513 | if (page && (pfn & nr_pgmask)) | ||
1514 | page++; | ||
1515 | else | ||
1516 | page = pfn_to_page(pfn); | ||
1517 | __init_single_page(page, pfn, zid, nid); | ||
1518 | cond_resched(); | ||
1519 | } | ||
1520 | } | ||
1521 | } | ||
1522 | |||
1523 | page = NULL; | ||
1524 | for (pfn = start_pfn; pfn < end_pfn; pfn++) { | ||
1525 | if (!pfn_valid_within(pfn)) { | ||
1526 | nr_pages += __def_free(&nr_free, &free_base_pfn, &page); | ||
1527 | } else if (!(pfn & nr_pgmask) && !pfn_valid(pfn)) { | ||
1528 | nr_pages += __def_free(&nr_free, &free_base_pfn, &page); | ||
1529 | } else if (!meminit_pfn_in_nid(pfn, nid, &nid_init_state)) { | ||
1530 | nr_pages += __def_free(&nr_free, &free_base_pfn, &page); | ||
1531 | } else if (page && (pfn & nr_pgmask)) { | ||
1532 | page++; | ||
1533 | nr_free++; | ||
1534 | } else { | ||
1535 | nr_pages += __def_free(&nr_free, &free_base_pfn, &page); | ||
1536 | page = pfn_to_page(pfn); | 1533 | page = pfn_to_page(pfn); |
1537 | free_base_pfn = pfn; | ||
1538 | nr_free = 1; | ||
1539 | cond_resched(); | 1534 | cond_resched(); |
1535 | } else { | ||
1536 | page++; | ||
1540 | } | 1537 | } |
1538 | __init_single_page(page, pfn, zid, nid, true); | ||
1539 | nr_pages++; | ||
1541 | } | 1540 | } |
1542 | /* Free the last block of pages to allocator */ | 1541 | return (nr_pages); |
1543 | nr_pages += __def_free(&nr_free, &free_base_pfn, &page); | ||
1544 | |||
1545 | return nr_pages; | ||
1546 | } | 1542 | } |
1547 | 1543 | ||
1548 | /* Initialise remaining memory on a node */ | 1544 | /* Initialise remaining memory on a node */ |
@@ -1582,10 +1578,21 @@ static int __init deferred_init_memmap(void *data)
1582 | } | 1578 | } |
1583 | first_init_pfn = max(zone->zone_start_pfn, first_init_pfn); | 1579 | first_init_pfn = max(zone->zone_start_pfn, first_init_pfn); |
1584 | 1580 | ||
1581 | /* | ||
1582 | * Initialize and free pages. We do it in two loops: first we initialize | ||
1583 | * struct page, than free to buddy allocator, because while we are | ||
1584 | * freeing pages we can access pages that are ahead (computing buddy | ||
1585 | * page in __free_one_page()). | ||
1586 | */ | ||
1587 | for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) { | ||
1588 | spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa)); | ||
1589 | epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa)); | ||
1590 | nr_pages += deferred_init_pages(nid, zid, spfn, epfn); | ||
1591 | } | ||
1585 | for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) { | 1592 | for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) { |
1586 | spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa)); | 1593 | spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa)); |
1587 | epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa)); | 1594 | epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa)); |
1588 | nr_pages += deferred_init_range(nid, zid, spfn, epfn); | 1595 | deferred_free_pages(nid, zid, spfn, epfn); |
1589 | } | 1596 | } |
1590 | 1597 | ||
1591 | /* Sanity check that the next zone really is unpopulated */ | 1598 | /* Sanity check that the next zone really is unpopulated */ |
@@ -3391,7 +3398,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
3391 | if (gfp_mask & __GFP_THISNODE) | 3398 | if (gfp_mask & __GFP_THISNODE) |
3392 | goto out; | 3399 | goto out; |
3393 | 3400 | ||
3394 | /* Exhausted what can be done so it's blamo time */ | 3401 | /* Exhausted what can be done so it's blame time */ |
3395 | if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) { | 3402 | if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) { |
3396 | *did_some_progress = 1; | 3403 | *did_some_progress = 1; |
3397 | 3404 | ||
@@ -4272,7 +4279,7 @@ unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
4272 | struct page *page; | 4279 | struct page *page; |
4273 | 4280 | ||
4274 | /* | 4281 | /* |
4275 | * __get_free_pages() returns a 32-bit address, which cannot represent | 4282 | * __get_free_pages() returns a virtual address, which cannot represent |
4276 | * a highmem page | 4283 | * a highmem page |
4277 | */ | 4284 | */ |
4278 | VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); | 4285 | VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0); |
@@ -5393,15 +5400,20 @@ not_early:
5393 | * can be created for invalid pages (for alignment) | 5400 | * can be created for invalid pages (for alignment) |
5394 | * check here not to call set_pageblock_migratetype() against | 5401 | * check here not to call set_pageblock_migratetype() against |
5395 | * pfn out of zone. | 5402 | * pfn out of zone. |
5403 | * | ||
5404 | * Please note that MEMMAP_HOTPLUG path doesn't clear memmap | ||
5405 | * because this is done early in sparse_add_one_section | ||
5396 | */ | 5406 | */ |
5397 | if (!(pfn & (pageblock_nr_pages - 1))) { | 5407 | if (!(pfn & (pageblock_nr_pages - 1))) { |
5398 | struct page *page = pfn_to_page(pfn); | 5408 | struct page *page = pfn_to_page(pfn); |
5399 | 5409 | ||
5400 | __init_single_page(page, pfn, zone, nid); | 5410 | __init_single_page(page, pfn, zone, nid, |
5411 | context != MEMMAP_HOTPLUG); | ||
5401 | set_pageblock_migratetype(page, MIGRATE_MOVABLE); | 5412 | set_pageblock_migratetype(page, MIGRATE_MOVABLE); |
5402 | cond_resched(); | 5413 | cond_resched(); |
5403 | } else { | 5414 | } else { |
5404 | __init_single_pfn(pfn, zone, nid); | 5415 | __init_single_pfn(pfn, zone, nid, |
5416 | context != MEMMAP_HOTPLUG); | ||
5405 | } | 5417 | } |
5406 | } | 5418 | } |
5407 | } | 5419 | } |