Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c | 469
1 file changed, 272 insertions(+), 197 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c841af88836a..d4096f4a5c1f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -24,7 +24,6 @@ | |||
24 | #include <linux/memblock.h> | 24 | #include <linux/memblock.h> |
25 | #include <linux/compiler.h> | 25 | #include <linux/compiler.h> |
26 | #include <linux/kernel.h> | 26 | #include <linux/kernel.h> |
27 | #include <linux/kmemcheck.h> | ||
28 | #include <linux/kasan.h> | 27 | #include <linux/kasan.h> |
29 | #include <linux/module.h> | 28 | #include <linux/module.h> |
30 | #include <linux/suspend.h> | 29 | #include <linux/suspend.h> |
@@ -83,6 +82,8 @@ DEFINE_PER_CPU(int, numa_node); | |||
83 | EXPORT_PER_CPU_SYMBOL(numa_node); | 82 | EXPORT_PER_CPU_SYMBOL(numa_node); |
84 | #endif | 83 | #endif |
85 | 84 | ||
85 | DEFINE_STATIC_KEY_TRUE(vm_numa_stat_key); | ||
86 | |||
86 | #ifdef CONFIG_HAVE_MEMORYLESS_NODES | 87 | #ifdef CONFIG_HAVE_MEMORYLESS_NODES |
87 | /* | 88 | /* |
88 | * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly. | 89 | * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly. |
@@ -290,28 +291,37 @@ EXPORT_SYMBOL(nr_online_nodes); | |||
290 | int page_group_by_mobility_disabled __read_mostly; | 291 | int page_group_by_mobility_disabled __read_mostly; |
291 | 292 | ||
292 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | 293 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
294 | |||
295 | /* | ||
296 | * Determine how many pages need to be initialized during early boot | ||
297 | * (non-deferred initialization). | ||
298 | * The value of first_deferred_pfn will be set later, once non-deferred pages | ||
299 | * are initialized, but for now set it to ULONG_MAX. | ||
300 | */ | ||
293 | static inline void reset_deferred_meminit(pg_data_t *pgdat) | 301 | static inline void reset_deferred_meminit(pg_data_t *pgdat) |
294 | { | 302 | { |
295 | unsigned long max_initialise; | 303 | phys_addr_t start_addr, end_addr; |
296 | unsigned long reserved_lowmem; | 304 | unsigned long max_pgcnt; |
305 | unsigned long reserved; | ||
297 | 306 | ||
298 | /* | 307 | /* |
299 | * Initialise at least 2G of a node but also take into account the | 308 | * Initialise at least 2G of a node but also take into account the |
300 | * two large system hashes that can take up 1GB for 0.25TB/node. | 309 | * two large system hashes that can take up 1GB for 0.25TB/node. |
301 | */ | 310 | */ |
302 | max_initialise = max(2UL << (30 - PAGE_SHIFT), | 311 | max_pgcnt = max(2UL << (30 - PAGE_SHIFT), |
303 | (pgdat->node_spanned_pages >> 8)); | 312 | (pgdat->node_spanned_pages >> 8)); |
304 | 313 | ||
305 | /* | 314 | /* |
306 | * Compensate for all the memblock reservations (e.g. crash kernel) | 315 | * Compensate for all the memblock reservations (e.g. crash kernel) |
307 | * from the initial estimation to make sure we will initialize enough | 316 | * from the initial estimation to make sure we will initialize enough |
308 | * memory to boot. | 317 | * memory to boot. |
309 | */ | 318 | */ |
310 | reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn, | 319 | start_addr = PFN_PHYS(pgdat->node_start_pfn); |
311 | pgdat->node_start_pfn + max_initialise); | 320 | end_addr = PFN_PHYS(pgdat->node_start_pfn + max_pgcnt); |
312 | max_initialise += reserved_lowmem; | 321 | reserved = memblock_reserved_memory_within(start_addr, end_addr); |
322 | max_pgcnt += PHYS_PFN(reserved); | ||
313 | 323 | ||
314 | pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages); | 324 | pgdat->static_init_pgcnt = min(max_pgcnt, pgdat->node_spanned_pages); |
315 | pgdat->first_deferred_pfn = ULONG_MAX; | 325 | pgdat->first_deferred_pfn = ULONG_MAX; |
316 | } | 326 | } |
317 | 327 | ||
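
The functional change in this hunk is a units fix: the old code added a byte count (reserved_lowmem, as returned by memblock_reserved_memory_within()) straight onto a page count. The new code converts the pfn window to physical addresses with PFN_PHYS() before asking memblock, then converts the reserved byte total back to pages with PHYS_PFN() before adding it to max_pgcnt. A standalone sketch of the arithmetic, assuming a 4 KiB page size and invented node/reservation sizes (this is illustration, not kernel code):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PHYS_PFN(addr) ((unsigned long)((addr) >> PAGE_SHIFT))

int main(void)
{
	unsigned long node_spanned_pages = 64UL << (30 - PAGE_SHIFT);	/* 64 GiB node */
	unsigned long long reserved_bytes = 256ULL << 20;		/* 256 MiB reserved, e.g. crashkernel */

	/* at least 2 GiB worth of pages, or 1/256th of the node, whichever is larger */
	unsigned long max_pgcnt = 2UL << (30 - PAGE_SHIFT);
	if ((node_spanned_pages >> 8) > max_pgcnt)
		max_pgcnt = node_spanned_pages >> 8;

	/* memblock reports reservations in bytes; convert to pages before adding */
	max_pgcnt += PHYS_PFN(reserved_bytes);

	if (max_pgcnt > node_spanned_pages)
		max_pgcnt = node_spanned_pages;

	printf("pages initialised before the deferred phase: %lu\n", max_pgcnt);
	return 0;
}
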
@@ -338,7 +348,7 @@ static inline bool update_defer_init(pg_data_t *pgdat, | |||
338 | if (zone_end < pgdat_end_pfn(pgdat)) | 348 | if (zone_end < pgdat_end_pfn(pgdat)) |
339 | return true; | 349 | return true; |
340 | (*nr_initialised)++; | 350 | (*nr_initialised)++; |
341 | if ((*nr_initialised > pgdat->static_init_size) && | 351 | if ((*nr_initialised > pgdat->static_init_pgcnt) && |
342 | (pfn & (PAGES_PER_SECTION - 1)) == 0) { | 352 | (pfn & (PAGES_PER_SECTION - 1)) == 0) { |
343 | pgdat->first_deferred_pfn = pfn; | 353 | pgdat->first_deferred_pfn = pfn; |
344 | return false; | 354 | return false; |
@@ -1013,7 +1023,6 @@ static __always_inline bool free_pages_prepare(struct page *page, | |||
1013 | VM_BUG_ON_PAGE(PageTail(page), page); | 1023 | VM_BUG_ON_PAGE(PageTail(page), page); |
1014 | 1024 | ||
1015 | trace_mm_page_free(page, order); | 1025 | trace_mm_page_free(page, order); |
1016 | kmemcheck_free_shadow(page, order); | ||
1017 | 1026 | ||
1018 | /* | 1027 | /* |
1019 | * Check tail pages before head page information is cleared to | 1028 | * Check tail pages before head page information is cleared to |
@@ -1170,6 +1179,7 @@ static void free_one_page(struct zone *zone, | |||
1170 | static void __meminit __init_single_page(struct page *page, unsigned long pfn, | 1179 | static void __meminit __init_single_page(struct page *page, unsigned long pfn, |
1171 | unsigned long zone, int nid) | 1180 | unsigned long zone, int nid) |
1172 | { | 1181 | { |
1182 | mm_zero_struct_page(page); | ||
1173 | set_page_links(page, zone, nid, pfn); | 1183 | set_page_links(page, zone, nid, pfn); |
1174 | init_page_count(page); | 1184 | init_page_count(page); |
1175 | page_mapcount_reset(page); | 1185 | page_mapcount_reset(page); |
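
mm_zero_struct_page() used above is not defined in this file; it comes from a companion change to include/linux/mm.h. To my understanding it is, on most configurations, essentially a memset of the whole struct page, so that the field initialisation that follows starts from zeroed memory. A standalone sketch with a dummy stand-in type (the real struct page layout differs):

#include <stdio.h>
#include <string.h>

struct page { unsigned long flags; void *lru_prev, *lru_next; };	/* stand-in only */

#define mm_zero_struct_page(pp) ((void)memset((pp), 0, sizeof(struct page)))

int main(void)
{
	struct page p = { .flags = 0xdeadbeefUL };

	mm_zero_struct_page(&p);				/* clear everything before init */
	printf("flags after zeroing: %#lx\n", p.flags);		/* prints 0 */
	return 0;
}
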
@@ -1190,7 +1200,7 @@ static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone, | |||
1190 | } | 1200 | } |
1191 | 1201 | ||
1192 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | 1202 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
1193 | static void init_reserved_page(unsigned long pfn) | 1203 | static void __meminit init_reserved_page(unsigned long pfn) |
1194 | { | 1204 | { |
1195 | pg_data_t *pgdat; | 1205 | pg_data_t *pgdat; |
1196 | int nid, zid; | 1206 | int nid, zid; |
@@ -1410,14 +1420,17 @@ void clear_zone_contiguous(struct zone *zone) | |||
1410 | } | 1420 | } |
1411 | 1421 | ||
1412 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | 1422 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
1413 | static void __init deferred_free_range(struct page *page, | 1423 | static void __init deferred_free_range(unsigned long pfn, |
1414 | unsigned long pfn, int nr_pages) | 1424 | unsigned long nr_pages) |
1415 | { | 1425 | { |
1416 | int i; | 1426 | struct page *page; |
1427 | unsigned long i; | ||
1417 | 1428 | ||
1418 | if (!page) | 1429 | if (!nr_pages) |
1419 | return; | 1430 | return; |
1420 | 1431 | ||
1432 | page = pfn_to_page(pfn); | ||
1433 | |||
1421 | /* Free a large naturally-aligned chunk if possible */ | 1434 | /* Free a large naturally-aligned chunk if possible */ |
1422 | if (nr_pages == pageblock_nr_pages && | 1435 | if (nr_pages == pageblock_nr_pages && |
1423 | (pfn & (pageblock_nr_pages - 1)) == 0) { | 1436 | (pfn & (pageblock_nr_pages - 1)) == 0) { |
@@ -1443,19 +1456,109 @@ static inline void __init pgdat_init_report_one_done(void) | |||
1443 | complete(&pgdat_init_all_done_comp); | 1456 | complete(&pgdat_init_all_done_comp); |
1444 | } | 1457 | } |
1445 | 1458 | ||
1459 | /* | ||
1460 | * Helper for deferred_init_range(): free the given range, reset the counters, and | ||
1461 | * return the number of pages freed. | ||
1462 | */ | ||
1463 | static inline unsigned long __init __def_free(unsigned long *nr_free, | ||
1464 | unsigned long *free_base_pfn, | ||
1465 | struct page **page) | ||
1466 | { | ||
1467 | unsigned long nr = *nr_free; | ||
1468 | |||
1469 | deferred_free_range(*free_base_pfn, nr); | ||
1470 | *free_base_pfn = 0; | ||
1471 | *nr_free = 0; | ||
1472 | *page = NULL; | ||
1473 | |||
1474 | return nr; | ||
1475 | } | ||
1476 | |||
1477 | static unsigned long __init deferred_init_range(int nid, int zid, | ||
1478 | unsigned long start_pfn, | ||
1479 | unsigned long end_pfn) | ||
1480 | { | ||
1481 | struct mminit_pfnnid_cache nid_init_state = { }; | ||
1482 | unsigned long nr_pgmask = pageblock_nr_pages - 1; | ||
1483 | unsigned long free_base_pfn = 0; | ||
1484 | unsigned long nr_pages = 0; | ||
1485 | unsigned long nr_free = 0; | ||
1486 | struct page *page = NULL; | ||
1487 | unsigned long pfn; | ||
1488 | |||
1489 | /* | ||
1490 | * First we check if pfn is valid on architectures where it is possible | ||
1491 | * to have holes within pageblock_nr_pages. On systems where it is not | ||
1492 | * possible, this function is optimized out. | ||
1493 | * | ||
1494 | * Then, we check if a current large page is valid by only checking the | ||
1495 | * validity of the head pfn. | ||
1496 | * | ||
1497 | * meminit_pfn_in_nid is checked on systems where pfns can interleave | ||
1498 | * within a node: a pfn is between start and end of a node, but does not | ||
1499 | * belong to this memory node. | ||
1500 | * | ||
1501 | * Finally, we minimize pfn page lookups and scheduler checks by | ||
1502 | * performing them only once every pageblock_nr_pages. | ||
1503 | * | ||
1504 | * We do it in two loops: first we initialize struct page, then free to | ||
1505 | * the buddy allocator, because while we are freeing pages we can access | ||
1506 | * pages that are ahead (computing buddy page in __free_one_page()). | ||
1507 | */ | ||
1508 | for (pfn = start_pfn; pfn < end_pfn; pfn++) { | ||
1509 | if (!pfn_valid_within(pfn)) | ||
1510 | continue; | ||
1511 | if ((pfn & nr_pgmask) || pfn_valid(pfn)) { | ||
1512 | if (meminit_pfn_in_nid(pfn, nid, &nid_init_state)) { | ||
1513 | if (page && (pfn & nr_pgmask)) | ||
1514 | page++; | ||
1515 | else | ||
1516 | page = pfn_to_page(pfn); | ||
1517 | __init_single_page(page, pfn, zid, nid); | ||
1518 | cond_resched(); | ||
1519 | } | ||
1520 | } | ||
1521 | } | ||
1522 | |||
1523 | page = NULL; | ||
1524 | for (pfn = start_pfn; pfn < end_pfn; pfn++) { | ||
1525 | if (!pfn_valid_within(pfn)) { | ||
1526 | nr_pages += __def_free(&nr_free, &free_base_pfn, &page); | ||
1527 | } else if (!(pfn & nr_pgmask) && !pfn_valid(pfn)) { | ||
1528 | nr_pages += __def_free(&nr_free, &free_base_pfn, &page); | ||
1529 | } else if (!meminit_pfn_in_nid(pfn, nid, &nid_init_state)) { | ||
1530 | nr_pages += __def_free(&nr_free, &free_base_pfn, &page); | ||
1531 | } else if (page && (pfn & nr_pgmask)) { | ||
1532 | page++; | ||
1533 | nr_free++; | ||
1534 | } else { | ||
1535 | nr_pages += __def_free(&nr_free, &free_base_pfn, &page); | ||
1536 | page = pfn_to_page(pfn); | ||
1537 | free_base_pfn = pfn; | ||
1538 | nr_free = 1; | ||
1539 | cond_resched(); | ||
1540 | } | ||
1541 | } | ||
1542 | /* Free the last block of pages to allocator */ | ||
1543 | nr_pages += __def_free(&nr_free, &free_base_pfn, &page); | ||
1544 | |||
1545 | return nr_pages; | ||
1546 | } | ||
1547 | |||
1446 | /* Initialise remaining memory on a node */ | 1548 | /* Initialise remaining memory on a node */ |
1447 | static int __init deferred_init_memmap(void *data) | 1549 | static int __init deferred_init_memmap(void *data) |
1448 | { | 1550 | { |
1449 | pg_data_t *pgdat = data; | 1551 | pg_data_t *pgdat = data; |
1450 | int nid = pgdat->node_id; | 1552 | int nid = pgdat->node_id; |
1451 | struct mminit_pfnnid_cache nid_init_state = { }; | ||
1452 | unsigned long start = jiffies; | 1553 | unsigned long start = jiffies; |
1453 | unsigned long nr_pages = 0; | 1554 | unsigned long nr_pages = 0; |
1454 | unsigned long walk_start, walk_end; | 1555 | unsigned long spfn, epfn; |
1455 | int i, zid; | 1556 | phys_addr_t spa, epa; |
1557 | int zid; | ||
1456 | struct zone *zone; | 1558 | struct zone *zone; |
1457 | unsigned long first_init_pfn = pgdat->first_deferred_pfn; | 1559 | unsigned long first_init_pfn = pgdat->first_deferred_pfn; |
1458 | const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); | 1560 | const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); |
1561 | u64 i; | ||
1459 | 1562 | ||
1460 | if (first_init_pfn == ULONG_MAX) { | 1563 | if (first_init_pfn == ULONG_MAX) { |
1461 | pgdat_init_report_one_done(); | 1564 | pgdat_init_report_one_done(); |
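
The second loop of deferred_init_range() batches contiguous runs of freeable pfns and flushes each run through __def_free()/deferred_free_range() when it hits a hole, a pfn belonging to another node, or the start of a new run; the real code additionally breaks runs at pageblock boundaries, which the sketch below omits. A standalone illustration of just that batching pattern, with an invented validity map in place of the real pfn checks:

#include <stdio.h>

static unsigned long total_freed;

/* flush the current run (mirrors __def_free() in the patch) */
static void flush_run(unsigned long *base, unsigned long *nr_free)
{
	if (*nr_free)
		printf("free %lu pages starting at pfn %lu\n", *nr_free, *base);
	total_freed += *nr_free;
	*base = 0;
	*nr_free = 0;
}

int main(void)
{
	/* 1 = initialised page on this node, 0 = hole or foreign node (invented) */
	int usable[] = { 1, 1, 1, 0, 0, 1, 1, 0, 1 };
	unsigned long base = 0, nr_free = 0, pfn;

	for (pfn = 0; pfn < sizeof(usable) / sizeof(usable[0]); pfn++) {
		if (!usable[pfn]) {
			flush_run(&base, &nr_free);	/* break in the run: flush it */
		} else if (nr_free) {
			nr_free++;			/* extend the current run */
		} else {
			base = pfn;			/* start a new run */
			nr_free = 1;
		}
	}
	flush_run(&base, &nr_free);			/* free the last block */
	printf("total pages freed: %lu\n", total_freed);
	return 0;
}
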
@@ -1477,83 +1580,12 @@ static int __init deferred_init_memmap(void *data) | |||
1477 | if (first_init_pfn < zone_end_pfn(zone)) | 1580 | if (first_init_pfn < zone_end_pfn(zone)) |
1478 | break; | 1581 | break; |
1479 | } | 1582 | } |
1583 | first_init_pfn = max(zone->zone_start_pfn, first_init_pfn); | ||
1480 | 1584 | ||
1481 | for_each_mem_pfn_range(i, nid, &walk_start, &walk_end, NULL) { | 1585 | for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) { |
1482 | unsigned long pfn, end_pfn; | 1586 | spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa)); |
1483 | struct page *page = NULL; | 1587 | epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa)); |
1484 | struct page *free_base_page = NULL; | 1588 | nr_pages += deferred_init_range(nid, zid, spfn, epfn); |
1485 | unsigned long free_base_pfn = 0; | ||
1486 | int nr_to_free = 0; | ||
1487 | |||
1488 | end_pfn = min(walk_end, zone_end_pfn(zone)); | ||
1489 | pfn = first_init_pfn; | ||
1490 | if (pfn < walk_start) | ||
1491 | pfn = walk_start; | ||
1492 | if (pfn < zone->zone_start_pfn) | ||
1493 | pfn = zone->zone_start_pfn; | ||
1494 | |||
1495 | for (; pfn < end_pfn; pfn++) { | ||
1496 | if (!pfn_valid_within(pfn)) | ||
1497 | goto free_range; | ||
1498 | |||
1499 | /* | ||
1500 | * Ensure pfn_valid is checked every | ||
1501 | * pageblock_nr_pages for memory holes | ||
1502 | */ | ||
1503 | if ((pfn & (pageblock_nr_pages - 1)) == 0) { | ||
1504 | if (!pfn_valid(pfn)) { | ||
1505 | page = NULL; | ||
1506 | goto free_range; | ||
1507 | } | ||
1508 | } | ||
1509 | |||
1510 | if (!meminit_pfn_in_nid(pfn, nid, &nid_init_state)) { | ||
1511 | page = NULL; | ||
1512 | goto free_range; | ||
1513 | } | ||
1514 | |||
1515 | /* Minimise pfn page lookups and scheduler checks */ | ||
1516 | if (page && (pfn & (pageblock_nr_pages - 1)) != 0) { | ||
1517 | page++; | ||
1518 | } else { | ||
1519 | nr_pages += nr_to_free; | ||
1520 | deferred_free_range(free_base_page, | ||
1521 | free_base_pfn, nr_to_free); | ||
1522 | free_base_page = NULL; | ||
1523 | free_base_pfn = nr_to_free = 0; | ||
1524 | |||
1525 | page = pfn_to_page(pfn); | ||
1526 | cond_resched(); | ||
1527 | } | ||
1528 | |||
1529 | if (page->flags) { | ||
1530 | VM_BUG_ON(page_zone(page) != zone); | ||
1531 | goto free_range; | ||
1532 | } | ||
1533 | |||
1534 | __init_single_page(page, pfn, zid, nid); | ||
1535 | if (!free_base_page) { | ||
1536 | free_base_page = page; | ||
1537 | free_base_pfn = pfn; | ||
1538 | nr_to_free = 0; | ||
1539 | } | ||
1540 | nr_to_free++; | ||
1541 | |||
1542 | /* Where possible, batch up pages for a single free */ | ||
1543 | continue; | ||
1544 | free_range: | ||
1545 | /* Free the current block of pages to allocator */ | ||
1546 | nr_pages += nr_to_free; | ||
1547 | deferred_free_range(free_base_page, free_base_pfn, | ||
1548 | nr_to_free); | ||
1549 | free_base_page = NULL; | ||
1550 | free_base_pfn = nr_to_free = 0; | ||
1551 | } | ||
1552 | /* Free the last block of pages to allocator */ | ||
1553 | nr_pages += nr_to_free; | ||
1554 | deferred_free_range(free_base_page, free_base_pfn, nr_to_free); | ||
1555 | |||
1556 | first_init_pfn = max(end_pfn, first_init_pfn); | ||
1557 | } | 1589 | } |
1558 | 1590 | ||
1559 | /* Sanity check that the next zone really is unpopulated */ | 1591 | /* Sanity check that the next zone really is unpopulated */ |
@@ -1792,7 +1824,7 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags | |||
1792 | * Go through the free lists for the given migratetype and remove | 1824 | * Go through the free lists for the given migratetype and remove |
1793 | * the smallest available page from the freelists | 1825 | * the smallest available page from the freelists |
1794 | */ | 1826 | */ |
1795 | static inline | 1827 | static __always_inline |
1796 | struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, | 1828 | struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, |
1797 | int migratetype) | 1829 | int migratetype) |
1798 | { | 1830 | { |
@@ -1836,7 +1868,7 @@ static int fallbacks[MIGRATE_TYPES][4] = { | |||
1836 | }; | 1868 | }; |
1837 | 1869 | ||
1838 | #ifdef CONFIG_CMA | 1870 | #ifdef CONFIG_CMA |
1839 | static struct page *__rmqueue_cma_fallback(struct zone *zone, | 1871 | static __always_inline struct page *__rmqueue_cma_fallback(struct zone *zone, |
1840 | unsigned int order) | 1872 | unsigned int order) |
1841 | { | 1873 | { |
1842 | return __rmqueue_smallest(zone, order, MIGRATE_CMA); | 1874 | return __rmqueue_smallest(zone, order, MIGRATE_CMA); |
@@ -2217,7 +2249,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, | |||
2217 | * deviation from the rest of this file, to make the for loop | 2249 | * deviation from the rest of this file, to make the for loop |
2218 | * condition simpler. | 2250 | * condition simpler. |
2219 | */ | 2251 | */ |
2220 | static inline bool | 2252 | static __always_inline bool |
2221 | __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) | 2253 | __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) |
2222 | { | 2254 | { |
2223 | struct free_area *area; | 2255 | struct free_area *area; |
@@ -2289,8 +2321,8 @@ do_steal: | |||
2289 | * Do the hard work of removing an element from the buddy allocator. | 2321 | * Do the hard work of removing an element from the buddy allocator. |
2290 | * Call me with the zone->lock already held. | 2322 | * Call me with the zone->lock already held. |
2291 | */ | 2323 | */ |
2292 | static struct page *__rmqueue(struct zone *zone, unsigned int order, | 2324 | static __always_inline struct page * |
2293 | int migratetype) | 2325 | __rmqueue(struct zone *zone, unsigned int order, int migratetype) |
2294 | { | 2326 | { |
2295 | struct page *page; | 2327 | struct page *page; |
2296 | 2328 | ||
@@ -2315,7 +2347,7 @@ retry: | |||
2315 | */ | 2347 | */ |
2316 | static int rmqueue_bulk(struct zone *zone, unsigned int order, | 2348 | static int rmqueue_bulk(struct zone *zone, unsigned int order, |
2317 | unsigned long count, struct list_head *list, | 2349 | unsigned long count, struct list_head *list, |
2318 | int migratetype, bool cold) | 2350 | int migratetype) |
2319 | { | 2351 | { |
2320 | int i, alloced = 0; | 2352 | int i, alloced = 0; |
2321 | 2353 | ||
@@ -2329,19 +2361,16 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |||
2329 | continue; | 2361 | continue; |
2330 | 2362 | ||
2331 | /* | 2363 | /* |
2332 | * Split buddy pages returned by expand() are received here | 2364 | * Split buddy pages returned by expand() are received here in |
2333 | * in physical page order. The page is added to the callers and | 2365 | * physical page order. The page is added to the tail of the |
2334 | * list and the list head then moves forward. From the callers | 2366 | * caller's list. From the caller's perspective, the linked list |
2335 | * perspective, the linked list is ordered by page number in | 2367 | * is ordered by page number under some conditions. This is |
2336 | * some conditions. This is useful for IO devices that can | 2368 | * useful for IO devices that can walk the list forward from |
2337 | * merge IO requests if the physical pages are ordered | 2369 | * the head, thus also in physical page order, and that can |
2338 | * properly. | 2370 | * merge IO requests if the physical pages are ordered |
2371 | * properly. | ||
2339 | */ | 2372 | */ |
2340 | if (likely(!cold)) | 2373 | list_add_tail(&page->lru, list); |
2341 | list_add(&page->lru, list); | ||
2342 | else | ||
2343 | list_add_tail(&page->lru, list); | ||
2344 | list = &page->lru; | ||
2345 | alloced++; | 2374 | alloced++; |
2346 | if (is_migrate_cma(get_pcppage_migratetype(page))) | 2375 | if (is_migrate_cma(get_pcppage_migratetype(page))) |
2347 | __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, | 2376 | __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, |
@@ -2590,24 +2619,25 @@ void mark_free_pages(struct zone *zone) | |||
2590 | } | 2619 | } |
2591 | #endif /* CONFIG_PM */ | 2620 | #endif /* CONFIG_PM */ |
2592 | 2621 | ||
2593 | /* | 2622 | static bool free_unref_page_prepare(struct page *page, unsigned long pfn) |
2594 | * Free a 0-order page | ||
2595 | * cold == true ? free a cold page : free a hot page | ||
2596 | */ | ||
2597 | void free_hot_cold_page(struct page *page, bool cold) | ||
2598 | { | 2623 | { |
2599 | struct zone *zone = page_zone(page); | ||
2600 | struct per_cpu_pages *pcp; | ||
2601 | unsigned long flags; | ||
2602 | unsigned long pfn = page_to_pfn(page); | ||
2603 | int migratetype; | 2624 | int migratetype; |
2604 | 2625 | ||
2605 | if (!free_pcp_prepare(page)) | 2626 | if (!free_pcp_prepare(page)) |
2606 | return; | 2627 | return false; |
2607 | 2628 | ||
2608 | migratetype = get_pfnblock_migratetype(page, pfn); | 2629 | migratetype = get_pfnblock_migratetype(page, pfn); |
2609 | set_pcppage_migratetype(page, migratetype); | 2630 | set_pcppage_migratetype(page, migratetype); |
2610 | local_irq_save(flags); | 2631 | return true; |
2632 | } | ||
2633 | |||
2634 | static void free_unref_page_commit(struct page *page, unsigned long pfn) | ||
2635 | { | ||
2636 | struct zone *zone = page_zone(page); | ||
2637 | struct per_cpu_pages *pcp; | ||
2638 | int migratetype; | ||
2639 | |||
2640 | migratetype = get_pcppage_migratetype(page); | ||
2611 | __count_vm_event(PGFREE); | 2641 | __count_vm_event(PGFREE); |
2612 | 2642 | ||
2613 | /* | 2643 | /* |
@@ -2620,38 +2650,62 @@ void free_hot_cold_page(struct page *page, bool cold) | |||
2620 | if (migratetype >= MIGRATE_PCPTYPES) { | 2650 | if (migratetype >= MIGRATE_PCPTYPES) { |
2621 | if (unlikely(is_migrate_isolate(migratetype))) { | 2651 | if (unlikely(is_migrate_isolate(migratetype))) { |
2622 | free_one_page(zone, page, pfn, 0, migratetype); | 2652 | free_one_page(zone, page, pfn, 0, migratetype); |
2623 | goto out; | 2653 | return; |
2624 | } | 2654 | } |
2625 | migratetype = MIGRATE_MOVABLE; | 2655 | migratetype = MIGRATE_MOVABLE; |
2626 | } | 2656 | } |
2627 | 2657 | ||
2628 | pcp = &this_cpu_ptr(zone->pageset)->pcp; | 2658 | pcp = &this_cpu_ptr(zone->pageset)->pcp; |
2629 | if (!cold) | 2659 | list_add(&page->lru, &pcp->lists[migratetype]); |
2630 | list_add(&page->lru, &pcp->lists[migratetype]); | ||
2631 | else | ||
2632 | list_add_tail(&page->lru, &pcp->lists[migratetype]); | ||
2633 | pcp->count++; | 2660 | pcp->count++; |
2634 | if (pcp->count >= pcp->high) { | 2661 | if (pcp->count >= pcp->high) { |
2635 | unsigned long batch = READ_ONCE(pcp->batch); | 2662 | unsigned long batch = READ_ONCE(pcp->batch); |
2636 | free_pcppages_bulk(zone, batch, pcp); | 2663 | free_pcppages_bulk(zone, batch, pcp); |
2637 | pcp->count -= batch; | 2664 | pcp->count -= batch; |
2638 | } | 2665 | } |
2666 | } | ||
2639 | 2667 | ||
2640 | out: | 2668 | /* |
2669 | * Free a 0-order page | ||
2670 | */ | ||
2671 | void free_unref_page(struct page *page) | ||
2672 | { | ||
2673 | unsigned long flags; | ||
2674 | unsigned long pfn = page_to_pfn(page); | ||
2675 | |||
2676 | if (!free_unref_page_prepare(page, pfn)) | ||
2677 | return; | ||
2678 | |||
2679 | local_irq_save(flags); | ||
2680 | free_unref_page_commit(page, pfn); | ||
2641 | local_irq_restore(flags); | 2681 | local_irq_restore(flags); |
2642 | } | 2682 | } |
2643 | 2683 | ||
2644 | /* | 2684 | /* |
2645 | * Free a list of 0-order pages | 2685 | * Free a list of 0-order pages |
2646 | */ | 2686 | */ |
2647 | void free_hot_cold_page_list(struct list_head *list, bool cold) | 2687 | void free_unref_page_list(struct list_head *list) |
2648 | { | 2688 | { |
2649 | struct page *page, *next; | 2689 | struct page *page, *next; |
2690 | unsigned long flags, pfn; | ||
2691 | |||
2692 | /* Prepare pages for freeing */ | ||
2693 | list_for_each_entry_safe(page, next, list, lru) { | ||
2694 | pfn = page_to_pfn(page); | ||
2695 | if (!free_unref_page_prepare(page, pfn)) | ||
2696 | list_del(&page->lru); | ||
2697 | set_page_private(page, pfn); | ||
2698 | } | ||
2650 | 2699 | ||
2700 | local_irq_save(flags); | ||
2651 | list_for_each_entry_safe(page, next, list, lru) { | 2701 | list_for_each_entry_safe(page, next, list, lru) { |
2652 | trace_mm_page_free_batched(page, cold); | 2702 | unsigned long pfn = page_private(page); |
2653 | free_hot_cold_page(page, cold); | 2703 | |
2704 | set_page_private(page, 0); | ||
2705 | trace_mm_page_free_batched(page); | ||
2706 | free_unref_page_commit(page, pfn); | ||
2654 | } | 2707 | } |
2708 | local_irq_restore(flags); | ||
2655 | } | 2709 | } |
2656 | 2710 | ||
2657 | /* | 2711 | /* |
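
With the hot/cold hint gone, the old free_hot_cold_page(page, cold) entry points collapse into free_unref_page() and free_unref_page_list(); for callers the conversion is mechanical. The prepare/commit split also lets the list variant validate pages outside the critical section, stash each pfn via set_page_private(), and then commit the whole batch inside one local_irq_save()/restore() pair. A standalone sketch of that pattern with invented types (nothing below is kernel code):

#include <stdio.h>
#include <stdbool.h>

struct fake_page { unsigned long pfn; unsigned long private; bool bad; };

/* stand-in for free_unref_page_prepare(): may reject a page */
static bool prepare(struct fake_page *p) { return !p->bad; }

/* stand-in for free_unref_page_commit(): runs with "IRQs off" */
static void commit(unsigned long pfn) { printf("commit pfn %lu\n", pfn); }

int main(void)
{
	struct fake_page pages[3] = { { .pfn = 10 }, { .pfn = 11, .bad = true }, { .pfn = 12 } };
	bool keep[3];
	int i;

	for (i = 0; i < 3; i++) {			/* outside the critical section */
		keep[i] = prepare(&pages[i]);
		pages[i].private = pages[i].pfn;	/* like set_page_private(page, pfn) */
	}

	/* local_irq_save() in the real code: one critical section for the batch */
	for (i = 0; i < 3; i++)
		if (keep[i])
			commit(pages[i].private);
	/* local_irq_restore() */
	return 0;
}
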
@@ -2669,15 +2723,6 @@ void split_page(struct page *page, unsigned int order) | |||
2669 | VM_BUG_ON_PAGE(PageCompound(page), page); | 2723 | VM_BUG_ON_PAGE(PageCompound(page), page); |
2670 | VM_BUG_ON_PAGE(!page_count(page), page); | 2724 | VM_BUG_ON_PAGE(!page_count(page), page); |
2671 | 2725 | ||
2672 | #ifdef CONFIG_KMEMCHECK | ||
2673 | /* | ||
2674 | * Split shadow pages too, because free(page[0]) would | ||
2675 | * otherwise free the whole shadow. | ||
2676 | */ | ||
2677 | if (kmemcheck_page_is_tracked(page)) | ||
2678 | split_page(virt_to_page(page[0].shadow), order); | ||
2679 | #endif | ||
2680 | |||
2681 | for (i = 1; i < (1 << order); i++) | 2726 | for (i = 1; i < (1 << order); i++) |
2682 | set_page_refcounted(page + i); | 2727 | set_page_refcounted(page + i); |
2683 | split_page_owner(page, order); | 2728 | split_page_owner(page, order); |
@@ -2743,6 +2788,10 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z) | |||
2743 | #ifdef CONFIG_NUMA | 2788 | #ifdef CONFIG_NUMA |
2744 | enum numa_stat_item local_stat = NUMA_LOCAL; | 2789 | enum numa_stat_item local_stat = NUMA_LOCAL; |
2745 | 2790 | ||
2791 | /* skip NUMA counter updates if NUMA stats are disabled */ | ||
2792 | if (!static_branch_likely(&vm_numa_stat_key)) | ||
2793 | return; | ||
2794 | |||
2746 | if (z->node != numa_node_id()) | 2795 | if (z->node != numa_node_id()) |
2747 | local_stat = NUMA_OTHER; | 2796 | local_stat = NUMA_OTHER; |
2748 | 2797 | ||
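
vm_numa_stat_key is the default-true static key declared near the top of this file; zone_statistics() now returns immediately when it is disabled, so the NUMA hit/miss counters cost essentially one patched branch once they are turned off. The toggle itself is not in this file (presumably a sysctl in a companion patch). A minimal kernel-style sketch of the jump-label pattern; the example_* function names are invented:

#include <linux/jump_label.h>
#include <linux/types.h>

/* default-true key: the enabled case is the straight-line fast path */
DEFINE_STATIC_KEY_TRUE(vm_numa_stat_key);

static void example_stats_update(void)
{
	/* the branch is patched at runtime; when the key is disabled the
	 * counter work below is skipped for the cost of a single jump */
	if (!static_branch_likely(&vm_numa_stat_key))
		return;
	/* ... per-CPU NUMA counter updates ... */
}

static void example_set_numa_stats(bool enable)
{
	if (enable)
		static_branch_enable(&vm_numa_stat_key);	/* rewrites all branch sites */
	else
		static_branch_disable(&vm_numa_stat_key);
}
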
@@ -2758,7 +2807,7 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z) | |||
2758 | 2807 | ||
2759 | /* Remove page from the per-cpu list, caller must protect the list */ | 2808 | /* Remove page from the per-cpu list, caller must protect the list */ |
2760 | static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype, | 2809 | static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype, |
2761 | bool cold, struct per_cpu_pages *pcp, | 2810 | struct per_cpu_pages *pcp, |
2762 | struct list_head *list) | 2811 | struct list_head *list) |
2763 | { | 2812 | { |
2764 | struct page *page; | 2813 | struct page *page; |
@@ -2767,16 +2816,12 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype, | |||
2767 | if (list_empty(list)) { | 2816 | if (list_empty(list)) { |
2768 | pcp->count += rmqueue_bulk(zone, 0, | 2817 | pcp->count += rmqueue_bulk(zone, 0, |
2769 | pcp->batch, list, | 2818 | pcp->batch, list, |
2770 | migratetype, cold); | 2819 | migratetype); |
2771 | if (unlikely(list_empty(list))) | 2820 | if (unlikely(list_empty(list))) |
2772 | return NULL; | 2821 | return NULL; |
2773 | } | 2822 | } |
2774 | 2823 | ||
2775 | if (cold) | 2824 | page = list_first_entry(list, struct page, lru); |
2776 | page = list_last_entry(list, struct page, lru); | ||
2777 | else | ||
2778 | page = list_first_entry(list, struct page, lru); | ||
2779 | |||
2780 | list_del(&page->lru); | 2825 | list_del(&page->lru); |
2781 | pcp->count--; | 2826 | pcp->count--; |
2782 | } while (check_new_pcp(page)); | 2827 | } while (check_new_pcp(page)); |
@@ -2791,14 +2836,13 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, | |||
2791 | { | 2836 | { |
2792 | struct per_cpu_pages *pcp; | 2837 | struct per_cpu_pages *pcp; |
2793 | struct list_head *list; | 2838 | struct list_head *list; |
2794 | bool cold = ((gfp_flags & __GFP_COLD) != 0); | ||
2795 | struct page *page; | 2839 | struct page *page; |
2796 | unsigned long flags; | 2840 | unsigned long flags; |
2797 | 2841 | ||
2798 | local_irq_save(flags); | 2842 | local_irq_save(flags); |
2799 | pcp = &this_cpu_ptr(zone->pageset)->pcp; | 2843 | pcp = &this_cpu_ptr(zone->pageset)->pcp; |
2800 | list = &pcp->lists[migratetype]; | 2844 | list = &pcp->lists[migratetype]; |
2801 | page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list); | 2845 | page = __rmqueue_pcplist(zone, migratetype, pcp, list); |
2802 | if (page) { | 2846 | if (page) { |
2803 | __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); | 2847 | __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); |
2804 | zone_statistics(preferred_zone, zone); | 2848 | zone_statistics(preferred_zone, zone); |
@@ -3006,9 +3050,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, | |||
3006 | if (!area->nr_free) | 3050 | if (!area->nr_free) |
3007 | continue; | 3051 | continue; |
3008 | 3052 | ||
3009 | if (alloc_harder) | ||
3010 | return true; | ||
3011 | |||
3012 | for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) { | 3053 | for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) { |
3013 | if (!list_empty(&area->free_list[mt])) | 3054 | if (!list_empty(&area->free_list[mt])) |
3014 | return true; | 3055 | return true; |
@@ -3020,6 +3061,9 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, | |||
3020 | return true; | 3061 | return true; |
3021 | } | 3062 | } |
3022 | #endif | 3063 | #endif |
3064 | if (alloc_harder && | ||
3065 | !list_empty(&area->free_list[MIGRATE_HIGHATOMIC])) | ||
3066 | return true; | ||
3023 | } | 3067 | } |
3024 | return false; | 3068 | return false; |
3025 | } | 3069 | } |
@@ -3235,20 +3279,14 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...) | |||
3235 | if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs)) | 3279 | if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs)) |
3236 | return; | 3280 | return; |
3237 | 3281 | ||
3238 | pr_warn("%s: ", current->comm); | ||
3239 | |||
3240 | va_start(args, fmt); | 3282 | va_start(args, fmt); |
3241 | vaf.fmt = fmt; | 3283 | vaf.fmt = fmt; |
3242 | vaf.va = &args; | 3284 | vaf.va = &args; |
3243 | pr_cont("%pV", &vaf); | 3285 | pr_warn("%s: %pV, mode:%#x(%pGg), nodemask=%*pbl\n", |
3286 | current->comm, &vaf, gfp_mask, &gfp_mask, | ||
3287 | nodemask_pr_args(nodemask)); | ||
3244 | va_end(args); | 3288 | va_end(args); |
3245 | 3289 | ||
3246 | pr_cont(", mode:%#x(%pGg), nodemask=", gfp_mask, &gfp_mask); | ||
3247 | if (nodemask) | ||
3248 | pr_cont("%*pbl\n", nodemask_pr_args(nodemask)); | ||
3249 | else | ||
3250 | pr_cont("(null)\n"); | ||
3251 | |||
3252 | cpuset_print_current_mems_allowed(); | 3290 | cpuset_print_current_mems_allowed(); |
3253 | 3291 | ||
3254 | dump_stack(); | 3292 | dump_stack(); |
@@ -3868,8 +3906,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, | |||
3868 | enum compact_result compact_result; | 3906 | enum compact_result compact_result; |
3869 | int compaction_retries; | 3907 | int compaction_retries; |
3870 | int no_progress_loops; | 3908 | int no_progress_loops; |
3871 | unsigned long alloc_start = jiffies; | ||
3872 | unsigned int stall_timeout = 10 * HZ; | ||
3873 | unsigned int cpuset_mems_cookie; | 3909 | unsigned int cpuset_mems_cookie; |
3874 | int reserve_flags; | 3910 | int reserve_flags; |
3875 | 3911 | ||
@@ -4001,14 +4037,6 @@ retry: | |||
4001 | if (!can_direct_reclaim) | 4037 | if (!can_direct_reclaim) |
4002 | goto nopage; | 4038 | goto nopage; |
4003 | 4039 | ||
4004 | /* Make sure we know about allocations which stall for too long */ | ||
4005 | if (time_after(jiffies, alloc_start + stall_timeout)) { | ||
4006 | warn_alloc(gfp_mask & ~__GFP_NOWARN, ac->nodemask, | ||
4007 | "page allocation stalls for %ums, order:%u", | ||
4008 | jiffies_to_msecs(jiffies-alloc_start), order); | ||
4009 | stall_timeout += 10 * HZ; | ||
4010 | } | ||
4011 | |||
4012 | /* Avoid recursion of direct reclaim */ | 4040 | /* Avoid recursion of direct reclaim */ |
4013 | if (current->flags & PF_MEMALLOC) | 4041 | if (current->flags & PF_MEMALLOC) |
4014 | goto nopage; | 4042 | goto nopage; |
@@ -4223,9 +4251,6 @@ out: | |||
4223 | page = NULL; | 4251 | page = NULL; |
4224 | } | 4252 | } |
4225 | 4253 | ||
4226 | if (kmemcheck_enabled && page) | ||
4227 | kmemcheck_pagealloc_alloc(page, order, gfp_mask); | ||
4228 | |||
4229 | trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype); | 4254 | trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype); |
4230 | 4255 | ||
4231 | return page; | 4256 | return page; |
@@ -4262,7 +4287,7 @@ void __free_pages(struct page *page, unsigned int order) | |||
4262 | { | 4287 | { |
4263 | if (put_page_testzero(page)) { | 4288 | if (put_page_testzero(page)) { |
4264 | if (order == 0) | 4289 | if (order == 0) |
4265 | free_hot_cold_page(page, false); | 4290 | free_unref_page(page); |
4266 | else | 4291 | else |
4267 | __free_pages_ok(page, order); | 4292 | __free_pages_ok(page, order); |
4268 | } | 4293 | } |
@@ -4320,7 +4345,7 @@ void __page_frag_cache_drain(struct page *page, unsigned int count) | |||
4320 | unsigned int order = compound_order(page); | 4345 | unsigned int order = compound_order(page); |
4321 | 4346 | ||
4322 | if (order == 0) | 4347 | if (order == 0) |
4323 | free_hot_cold_page(page, false); | 4348 | free_unref_page(page); |
4324 | else | 4349 | else |
4325 | __free_pages_ok(page, order); | 4350 | __free_pages_ok(page, order); |
4326 | } | 4351 | } |
@@ -5367,6 +5392,7 @@ not_early: | |||
5367 | 5392 | ||
5368 | __init_single_page(page, pfn, zone, nid); | 5393 | __init_single_page(page, pfn, zone, nid); |
5369 | set_pageblock_migratetype(page, MIGRATE_MOVABLE); | 5394 | set_pageblock_migratetype(page, MIGRATE_MOVABLE); |
5395 | cond_resched(); | ||
5370 | } else { | 5396 | } else { |
5371 | __init_single_pfn(pfn, zone, nid); | 5397 | __init_single_pfn(pfn, zone, nid); |
5372 | } | 5398 | } |
@@ -6125,6 +6151,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) | |||
6125 | } | 6151 | } |
6126 | } | 6152 | } |
6127 | 6153 | ||
6154 | #ifdef CONFIG_FLAT_NODE_MEM_MAP | ||
6128 | static void __ref alloc_node_mem_map(struct pglist_data *pgdat) | 6155 | static void __ref alloc_node_mem_map(struct pglist_data *pgdat) |
6129 | { | 6156 | { |
6130 | unsigned long __maybe_unused start = 0; | 6157 | unsigned long __maybe_unused start = 0; |
@@ -6134,7 +6161,6 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat) | |||
6134 | if (!pgdat->node_spanned_pages) | 6161 | if (!pgdat->node_spanned_pages) |
6135 | return; | 6162 | return; |
6136 | 6163 | ||
6137 | #ifdef CONFIG_FLAT_NODE_MEM_MAP | ||
6138 | start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1); | 6164 | start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1); |
6139 | offset = pgdat->node_start_pfn - start; | 6165 | offset = pgdat->node_start_pfn - start; |
6140 | /* ia64 gets its own node_mem_map, before this, without bootmem */ | 6166 | /* ia64 gets its own node_mem_map, before this, without bootmem */ |
@@ -6156,6 +6182,9 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat) | |||
6156 | pgdat->node_id); | 6182 | pgdat->node_id); |
6157 | pgdat->node_mem_map = map + offset; | 6183 | pgdat->node_mem_map = map + offset; |
6158 | } | 6184 | } |
6185 | pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n", | ||
6186 | __func__, pgdat->node_id, (unsigned long)pgdat, | ||
6187 | (unsigned long)pgdat->node_mem_map); | ||
6159 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 6188 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
6160 | /* | 6189 | /* |
6161 | * With no DISCONTIG, the global mem_map is just set as node 0's | 6190 | * With no DISCONTIG, the global mem_map is just set as node 0's |
@@ -6168,8 +6197,10 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat) | |||
6168 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ | 6197 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
6169 | } | 6198 | } |
6170 | #endif | 6199 | #endif |
6171 | #endif /* CONFIG_FLAT_NODE_MEM_MAP */ | ||
6172 | } | 6200 | } |
6201 | #else | ||
6202 | static void __ref alloc_node_mem_map(struct pglist_data *pgdat) { } | ||
6203 | #endif /* CONFIG_FLAT_NODE_MEM_MAP */ | ||
6173 | 6204 | ||
6174 | void __paginginit free_area_init_node(int nid, unsigned long *zones_size, | 6205 | void __paginginit free_area_init_node(int nid, unsigned long *zones_size, |
6175 | unsigned long node_start_pfn, unsigned long *zholes_size) | 6206 | unsigned long node_start_pfn, unsigned long *zholes_size) |
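
The restructuring above moves the CONFIG_FLAT_NODE_MEM_MAP guard from inside alloc_node_mem_map() (and from around the debug printout in its caller) to around the whole function, supplying an empty stub in the #else branch so free_area_init_node() can call it unconditionally. A standalone illustration of that stub idiom, with an invented HAVE_FEATURE macro:

#include <stdio.h>

#ifdef HAVE_FEATURE
static void setup_feature(void)
{
	printf("feature configured\n");
}
#else
static void setup_feature(void) { }	/* empty stub: callers need no #ifdef */
#endif

int main(void)
{
	setup_feature();			/* compiles and links either way */
	return 0;
}
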
@@ -6196,16 +6227,49 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, | |||
6196 | zones_size, zholes_size); | 6227 | zones_size, zholes_size); |
6197 | 6228 | ||
6198 | alloc_node_mem_map(pgdat); | 6229 | alloc_node_mem_map(pgdat); |
6199 | #ifdef CONFIG_FLAT_NODE_MEM_MAP | ||
6200 | printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n", | ||
6201 | nid, (unsigned long)pgdat, | ||
6202 | (unsigned long)pgdat->node_mem_map); | ||
6203 | #endif | ||
6204 | 6230 | ||
6205 | reset_deferred_meminit(pgdat); | 6231 | reset_deferred_meminit(pgdat); |
6206 | free_area_init_core(pgdat); | 6232 | free_area_init_core(pgdat); |
6207 | } | 6233 | } |
6208 | 6234 | ||
6235 | #ifdef CONFIG_HAVE_MEMBLOCK | ||
6236 | /* | ||
6237 | * Only struct pages that are backed by physical memory are zeroed and | ||
6238 | * initialized by going through __init_single_page(). But, there are some | ||
6239 | * struct pages which are reserved in the memblock allocator and their fields | ||
6240 | * may be accessed (for example page_to_pfn() on some configurations accesses | ||
6241 | * flags). We must explicitly zero those struct pages. | ||
6242 | */ | ||
6243 | void __paginginit zero_resv_unavail(void) | ||
6244 | { | ||
6245 | phys_addr_t start, end; | ||
6246 | unsigned long pfn; | ||
6247 | u64 i, pgcnt; | ||
6248 | |||
6249 | /* | ||
6250 | * Loop through ranges that are reserved, but do not have reported | ||
6251 | * physical memory backing. | ||
6252 | */ | ||
6253 | pgcnt = 0; | ||
6254 | for_each_resv_unavail_range(i, &start, &end) { | ||
6255 | for (pfn = PFN_DOWN(start); pfn < PFN_UP(end); pfn++) { | ||
6256 | mm_zero_struct_page(pfn_to_page(pfn)); | ||
6257 | pgcnt++; | ||
6258 | } | ||
6259 | } | ||
6260 | |||
6261 | /* | ||
6262 | * Struct pages that do not have backing memory. This could be because | ||
6263 | * firmware is using some of this memory, or for some other reason. | ||
6264 | * Once memblock is changed so that such behaviour is not allowed, i.e. the | ||
6265 | * list of "reserved" memory must be a subset of the list of "memory", then | ||
6266 | * this code can be removed. | ||
6267 | */ | ||
6268 | if (pgcnt) | ||
6269 | pr_info("Reserved but unavailable: %lld pages\n", pgcnt); | ||
6270 | } | ||
6271 | #endif /* CONFIG_HAVE_MEMBLOCK */ | ||
6272 | |||
6209 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | 6273 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
6210 | 6274 | ||
6211 | #if MAX_NUMNODES > 1 | 6275 | #if MAX_NUMNODES > 1 |
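
zero_resv_unavail() walks memblock ranges that are reserved but not reported as usable memory and zeroes their struct pages, so that later field accesses (e.g. page_to_pfn() reading flags on some configurations) see initialised data. Note the rounding: PFN_DOWN(start) and PFN_UP(end) widen the range so every struct page the reservation touches, even partially, is covered. A standalone sketch of that rounding with made-up addresses and a 4 KiB page size:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PFN_DOWN(x) ((unsigned long)((x) >> PAGE_SHIFT))
#define PFN_UP(x)   ((unsigned long)(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT))

int main(void)
{
	unsigned long long start = 0x1800;	/* reservation starts mid-page (pfn 1) */
	unsigned long long end   = 0x5200;	/* and ends mid-page (pfn 5) */
	unsigned long pfn, pgcnt = 0;

	for (pfn = PFN_DOWN(start); pfn < PFN_UP(end); pfn++) {
		printf("zeroing struct page for pfn %lu\n", pfn);
		pgcnt++;
	}
	printf("Reserved but unavailable: %lu pages\n", pgcnt);	/* pfns 1..5 -> 5 pages */
	return 0;
}
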
@@ -6629,6 +6693,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) | |||
6629 | node_set_state(nid, N_MEMORY); | 6693 | node_set_state(nid, N_MEMORY); |
6630 | check_for_memory(pgdat, nid); | 6694 | check_for_memory(pgdat, nid); |
6631 | } | 6695 | } |
6696 | zero_resv_unavail(); | ||
6632 | } | 6697 | } |
6633 | 6698 | ||
6634 | static int __init cmdline_parse_core(char *p, unsigned long *core) | 6699 | static int __init cmdline_parse_core(char *p, unsigned long *core) |
@@ -6792,6 +6857,7 @@ void __init free_area_init(unsigned long *zones_size) | |||
6792 | { | 6857 | { |
6793 | free_area_init_node(0, zones_size, | 6858 | free_area_init_node(0, zones_size, |
6794 | __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL); | 6859 | __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL); |
6860 | zero_resv_unavail(); | ||
6795 | } | 6861 | } |
6796 | 6862 | ||
6797 | static int page_alloc_cpu_dead(unsigned int cpu) | 6863 | static int page_alloc_cpu_dead(unsigned int cpu) |
@@ -7304,18 +7370,17 @@ void *__init alloc_large_system_hash(const char *tablename, | |||
7304 | 7370 | ||
7305 | log2qty = ilog2(numentries); | 7371 | log2qty = ilog2(numentries); |
7306 | 7372 | ||
7307 | /* | ||
7308 | * memblock allocator returns zeroed memory already, so HASH_ZERO is | ||
7309 | * currently not used when HASH_EARLY is specified. | ||
7310 | */ | ||
7311 | gfp_flags = (flags & HASH_ZERO) ? GFP_ATOMIC | __GFP_ZERO : GFP_ATOMIC; | 7373 | gfp_flags = (flags & HASH_ZERO) ? GFP_ATOMIC | __GFP_ZERO : GFP_ATOMIC; |
7312 | do { | 7374 | do { |
7313 | size = bucketsize << log2qty; | 7375 | size = bucketsize << log2qty; |
7314 | if (flags & HASH_EARLY) | 7376 | if (flags & HASH_EARLY) { |
7315 | table = memblock_virt_alloc_nopanic(size, 0); | 7377 | if (flags & HASH_ZERO) |
7316 | else if (hashdist) | 7378 | table = memblock_virt_alloc_nopanic(size, 0); |
7379 | else | ||
7380 | table = memblock_virt_alloc_raw(size, 0); | ||
7381 | } else if (hashdist) { | ||
7317 | table = __vmalloc(size, gfp_flags, PAGE_KERNEL); | 7382 | table = __vmalloc(size, gfp_flags, PAGE_KERNEL); |
7318 | else { | 7383 | } else { |
7319 | /* | 7384 | /* |
7320 | * If bucketsize is not a power-of-two, we may free | 7385 | * If bucketsize is not a power-of-two, we may free |
7321 | * some pages at the end of hash table which | 7386 | * some pages at the end of hash table which |
@@ -7352,10 +7417,10 @@ void *__init alloc_large_system_hash(const char *tablename, | |||
7352 | * race condition. So you can't expect this function should be exact. | 7417 | * race condition. So you can't expect this function should be exact. |
7353 | */ | 7418 | */ |
7354 | bool has_unmovable_pages(struct zone *zone, struct page *page, int count, | 7419 | bool has_unmovable_pages(struct zone *zone, struct page *page, int count, |
7420 | int migratetype, | ||
7355 | bool skip_hwpoisoned_pages) | 7421 | bool skip_hwpoisoned_pages) |
7356 | { | 7422 | { |
7357 | unsigned long pfn, iter, found; | 7423 | unsigned long pfn, iter, found; |
7358 | int mt; | ||
7359 | 7424 | ||
7360 | /* | 7425 | /* |
7361 | * For avoiding noise data, lru_add_drain_all() should be called | 7426 | * For avoiding noise data, lru_add_drain_all() should be called |
@@ -7363,8 +7428,14 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, | |||
7363 | */ | 7428 | */ |
7364 | if (zone_idx(zone) == ZONE_MOVABLE) | 7429 | if (zone_idx(zone) == ZONE_MOVABLE) |
7365 | return false; | 7430 | return false; |
7366 | mt = get_pageblock_migratetype(page); | 7431 | |
7367 | if (mt == MIGRATE_MOVABLE || is_migrate_cma(mt)) | 7432 | /* |
7433 | * CMA allocations (alloc_contig_range) really need to isolate CMA | ||
7434 | * pageblocks even when they are not in fact movable, so consider | ||
7435 | * them movable here. | ||
7436 | */ | ||
7437 | if (is_migrate_cma(migratetype) && | ||
7438 | is_migrate_cma(get_pageblock_migratetype(page))) | ||
7368 | return false; | 7439 | return false; |
7369 | 7440 | ||
7370 | pfn = page_to_pfn(page); | 7441 | pfn = page_to_pfn(page); |
@@ -7376,6 +7447,9 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, | |||
7376 | 7447 | ||
7377 | page = pfn_to_page(check); | 7448 | page = pfn_to_page(check); |
7378 | 7449 | ||
7450 | if (PageReserved(page)) | ||
7451 | return true; | ||
7452 | |||
7379 | /* | 7453 | /* |
7380 | * Hugepages are not in LRU lists, but they're movable. | 7454 | * Hugepages are not in LRU lists, but they're movable. |
7381 | * We need not scan over tail pages because we don't | 7455 | * We need not scan over tail pages because we don't |
@@ -7449,7 +7523,7 @@ bool is_pageblock_removable_nolock(struct page *page) | |||
7449 | if (!zone_spans_pfn(zone, pfn)) | 7523 | if (!zone_spans_pfn(zone, pfn)) |
7450 | return false; | 7524 | return false; |
7451 | 7525 | ||
7452 | return !has_unmovable_pages(zone, page, 0, true); | 7526 | return !has_unmovable_pages(zone, page, 0, MIGRATE_MOVABLE, true); |
7453 | } | 7527 | } |
7454 | 7528 | ||
7455 | #if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA) | 7529 | #if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA) |
@@ -7545,6 +7619,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, | |||
7545 | .zone = page_zone(pfn_to_page(start)), | 7619 | .zone = page_zone(pfn_to_page(start)), |
7546 | .mode = MIGRATE_SYNC, | 7620 | .mode = MIGRATE_SYNC, |
7547 | .ignore_skip_hint = true, | 7621 | .ignore_skip_hint = true, |
7622 | .no_set_skip_hint = true, | ||
7548 | .gfp_mask = current_gfp_context(gfp_mask), | 7623 | .gfp_mask = current_gfp_context(gfp_mask), |
7549 | }; | 7624 | }; |
7550 | INIT_LIST_HEAD(&cc.migratepages); | 7625 | INIT_LIST_HEAD(&cc.migratepages); |