Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  317
1 file changed, 201 insertions, 116 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c13ea7538891..bb90971182bd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -558,7 +558,8 @@ static inline void __free_one_page(struct page *page,
 		if (page_is_guard(buddy)) {
 			clear_page_guard_flag(buddy);
 			set_page_private(page, 0);
-			__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
+			__mod_zone_freepage_state(zone, 1 << order,
+						  migratetype);
 		} else {
 			list_del(&buddy->lru);
 			zone->free_area[order].nr_free--;
@@ -597,17 +598,6 @@ out:
 	zone->free_area[order].nr_free++;
 }
 
-/*
- * free_page_mlock() -- clean up attempts to free and mlocked() page.
- * Page should not be on lru, so no need to fix that up.
- * free_pages_check() will verify...
- */
-static inline void free_page_mlock(struct page *page)
-{
-	__dec_zone_page_state(page, NR_MLOCK);
-	__count_vm_event(UNEVICTABLE_MLOCKFREED);
-}
-
 static inline int free_pages_check(struct page *page)
 {
 	if (unlikely(page_mapcount(page) |
@@ -668,12 +658,17 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			batch_free = to_free;
 
 		do {
+			int mt;	/* migratetype of the to-be-freed page */
+
 			page = list_entry(list->prev, struct page, lru);
 			/* must delete as __free_one_page list manipulates */
 			list_del(&page->lru);
+			mt = get_freepage_migratetype(page);
 			/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
-			__free_one_page(page, zone, 0, page_private(page));
-			trace_mm_page_pcpu_drain(page, 0, page_private(page));
+			__free_one_page(page, zone, 0, mt);
+			trace_mm_page_pcpu_drain(page, 0, mt);
+			if (is_migrate_cma(mt))
+				__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
 		} while (--to_free && --batch_free && !list_empty(list));
 	}
 	__mod_zone_page_state(zone, NR_FREE_PAGES, count);
@@ -688,7 +683,8 @@ static void free_one_page(struct zone *zone, struct page *page, int order,
 	zone->pages_scanned = 0;
 
 	__free_one_page(page, zone, order, migratetype);
-	__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
+	if (unlikely(migratetype != MIGRATE_ISOLATE))
+		__mod_zone_freepage_state(zone, 1 << order, migratetype);
 	spin_unlock(&zone->lock);
 }
 
@@ -721,17 +717,16 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
 static void __free_pages_ok(struct page *page, unsigned int order)
 {
 	unsigned long flags;
-	int wasMlocked = __TestClearPageMlocked(page);
+	int migratetype;
 
 	if (!free_pages_prepare(page, order))
 		return;
 
 	local_irq_save(flags);
-	if (unlikely(wasMlocked))
-		free_page_mlock(page);
 	__count_vm_events(PGFREE, 1 << order);
-	free_one_page(page_zone(page), page, order,
-					get_pageblock_migratetype(page));
+	migratetype = get_pageblock_migratetype(page);
+	set_freepage_migratetype(page, migratetype);
+	free_one_page(page_zone(page), page, order, migratetype);
 	local_irq_restore(flags);
 }
 
@@ -811,7 +806,8 @@ static inline void expand(struct zone *zone, struct page *page,
 			set_page_guard_flag(&page[size]);
 			set_page_private(&page[size], high);
 			/* Guard pages are not available for any usage */
-			__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << high));
+			__mod_zone_freepage_state(zone, -(1 << high),
+						  migratetype);
 			continue;
 		}
 #endif
@@ -915,7 +911,7 @@ static int fallbacks[MIGRATE_TYPES][4] = {
  * Note that start_page and end_pages are not aligned on a pageblock
  * boundary. If alignment is required, use move_freepages_block()
  */
-static int move_freepages(struct zone *zone,
+int move_freepages(struct zone *zone,
 			  struct page *start_page, struct page *end_page,
 			  int migratetype)
 {
@@ -951,6 +947,7 @@ static int move_freepages(struct zone *zone,
 		order = page_order(page);
 		list_move(&page->lru,
 			  &zone->free_area[order].free_list[migratetype]);
+		set_freepage_migratetype(page, migratetype);
 		page += 1 << order;
 		pages_moved += 1 << order;
 	}
@@ -1135,8 +1132,11 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 			if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE)
 				mt = migratetype;
 		}
-		set_page_private(page, mt);
+		set_freepage_migratetype(page, mt);
 		list = &page->lru;
+		if (is_migrate_cma(mt))
+			__mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
+					      -(1 << order));
 	}
 	__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
 	spin_unlock(&zone->lock);
@@ -1296,16 +1296,13 @@ void free_hot_cold_page(struct page *page, int cold)
 	struct per_cpu_pages *pcp;
 	unsigned long flags;
 	int migratetype;
-	int wasMlocked = __TestClearPageMlocked(page);
 
 	if (!free_pages_prepare(page, 0))
 		return;
 
 	migratetype = get_pageblock_migratetype(page);
-	set_page_private(page, migratetype);
+	set_freepage_migratetype(page, migratetype);
 	local_irq_save(flags);
-	if (unlikely(wasMlocked))
-		free_page_mlock(page);
 	__count_vm_event(PGFREE);
 
 	/*
@@ -1380,20 +1377,16 @@ void split_page(struct page *page, unsigned int order)
 }
 
 /*
- * Similar to split_page except the page is already free. As this is only
- * being used for migration, the migratetype of the block also changes.
- * As this is called with interrupts disabled, the caller is responsible
- * for calling arch_alloc_page() and kernel_map_page() after interrupts
- * are enabled.
- *
- * Note: this is probably too low level an operation for use in drivers.
- * Please consult with lkml before using this in your driver.
+ * Similar to the split_page family of functions except that the page
+ * required at the given order and being isolated now to prevent races
+ * with parallel allocators
  */
-int split_free_page(struct page *page)
+int capture_free_page(struct page *page, int alloc_order, int migratetype)
 {
 	unsigned int order;
 	unsigned long watermark;
 	struct zone *zone;
+	int mt;
 
 	BUG_ON(!PageBuddy(page));
 
@@ -1409,12 +1402,16 @@ int split_free_page(struct page *page)
 	list_del(&page->lru);
 	zone->free_area[order].nr_free--;
 	rmv_page_order(page);
-	__mod_zone_page_state(zone, NR_FREE_PAGES, -(1UL << order));
 
-	/* Split into individual pages */
-	set_page_refcounted(page);
-	split_page(page, order);
+	mt = get_pageblock_migratetype(page);
+	if (unlikely(mt != MIGRATE_ISOLATE))
+		__mod_zone_freepage_state(zone, -(1UL << order), mt);
 
+	if (alloc_order != order)
+		expand(zone, page, alloc_order, order,
+			&zone->free_area[order], migratetype);
+
+	/* Set the pageblock if the captured page is at least a pageblock */
 	if (order >= pageblock_order - 1) {
 		struct page *endpage = page + (1 << order) - 1;
 		for (; page < endpage; page += pageblock_nr_pages) {
@@ -1425,7 +1422,35 @@ int split_free_page(struct page *page)
 		}
 	}
 
-	return 1 << order;
+	return 1UL << order;
+}
+
+/*
+ * Similar to split_page except the page is already free. As this is only
+ * being used for migration, the migratetype of the block also changes.
+ * As this is called with interrupts disabled, the caller is responsible
+ * for calling arch_alloc_page() and kernel_map_page() after interrupts
+ * are enabled.
+ *
+ * Note: this is probably too low level an operation for use in drivers.
+ * Please consult with lkml before using this in your driver.
+ */
+int split_free_page(struct page *page)
+{
+	unsigned int order;
+	int nr_pages;
+
+	BUG_ON(!PageBuddy(page));
+	order = page_order(page);
+
+	nr_pages = capture_free_page(page, order, 0);
+	if (!nr_pages)
+		return 0;
+
+	/* Split into individual pages */
+	set_page_refcounted(page);
+	split_page(page, order);
+	return nr_pages;
 }
 
 /*
@@ -1484,7 +1509,8 @@ again:
 		spin_unlock(&zone->lock);
 		if (!page)
 			goto failed;
-		__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
+		__mod_zone_freepage_state(zone, -(1 << order),
+					  get_pageblock_migratetype(page));
 	}
 
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
@@ -1501,19 +1527,6 @@ failed:
 	return NULL;
 }
 
-/* The ALLOC_WMARK bits are used as an index to zone->watermark */
-#define ALLOC_WMARK_MIN		WMARK_MIN
-#define ALLOC_WMARK_LOW		WMARK_LOW
-#define ALLOC_WMARK_HIGH	WMARK_HIGH
-#define ALLOC_NO_WATERMARKS	0x04 /* don't check watermarks at all */
-
-/* Mask to get the watermark bits */
-#define ALLOC_WMARK_MASK	(ALLOC_NO_WATERMARKS-1)
-
-#define ALLOC_HARDER		0x10 /* try to alloc harder */
-#define ALLOC_HIGH		0x20 /* __GFP_HIGH set */
-#define ALLOC_CPUSET		0x40 /* check for correct cpuset */
-
 #ifdef CONFIG_FAIL_PAGE_ALLOC
 
 static struct {
@@ -1608,7 +1621,11 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		min -= min / 2;
 	if (alloc_flags & ALLOC_HARDER)
 		min -= min / 4;
-
+#ifdef CONFIG_CMA
+	/* If allocation can't use CMA areas don't use free CMA pages */
+	if (!(alloc_flags & ALLOC_CMA))
+		free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
+#endif
 	if (free_pages <= min + lowmem_reserve)
 		return false;
 	for (o = 0; o < order; o++) {
@@ -1782,6 +1799,22 @@ static void zlc_clear_zones_full(struct zonelist *zonelist)
 	bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
 }
 
+static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
+{
+	return node_isset(local_zone->node, zone->zone_pgdat->reclaim_nodes);
+}
+
+static void __paginginit init_zone_allows_reclaim(int nid)
+{
+	int i;
+
+	for_each_online_node(i)
+		if (node_distance(nid, i) <= RECLAIM_DISTANCE) {
+			node_set(i, NODE_DATA(nid)->reclaim_nodes);
+			zone_reclaim_mode = 1;
+		}
+}
+
 #else	/* CONFIG_NUMA */
 
 static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
@@ -1802,6 +1835,15 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
 static void zlc_clear_zones_full(struct zonelist *zonelist)
 {
 }
+
+static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
+{
+	return true;
+}
+
+static inline void init_zone_allows_reclaim(int nid)
+{
+}
 #endif	/* CONFIG_NUMA */
 
 /*
@@ -1886,7 +1928,8 @@ zonelist_scan:
 				did_zlc_setup = 1;
 			}
 
-			if (zone_reclaim_mode == 0)
+			if (zone_reclaim_mode == 0 ||
+			    !zone_allows_reclaim(preferred_zone, zone))
 				goto this_zone_full;
 
 			/*
@@ -2105,7 +2148,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	bool *contended_compaction, bool *deferred_compaction,
 	unsigned long *did_some_progress)
 {
-	struct page *page;
+	struct page *page = NULL;
 
 	if (!order)
 		return NULL;
@@ -2118,10 +2161,16 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
 						nodemask, sync_migration,
-						contended_compaction);
+						contended_compaction, &page);
 	current->flags &= ~PF_MEMALLOC;
-	if (*did_some_progress != COMPACT_SKIPPED) {
 
+	/* If compaction captured a page, prep and use it */
+	if (page) {
+		prep_new_page(page, order, gfp_mask);
+		goto got_page;
+	}
+
+	if (*did_some_progress != COMPACT_SKIPPED) {
 		/* Page migration frees to the PCP lists but we want merging */
 		drain_pages(get_cpu());
 		put_cpu();
@@ -2131,6 +2180,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 				alloc_flags & ~ALLOC_NO_WATERMARKS,
 				preferred_zone, migratetype);
 		if (page) {
+got_page:
+			preferred_zone->compact_blockskip_flush = false;
 			preferred_zone->compact_considered = 0;
 			preferred_zone->compact_defer_shift = 0;
 			if (order >= preferred_zone->compact_order_failed)
@@ -2315,7 +2366,10 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 				unlikely(test_thread_flag(TIF_MEMDIE))))
 			alloc_flags |= ALLOC_NO_WATERMARKS;
 	}
-
+#ifdef CONFIG_CMA
+	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+		alloc_flags |= ALLOC_CMA;
+#endif
 	return alloc_flags;
 }
 
@@ -2362,9 +2416,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		goto nopage;
 
 restart:
-	if (!(gfp_mask & __GFP_NO_KSWAPD))
-		wake_all_kswapd(order, zonelist, high_zoneidx,
-						zone_idx(preferred_zone));
+	wake_all_kswapd(order, zonelist, high_zoneidx,
+					zone_idx(preferred_zone));
 
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
@@ -2441,7 +2494,7 @@ rebalance:
 	 * system then fail the allocation instead of entering direct reclaim.
 	 */
 	if ((deferred_compaction || contended_compaction) &&
-						(gfp_mask & __GFP_NO_KSWAPD))
+	    (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE)
 		goto nopage;
 
 	/* Try direct reclaim and then allocating */
@@ -2541,6 +2594,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	struct page *page = NULL;
 	int migratetype = allocflags_to_migratetype(gfp_mask);
 	unsigned int cpuset_mems_cookie;
+	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
 
 	gfp_mask &= gfp_allowed_mask;
 
@@ -2569,9 +2623,13 @@ retry_cpuset:
 	if (!preferred_zone)
 		goto out;
 
+#ifdef CONFIG_CMA
+	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+		alloc_flags |= ALLOC_CMA;
+#endif
 	/* First allocation attempt */
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
-			zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
+			zonelist, high_zoneidx, alloc_flags,
 			preferred_zone, migratetype);
 	if (unlikely(!page))
 		page = __alloc_pages_slowpath(gfp_mask, order,
@@ -2852,7 +2910,8 @@ void show_free_areas(unsigned int filter)
 		" unevictable:%lu"
 		" dirty:%lu writeback:%lu unstable:%lu\n"
 		" free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
-		" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n",
+		" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
+		" free_cma:%lu\n",
 		global_page_state(NR_ACTIVE_ANON),
 		global_page_state(NR_INACTIVE_ANON),
 		global_page_state(NR_ISOLATED_ANON),
@@ -2869,7 +2928,8 @@ void show_free_areas(unsigned int filter)
 		global_page_state(NR_FILE_MAPPED),
 		global_page_state(NR_SHMEM),
 		global_page_state(NR_PAGETABLE),
-		global_page_state(NR_BOUNCE));
+		global_page_state(NR_BOUNCE),
+		global_page_state(NR_FREE_CMA_PAGES));
 
 	for_each_populated_zone(zone) {
 		int i;
@@ -2901,6 +2961,7 @@ void show_free_areas(unsigned int filter)
 			" pagetables:%lukB"
 			" unstable:%lukB"
 			" bounce:%lukB"
+			" free_cma:%lukB"
 			" writeback_tmp:%lukB"
 			" pages_scanned:%lu"
 			" all_unreclaimable? %s"
@@ -2930,6 +2991,7 @@ void show_free_areas(unsigned int filter)
 			K(zone_page_state(zone, NR_PAGETABLE)),
 			K(zone_page_state(zone, NR_UNSTABLE_NFS)),
 			K(zone_page_state(zone, NR_BOUNCE)),
+			K(zone_page_state(zone, NR_FREE_CMA_PAGES)),
 			K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
 			zone->pages_scanned,
 			(zone->all_unreclaimable ? "yes" : "no")
@@ -3328,21 +3390,13 @@ static void build_zonelists(pg_data_t *pgdat)
 	j = 0;
 
 	while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {
-		int distance = node_distance(local_node, node);
-
-		/*
-		 * If another node is sufficiently far away then it is better
-		 * to reclaim pages in a zone before going off node.
-		 */
-		if (distance > RECLAIM_DISTANCE)
-			zone_reclaim_mode = 1;
-
 		/*
 		 * We don't want to pressure a particular node.
 		 * So adding penalty to the first node in same
 		 * distance group to make it round-robin.
 		 */
-		if (distance != node_distance(local_node, prev_node))
+		if (node_distance(local_node, node) !=
+		    node_distance(local_node, prev_node))
 			node_load[node] = load;
 
 		prev_node = node;
@@ -4438,11 +4492,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 
 		zone->spanned_pages = size;
 		zone->present_pages = realsize;
-#if defined CONFIG_COMPACTION || defined CONFIG_CMA
-		zone->compact_cached_free_pfn = zone->zone_start_pfn +
-						zone->spanned_pages;
-		zone->compact_cached_free_pfn &= ~(pageblock_nr_pages-1);
-#endif
 #ifdef CONFIG_NUMA
 		zone->node = nid;
 		zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
@@ -4521,6 +4570,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 
 	pgdat->node_id = nid;
 	pgdat->node_start_pfn = node_start_pfn;
+	init_zone_allows_reclaim(nid);
 	calculate_node_totalpages(pgdat, zones_size, zholes_size);
 
 	alloc_node_mem_map(pgdat);
@@ -4879,7 +4929,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 				zone_movable_pfn[i] << PAGE_SHIFT);
 	}
 
-	/* Print out the early_node_map[] */
+	/* Print out the early node map */
 	printk("Early memory node ranges\n");
 	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
 		printk("  node %3d: [mem %#010lx-%#010lx]\n", nid,
@@ -5619,47 +5669,28 @@ static unsigned long pfn_max_align_up(unsigned long pfn)
 			     pageblock_nr_pages));
 }
 
-static struct page *
-__alloc_contig_migrate_alloc(struct page *page, unsigned long private,
-			     int **resultp)
-{
-	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
-
-	if (PageHighMem(page))
-		gfp_mask |= __GFP_HIGHMEM;
-
-	return alloc_page(gfp_mask);
-}
-
 /* [start, end) must belong to a single zone. */
-static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
+static int __alloc_contig_migrate_range(struct compact_control *cc,
+					unsigned long start, unsigned long end)
 {
 	/* This function is based on compact_zone() from compaction.c. */
-
+	unsigned long nr_reclaimed;
 	unsigned long pfn = start;
 	unsigned int tries = 0;
 	int ret = 0;
 
-	struct compact_control cc = {
-		.nr_migratepages = 0,
-		.order = -1,
-		.zone = page_zone(pfn_to_page(start)),
-		.sync = true,
-	};
-	INIT_LIST_HEAD(&cc.migratepages);
-
 	migrate_prep_local();
 
-	while (pfn < end || !list_empty(&cc.migratepages)) {
+	while (pfn < end || !list_empty(&cc->migratepages)) {
 		if (fatal_signal_pending(current)) {
 			ret = -EINTR;
 			break;
 		}
 
-		if (list_empty(&cc.migratepages)) {
-			cc.nr_migratepages = 0;
-			pfn = isolate_migratepages_range(cc.zone, &cc,
-							 pfn, end);
+		if (list_empty(&cc->migratepages)) {
+			cc->nr_migratepages = 0;
+			pfn = isolate_migratepages_range(cc->zone, cc,
+							 pfn, end, true);
 			if (!pfn) {
 				ret = -EINTR;
 				break;
@@ -5670,12 +5701,16 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
 				break;
 			}
 
-		ret = migrate_pages(&cc.migratepages,
-				    __alloc_contig_migrate_alloc,
+		nr_reclaimed = reclaim_clean_pages_from_list(cc->zone,
+							&cc->migratepages);
+		cc->nr_migratepages -= nr_reclaimed;
+
+		ret = migrate_pages(&cc->migratepages,
+				    alloc_migrate_target,
 				    0, false, MIGRATE_SYNC);
 	}
 
-	putback_lru_pages(&cc.migratepages);
+	putback_lru_pages(&cc->migratepages);
 	return ret > 0 ? 0 : ret;
 }
 
@@ -5754,6 +5789,15 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	unsigned long outer_start, outer_end;
 	int ret = 0, order;
 
+	struct compact_control cc = {
+		.nr_migratepages = 0,
+		.order = -1,
+		.zone = page_zone(pfn_to_page(start)),
+		.sync = true,
+		.ignore_skip_hint = true,
+	};
+	INIT_LIST_HEAD(&cc.migratepages);
+
 	/*
 	 * What we do here is we mark all pageblocks in range as
 	 * MIGRATE_ISOLATE. Because pageblock and max order pages may
@@ -5783,7 +5827,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	if (ret)
 		goto done;
 
-	ret = __alloc_contig_migrate_range(start, end);
+	ret = __alloc_contig_migrate_range(&cc, start, end);
 	if (ret)
 		goto done;
 
@@ -5832,7 +5876,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	__reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start);
 
 	/* Grab isolated pages from freelists. */
-	outer_end = isolate_freepages_range(outer_start, end);
+	outer_end = isolate_freepages_range(&cc, outer_start, end);
 	if (!outer_end) {
 		ret = -EBUSY;
 		goto done;
@@ -5874,6 +5918,7 @@ static int __meminit __zone_pcp_update(void *data)
 		local_irq_save(flags);
 		if (pcp->count > 0)
 			free_pcppages_bulk(zone, pcp->count, pcp);
+		drain_zonestat(zone, pset);
 		setup_pageset(pset, batch);
 		local_irq_restore(flags);
 	}
@@ -5890,10 +5935,16 @@ void __meminit zone_pcp_update(struct zone *zone)
 void zone_pcp_reset(struct zone *zone)
 {
 	unsigned long flags;
+	int cpu;
+	struct per_cpu_pageset *pset;
 
 	/* avoid races with drain_pages() */
 	local_irq_save(flags);
 	if (zone->pageset != &boot_pageset) {
+		for_each_online_cpu(cpu) {
+			pset = per_cpu_ptr(zone->pageset, cpu);
+			drain_zonestat(zone, pset);
+		}
 		free_percpu(zone->pageset);
 		zone->pageset = &boot_pageset;
 	}
@@ -6047,3 +6098,37 @@ void dump_page(struct page *page)
 	dump_page_flags(page->flags);
 	mem_cgroup_print_bad_page(page);
 }
+
+/* reset zone->present_pages */
+void reset_zone_present_pages(void)
+{
+	struct zone *z;
+	int i, nid;
+
+	for_each_node_state(nid, N_HIGH_MEMORY) {
+		for (i = 0; i < MAX_NR_ZONES; i++) {
+			z = NODE_DATA(nid)->node_zones + i;
+			z->present_pages = 0;
+		}
+	}
+}
+
+/* calculate zone's present pages in buddy system */
+void fixup_zone_present_pages(int nid, unsigned long start_pfn,
+			      unsigned long end_pfn)
+{
+	struct zone *z;
+	unsigned long zone_start_pfn, zone_end_pfn;
+	int i;
+
+	for (i = 0; i < MAX_NR_ZONES; i++) {
+		z = NODE_DATA(nid)->node_zones + i;
+		zone_start_pfn = z->zone_start_pfn;
+		zone_end_pfn = zone_start_pfn + z->spanned_pages;
+
+		/* if the two regions intersect */
+		if (!(zone_start_pfn >= end_pfn || zone_end_pfn <= start_pfn))
+			z->present_pages += min(end_pfn, zone_end_pfn) -
+				max(start_pfn, zone_start_pfn);
+	}
+}