Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	317
1 files changed, 201 insertions, 116 deletions

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c13ea7538891..bb90971182bd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -558,7 +558,8 @@ static inline void __free_one_page(struct page *page,
 		if (page_is_guard(buddy)) {
 			clear_page_guard_flag(buddy);
 			set_page_private(page, 0);
-			__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
+			__mod_zone_freepage_state(zone, 1 << order,
+						  migratetype);
 		} else {
 			list_del(&buddy->lru);
 			zone->free_area[order].nr_free--;
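
[Annotation, not part of the patch] __mod_zone_freepage_state() is introduced outside this file, presumably in include/linux/vmstat.h, and the hunks below lean on it heavily. A minimal sketch of what it is assumed to do, namely keep NR_FREE_PAGES and the CMA-only free counter in step:

/* Sketch only; assumed helper in include/linux/vmstat.h, not shown by this diff. */
static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages,
					     int migratetype)
{
	/* every adjustment of free pages updates the global counter */
	__mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages);
	/* pages in CMA pageblocks are additionally tracked on their own */
	if (is_migrate_cma(migratetype))
		__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages);
}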
@@ -597,17 +598,6 @@ out:
 	zone->free_area[order].nr_free++;
 }
 
-/*
- * free_page_mlock() -- clean up attempts to free and mlocked() page.
- * Page should not be on lru, so no need to fix that up.
- * free_pages_check() will verify...
- */
-static inline void free_page_mlock(struct page *page)
-{
-	__dec_zone_page_state(page, NR_MLOCK);
-	__count_vm_event(UNEVICTABLE_MLOCKFREED);
-}
-
 static inline int free_pages_check(struct page *page)
 {
 	if (unlikely(page_mapcount(page) |
@@ -668,12 +658,17 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			batch_free = to_free;
 
 		do {
+			int mt;	/* migratetype of the to-be-freed page */
+
 			page = list_entry(list->prev, struct page, lru);
 			/* must delete as __free_one_page list manipulates */
 			list_del(&page->lru);
+			mt = get_freepage_migratetype(page);
 			/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
-			__free_one_page(page, zone, 0, page_private(page));
-			trace_mm_page_pcpu_drain(page, 0, page_private(page));
+			__free_one_page(page, zone, 0, mt);
+			trace_mm_page_pcpu_drain(page, 0, mt);
+			if (is_migrate_cma(mt))
+				__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
 		} while (--to_free && --batch_free && !list_empty(list));
 	}
 	__mod_zone_page_state(zone, NR_FREE_PAGES, count);
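
[Annotation, not part of the patch] The pcp free path above now remembers a page's migratetype with get_freepage_migratetype()/set_freepage_migratetype() instead of raw page_private(). A plausible sketch of the helpers, assuming this series adds them to include/linux/mm.h and reuses a field that carries no meaning while the page is free:

/* Sketch only; the field choice (page->index) is an assumption. */
static inline void set_freepage_migratetype(struct page *page, int migratetype)
{
	page->index = migratetype;	/* page is free, so index holds no mapping data */
}

static inline int get_freepage_migratetype(struct page *page)
{
	return page->index;
}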
@@ -688,7 +683,8 @@ static void free_one_page(struct zone *zone, struct page *page, int order,
 	zone->pages_scanned = 0;
 
 	__free_one_page(page, zone, order, migratetype);
-	__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
+	if (unlikely(migratetype != MIGRATE_ISOLATE))
+		__mod_zone_freepage_state(zone, 1 << order, migratetype);
 	spin_unlock(&zone->lock);
 }
 
@@ -721,17 +717,16 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
 static void __free_pages_ok(struct page *page, unsigned int order)
 {
 	unsigned long flags;
-	int wasMlocked = __TestClearPageMlocked(page);
+	int migratetype;
 
 	if (!free_pages_prepare(page, order))
 		return;
 
 	local_irq_save(flags);
-	if (unlikely(wasMlocked))
-		free_page_mlock(page);
 	__count_vm_events(PGFREE, 1 << order);
-	free_one_page(page_zone(page), page, order,
-				get_pageblock_migratetype(page));
+	migratetype = get_pageblock_migratetype(page);
+	set_freepage_migratetype(page, migratetype);
+	free_one_page(page_zone(page), page, order, migratetype);
 	local_irq_restore(flags);
 }
 
@@ -811,7 +806,8 @@ static inline void expand(struct zone *zone, struct page *page,
 			set_page_guard_flag(&page[size]);
 			set_page_private(&page[size], high);
 			/* Guard pages are not available for any usage */
-			__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << high));
+			__mod_zone_freepage_state(zone, -(1 << high),
+						  migratetype);
 			continue;
 		}
 #endif
@@ -915,7 +911,7 @@ static int fallbacks[MIGRATE_TYPES][4] = {
  * Note that start_page and end_pages are not aligned on a pageblock
  * boundary. If alignment is required, use move_freepages_block()
  */
-static int move_freepages(struct zone *zone,
+int move_freepages(struct zone *zone,
 			  struct page *start_page, struct page *end_page,
 			  int migratetype)
 {
@@ -951,6 +947,7 @@ static int move_freepages(struct zone *zone,
 		order = page_order(page);
 		list_move(&page->lru,
 			  &zone->free_area[order].free_list[migratetype]);
+		set_freepage_migratetype(page, migratetype);
 		page += 1 << order;
 		pages_moved += 1 << order;
 	}
@@ -1135,8 +1132,11 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 			if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE)
 				mt = migratetype;
 		}
-		set_page_private(page, mt);
+		set_freepage_migratetype(page, mt);
 		list = &page->lru;
+		if (is_migrate_cma(mt))
+			__mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
+					      -(1 << order));
 	}
 	__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
 	spin_unlock(&zone->lock);
@@ -1296,16 +1296,13 @@ void free_hot_cold_page(struct page *page, int cold)
 	struct per_cpu_pages *pcp;
 	unsigned long flags;
 	int migratetype;
-	int wasMlocked = __TestClearPageMlocked(page);
 
 	if (!free_pages_prepare(page, 0))
 		return;
 
 	migratetype = get_pageblock_migratetype(page);
-	set_page_private(page, migratetype);
+	set_freepage_migratetype(page, migratetype);
 	local_irq_save(flags);
-	if (unlikely(wasMlocked))
-		free_page_mlock(page);
 	__count_vm_event(PGFREE);
 
 	/*
@@ -1380,20 +1377,16 @@ void split_page(struct page *page, unsigned int order)
 }
 
 /*
- * Similar to split_page except the page is already free. As this is only
- * being used for migration, the migratetype of the block also changes.
- * As this is called with interrupts disabled, the caller is responsible
- * for calling arch_alloc_page() and kernel_map_page() after interrupts
- * are enabled.
- *
- * Note: this is probably too low level an operation for use in drivers.
- * Please consult with lkml before using this in your driver.
+ * Similar to the split_page family of functions except that the page
+ * required at the given order and being isolated now to prevent races
+ * with parallel allocators
  */
-int split_free_page(struct page *page)
+int capture_free_page(struct page *page, int alloc_order, int migratetype)
 {
 	unsigned int order;
 	unsigned long watermark;
 	struct zone *zone;
+	int mt;
 
 	BUG_ON(!PageBuddy(page));
 
@@ -1409,12 +1402,16 @@ int split_free_page(struct page *page)
 	list_del(&page->lru);
 	zone->free_area[order].nr_free--;
 	rmv_page_order(page);
-	__mod_zone_page_state(zone, NR_FREE_PAGES, -(1UL << order));
 
-	/* Split into individual pages */
-	set_page_refcounted(page);
-	split_page(page, order);
+	mt = get_pageblock_migratetype(page);
+	if (unlikely(mt != MIGRATE_ISOLATE))
+		__mod_zone_freepage_state(zone, -(1UL << order), mt);
 
+	if (alloc_order != order)
+		expand(zone, page, alloc_order, order,
+			&zone->free_area[order], migratetype);
+
+	/* Set the pageblock if the captured page is at least a pageblock */
 	if (order >= pageblock_order - 1) {
 		struct page *endpage = page + (1 << order) - 1;
 		for (; page < endpage; page += pageblock_nr_pages) {
@@ -1425,7 +1422,35 @@ int split_free_page(struct page *page)
 		}
 	}
 
-	return 1 << order;
+	return 1UL << order;
+}
+
+/*
+ * Similar to split_page except the page is already free. As this is only
+ * being used for migration, the migratetype of the block also changes.
+ * As this is called with interrupts disabled, the caller is responsible
+ * for calling arch_alloc_page() and kernel_map_page() after interrupts
+ * are enabled.
+ *
+ * Note: this is probably too low level an operation for use in drivers.
+ * Please consult with lkml before using this in your driver.
+ */
+int split_free_page(struct page *page)
+{
+	unsigned int order;
+	int nr_pages;
+
+	BUG_ON(!PageBuddy(page));
+	order = page_order(page);
+
+	nr_pages = capture_free_page(page, order, 0);
+	if (!nr_pages)
+		return 0;
+
+	/* Split into individual pages */
+	set_page_refcounted(page);
+	split_page(page, order);
+	return nr_pages;
 }
 
 /*
@@ -1484,7 +1509,8 @@ again:
 		spin_unlock(&zone->lock);
 		if (!page)
 			goto failed;
-		__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
+		__mod_zone_freepage_state(zone, -(1 << order),
+					  get_pageblock_migratetype(page));
 	}
 
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
@@ -1501,19 +1527,6 @@ failed:
 	return NULL;
 }
 
-/* The ALLOC_WMARK bits are used as an index to zone->watermark */
-#define ALLOC_WMARK_MIN		WMARK_MIN
-#define ALLOC_WMARK_LOW		WMARK_LOW
-#define ALLOC_WMARK_HIGH	WMARK_HIGH
-#define ALLOC_NO_WATERMARKS	0x04 /* don't check watermarks at all */
-
-/* Mask to get the watermark bits */
-#define ALLOC_WMARK_MASK	(ALLOC_NO_WATERMARKS-1)
-
-#define ALLOC_HARDER		0x10 /* try to alloc harder */
-#define ALLOC_HIGH		0x20 /* __GFP_HIGH set */
-#define ALLOC_CPUSET		0x40 /* check for correct cpuset */
-
 #ifdef CONFIG_FAIL_PAGE_ALLOC
 
 static struct {
@@ -1608,7 +1621,11 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		min -= min / 2;
 	if (alloc_flags & ALLOC_HARDER)
 		min -= min / 4;
-
+#ifdef CONFIG_CMA
+	/* If allocation can't use CMA areas don't use free CMA pages */
+	if (!(alloc_flags & ALLOC_CMA))
+		free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
+#endif
 	if (free_pages <= min + lowmem_reserve)
 		return false;
 	for (o = 0; o < order; o++) {
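
[Annotation, not part of the patch] The ALLOC_* flags deleted above are not dropped; they are assumed to move to mm/internal.h so that compaction code can share them, together with one new flag used by the watermark hunk. Sketch, with the exact value an assumption:

/* mm/internal.h (sketch): relocated ALLOC_WMARK_* flags plus a new one. */
#define ALLOC_CMA		0x80 /* allow allocations from CMA areas */

With that flag, __zone_watermark_ok() discounts NR_FREE_CMA_PAGES for requests that cannot use CMA memory, so free CMA pages no longer let an unmovable allocation pass the watermark check.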
@@ -1782,6 +1799,22 @@ static void zlc_clear_zones_full(struct zonelist *zonelist)
 	bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
 }
 
+static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
+{
+	return node_isset(local_zone->node, zone->zone_pgdat->reclaim_nodes);
+}
+
+static void __paginginit init_zone_allows_reclaim(int nid)
+{
+	int i;
+
+	for_each_online_node(i)
+		if (node_distance(nid, i) <= RECLAIM_DISTANCE) {
+			node_set(i, NODE_DATA(nid)->reclaim_nodes);
+			zone_reclaim_mode = 1;
+		}
+}
+
 #else	/* CONFIG_NUMA */
 
 static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
@@ -1802,6 +1835,15 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
 static void zlc_clear_zones_full(struct zonelist *zonelist)
 {
 }
+
+static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
+{
+	return true;
+}
+
+static inline void init_zone_allows_reclaim(int nid)
+{
+}
 #endif	/* CONFIG_NUMA */
 
 /*
@@ -1886,7 +1928,8 @@ zonelist_scan:
 				did_zlc_setup = 1;
 			}
 
-			if (zone_reclaim_mode == 0)
+			if (zone_reclaim_mode == 0 ||
+			    !zone_allows_reclaim(preferred_zone, zone))
 				goto this_zone_full;
 
 			/*
@@ -2105,7 +2148,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	bool *contended_compaction, bool *deferred_compaction,
 	unsigned long *did_some_progress)
 {
-	struct page *page;
+	struct page *page = NULL;
 
 	if (!order)
 		return NULL;
@@ -2118,10 +2161,16 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
 						nodemask, sync_migration,
-						contended_compaction);
+						contended_compaction, &page);
 	current->flags &= ~PF_MEMALLOC;
-	if (*did_some_progress != COMPACT_SKIPPED) {
 
+	/* If compaction captured a page, prep and use it */
+	if (page) {
+		prep_new_page(page, order, gfp_mask);
+		goto got_page;
+	}
+
+	if (*did_some_progress != COMPACT_SKIPPED) {
 		/* Page migration frees to the PCP lists but we want merging */
 		drain_pages(get_cpu());
 		put_cpu();
@@ -2131,6 +2180,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 				alloc_flags & ~ALLOC_NO_WATERMARKS,
 				preferred_zone, migratetype);
 		if (page) {
+got_page:
+			preferred_zone->compact_blockskip_flush = false;
 			preferred_zone->compact_considered = 0;
 			preferred_zone->compact_defer_shift = 0;
 			if (order >= preferred_zone->compact_order_failed)
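
[Annotation, not part of the patch] The extra &page argument implies that try_to_compact_pages() now takes an out-parameter through which compaction can hand back a page it captured at the requested order. Assumed shape of the declaration in include/linux/compaction.h after this series:

/* Sketch of the assumed prototype; not part of this file's diff. */
extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
			int order, gfp_t gfp_mask, nodemask_t *mask,
			bool sync, bool *contended, struct page **page);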
@@ -2315,7 +2366,10 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 			 unlikely(test_thread_flag(TIF_MEMDIE))))
 			alloc_flags |= ALLOC_NO_WATERMARKS;
 	}
-
+#ifdef CONFIG_CMA
+	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+		alloc_flags |= ALLOC_CMA;
+#endif
 	return alloc_flags;
 }
 
@@ -2362,9 +2416,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		goto nopage;
 
 restart:
-	if (!(gfp_mask & __GFP_NO_KSWAPD))
-		wake_all_kswapd(order, zonelist, high_zoneidx,
-						zone_idx(preferred_zone));
+	wake_all_kswapd(order, zonelist, high_zoneidx,
+					zone_idx(preferred_zone));
 
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
@@ -2441,7 +2494,7 @@ rebalance:
 	 * system then fail the allocation instead of entering direct reclaim.
 	 */
 	if ((deferred_compaction || contended_compaction) &&
-						(gfp_mask & __GFP_NO_KSWAPD))
+	    (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE)
 		goto nopage;
 
 	/* Try direct reclaim and then allocating */
@@ -2541,6 +2594,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	struct page *page = NULL;
 	int migratetype = allocflags_to_migratetype(gfp_mask);
 	unsigned int cpuset_mems_cookie;
+	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
 
 	gfp_mask &= gfp_allowed_mask;
 
@@ -2569,9 +2623,13 @@ retry_cpuset:
 	if (!preferred_zone)
 		goto out;
 
+#ifdef CONFIG_CMA
+	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+		alloc_flags |= ALLOC_CMA;
+#endif
 	/* First allocation attempt */
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
-			zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
+			zonelist, high_zoneidx, alloc_flags,
 			preferred_zone, migratetype);
 	if (unlikely(!page))
 		page = __alloc_pages_slowpath(gfp_mask, order,
@@ -2852,7 +2910,8 @@ void show_free_areas(unsigned int filter)
 		" unevictable:%lu"
 		" dirty:%lu writeback:%lu unstable:%lu\n"
 		" free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
-		" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n",
+		" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
+		" free_cma:%lu\n",
 		global_page_state(NR_ACTIVE_ANON),
 		global_page_state(NR_INACTIVE_ANON),
 		global_page_state(NR_ISOLATED_ANON),
@@ -2869,7 +2928,8 @@ void show_free_areas(unsigned int filter)
 		global_page_state(NR_FILE_MAPPED),
 		global_page_state(NR_SHMEM),
 		global_page_state(NR_PAGETABLE),
-		global_page_state(NR_BOUNCE));
+		global_page_state(NR_BOUNCE),
+		global_page_state(NR_FREE_CMA_PAGES));
 
 	for_each_populated_zone(zone) {
 		int i;
@@ -2901,6 +2961,7 @@ void show_free_areas(unsigned int filter)
 			" pagetables:%lukB"
 			" unstable:%lukB"
 			" bounce:%lukB"
+			" free_cma:%lukB"
 			" writeback_tmp:%lukB"
 			" pages_scanned:%lu"
 			" all_unreclaimable? %s"
@@ -2930,6 +2991,7 @@ void show_free_areas(unsigned int filter)
 			K(zone_page_state(zone, NR_PAGETABLE)),
 			K(zone_page_state(zone, NR_UNSTABLE_NFS)),
 			K(zone_page_state(zone, NR_BOUNCE)),
+			K(zone_page_state(zone, NR_FREE_CMA_PAGES)),
 			K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
 			zone->pages_scanned,
 			(zone->all_unreclaimable ? "yes" : "no")
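
[Annotation, not part of the patch] NR_FREE_CMA_PAGES is assumed to be a new entry in enum zone_stat_item (include/linux/mmzone.h), which is why show_free_areas() can simply print it; it presumably also surfaces in /proc/vmstat as nr_free_cma. Sketch:

/* include/linux/mmzone.h (sketch): new per-zone vmstat counter. */
enum zone_stat_item {
	/* ... existing counters unchanged ... */
	NR_FREE_CMA_PAGES,
	NR_VM_ZONE_STAT_ITEMS
};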
@@ -3328,21 +3390,13 @@ static void build_zonelists(pg_data_t *pgdat)
 	j = 0;
 
 	while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {
-		int distance = node_distance(local_node, node);
-
-		/*
-		 * If another node is sufficiently far away then it is better
-		 * to reclaim pages in a zone before going off node.
-		 */
-		if (distance > RECLAIM_DISTANCE)
-			zone_reclaim_mode = 1;
-
 		/*
 		 * We don't want to pressure a particular node.
 		 * So adding penalty to the first node in same
 		 * distance group to make it round-robin.
 		 */
-		if (distance != node_distance(local_node, prev_node))
+		if (node_distance(local_node, node) !=
+		    node_distance(local_node, prev_node))
 			node_load[node] = load;
 
 		prev_node = node;
@@ -4438,11 +4492,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 
 		zone->spanned_pages = size;
 		zone->present_pages = realsize;
-#if defined CONFIG_COMPACTION || defined CONFIG_CMA
-		zone->compact_cached_free_pfn = zone->zone_start_pfn +
-						zone->spanned_pages;
-		zone->compact_cached_free_pfn &= ~(pageblock_nr_pages-1);
-#endif
 #ifdef CONFIG_NUMA
 		zone->node = nid;
 		zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
@@ -4521,6 +4570,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 
 	pgdat->node_id = nid;
 	pgdat->node_start_pfn = node_start_pfn;
+	init_zone_allows_reclaim(nid);
 	calculate_node_totalpages(pgdat, zones_size, zholes_size);
 
 	alloc_node_mem_map(pgdat);
@@ -4879,7 +4929,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 				zone_movable_pfn[i] << PAGE_SHIFT);
 	}
 
-	/* Print out the early_node_map[] */
+	/* Print out the early node map */
 	printk("Early memory node ranges\n");
 	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
 		printk("  node %3d: [mem %#010lx-%#010lx]\n", nid,
@@ -5619,47 +5669,28 @@ static unsigned long pfn_max_align_up(unsigned long pfn)
 				pageblock_nr_pages));
 }
 
-static struct page *
-__alloc_contig_migrate_alloc(struct page *page, unsigned long private,
-			     int **resultp)
-{
-	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
-
-	if (PageHighMem(page))
-		gfp_mask |= __GFP_HIGHMEM;
-
-	return alloc_page(gfp_mask);
-}
-
 /* [start, end) must belong to a single zone. */
-static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
+static int __alloc_contig_migrate_range(struct compact_control *cc,
+					unsigned long start, unsigned long end)
 {
 	/* This function is based on compact_zone() from compaction.c. */
-
+	unsigned long nr_reclaimed;
 	unsigned long pfn = start;
 	unsigned int tries = 0;
 	int ret = 0;
 
-	struct compact_control cc = {
-		.nr_migratepages = 0,
-		.order = -1,
-		.zone = page_zone(pfn_to_page(start)),
-		.sync = true,
-	};
-	INIT_LIST_HEAD(&cc.migratepages);
-
 	migrate_prep_local();
 
-	while (pfn < end || !list_empty(&cc.migratepages)) {
+	while (pfn < end || !list_empty(&cc->migratepages)) {
 		if (fatal_signal_pending(current)) {
 			ret = -EINTR;
 			break;
 		}
 
-		if (list_empty(&cc.migratepages)) {
-			cc.nr_migratepages = 0;
-			pfn = isolate_migratepages_range(cc.zone, &cc,
-							 pfn, end);
+		if (list_empty(&cc->migratepages)) {
+			cc->nr_migratepages = 0;
+			pfn = isolate_migratepages_range(cc->zone, cc,
+							 pfn, end, true);
 			if (!pfn) {
 				ret = -EINTR;
 				break;
@@ -5670,12 +5701,16 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
 			break;
 		}
 
-		ret = migrate_pages(&cc.migratepages,
-				    __alloc_contig_migrate_alloc,
+		nr_reclaimed = reclaim_clean_pages_from_list(cc->zone,
+							&cc->migratepages);
+		cc->nr_migratepages -= nr_reclaimed;
+
+		ret = migrate_pages(&cc->migratepages,
+				    alloc_migrate_target,
 				    0, false, MIGRATE_SYNC);
 	}
 
-	putback_lru_pages(&cc.migratepages);
+	putback_lru_pages(&cc->migratepages);
 	return ret > 0 ? 0 : ret;
 }
 
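
[Annotation, not part of the patch] alloc_migrate_target() is assumed to be the relocated form of the __alloc_contig_migrate_alloc() callback deleted above, now shared from mm/page_isolation.c. Sketch based on the deleted body:

/* mm/page_isolation.c (sketch): same allocation policy as the removed callback. */
struct page *alloc_migrate_target(struct page *page, unsigned long private,
				  int **resultp)
{
	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;

	/* keep highmem pages in highmem so lowmem is not drained */
	if (PageHighMem(page))
		gfp_mask |= __GFP_HIGHMEM;

	return alloc_page(gfp_mask);
}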
@@ -5754,6 +5789,15 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	unsigned long outer_start, outer_end;
 	int ret = 0, order;
 
+	struct compact_control cc = {
+		.nr_migratepages = 0,
+		.order = -1,
+		.zone = page_zone(pfn_to_page(start)),
+		.sync = true,
+		.ignore_skip_hint = true,
+	};
+	INIT_LIST_HEAD(&cc.migratepages);
+
 	/*
 	 * What we do here is we mark all pageblocks in range as
 	 * MIGRATE_ISOLATE. Because pageblock and max order pages may
@@ -5783,7 +5827,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	if (ret)
 		goto done;
 
-	ret = __alloc_contig_migrate_range(start, end);
+	ret = __alloc_contig_migrate_range(&cc, start, end);
 	if (ret)
 		goto done;
 
@@ -5832,7 +5876,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 		__reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start);
 
 	/* Grab isolated pages from freelists. */
-	outer_end = isolate_freepages_range(outer_start, end);
+	outer_end = isolate_freepages_range(&cc, outer_start, end);
 	if (!outer_end) {
 		ret = -EBUSY;
 		goto done;
@@ -5874,6 +5918,7 @@ static int __meminit __zone_pcp_update(void *data)
 		local_irq_save(flags);
 		if (pcp->count > 0)
 			free_pcppages_bulk(zone, pcp->count, pcp);
+		drain_zonestat(zone, pset);
 		setup_pageset(pset, batch);
 		local_irq_restore(flags);
 	}
@@ -5890,10 +5935,16 @@ void __meminit zone_pcp_update(struct zone *zone)
 void zone_pcp_reset(struct zone *zone)
 {
 	unsigned long flags;
+	int cpu;
+	struct per_cpu_pageset *pset;
 
 	/* avoid races with drain_pages() */
 	local_irq_save(flags);
 	if (zone->pageset != &boot_pageset) {
+		for_each_online_cpu(cpu) {
+			pset = per_cpu_ptr(zone->pageset, cpu);
+			drain_zonestat(zone, pset);
+		}
 		free_percpu(zone->pageset);
 		zone->pageset = &boot_pageset;
 	}
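
[Annotation, not part of the patch] drain_zonestat() is assumed to be a new helper in mm/vmstat.c that folds a per-cpu pageset's pending vm_stat_diff deltas back into the zone counters before the pageset is reset or torn down, which is what both hunks above require. A minimal sketch:

/* mm/vmstat.c (sketch): flush pending per-cpu deltas into the zone counters. */
void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
{
	int i;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (pset->vm_stat_diff[i]) {
			int v = pset->vm_stat_diff[i];

			pset->vm_stat_diff[i] = 0;
			atomic_long_add(v, &zone->vm_stat[i]);
			atomic_long_add(v, &vm_stat[i]);
		}
}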
@@ -6047,3 +6098,37 @@
 	dump_page_flags(page->flags);
 	mem_cgroup_print_bad_page(page);
 }
+
+/* reset zone->present_pages */
+void reset_zone_present_pages(void)
+{
+	struct zone *z;
+	int i, nid;
+
+	for_each_node_state(nid, N_HIGH_MEMORY) {
+		for (i = 0; i < MAX_NR_ZONES; i++) {
+			z = NODE_DATA(nid)->node_zones + i;
+			z->present_pages = 0;
+		}
+	}
+}
+
+/* calculate zone's present pages in buddy system */
+void fixup_zone_present_pages(int nid, unsigned long start_pfn,
+			      unsigned long end_pfn)
+{
+	struct zone *z;
+	unsigned long zone_start_pfn, zone_end_pfn;
+	int i;
+
+	for (i = 0; i < MAX_NR_ZONES; i++) {
+		z = NODE_DATA(nid)->node_zones + i;
+		zone_start_pfn = z->zone_start_pfn;
+		zone_end_pfn = zone_start_pfn + z->spanned_pages;
+
+		/* if the two regions intersect */
+		if (!(zone_start_pfn >= end_pfn || zone_end_pfn <= start_pfn))
+			z->present_pages += min(end_pfn, zone_end_pfn) -
+					    max(start_pfn, zone_start_pfn);
+	}
+}
