Diffstat (limited to 'mm/page_alloc.c')
 mm/page_alloc.c | 259 ++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 156 insertions(+), 103 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ff7e1587239..bd7625676a6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -357,6 +357,7 @@ void prep_compound_page(struct page *page, unsigned long order)
 	}
 }
 
+/* update __split_huge_page_refcount if you change this function */
 static int destroy_compound_page(struct page *page, unsigned long order)
 {
 	int i;
@@ -426,18 +427,10 @@ static inline void rmv_page_order(struct page *page)
  *
  * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
  */
-static inline struct page *
-__page_find_buddy(struct page *page, unsigned long page_idx, unsigned int order)
-{
-	unsigned long buddy_idx = page_idx ^ (1 << order);
-
-	return page + (buddy_idx - page_idx);
-}
-
 static inline unsigned long
-__find_combined_index(unsigned long page_idx, unsigned int order)
+__find_buddy_index(unsigned long page_idx, unsigned int order)
 {
-	return (page_idx & ~(1 << order));
+	return page_idx ^ (1 << order);
 }
 
 /*
@@ -448,8 +441,8 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
  * (c) a page and its buddy have the same order &&
  * (d) a page and its buddy are in the same zone.
  *
- * For recording whether a page is in the buddy system, we use PG_buddy.
- * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
+ * For recording whether a page is in the buddy system, we set ->_mapcount -2.
+ * Setting, clearing, and testing _mapcount -2 is serialized by zone->lock.
  *
  * For recording page's order, we use page_private(page).
  */
@@ -482,7 +475,7 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
  * as necessary, plus some accounting needed to play nicely with other
  * parts of the VM system.
  * At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_buddy. Page's
+ * free pages of length of (1 << order) and marked with _mapcount -2. Page's
  * order is recorded in page_private(page) field.
 * So when we are allocating or freeing one, we can derive the state of the
 * other. That is, if we allocate a small block, and both were
@@ -499,6 +492,7 @@ static inline void __free_one_page(struct page *page,
 {
 	unsigned long page_idx;
 	unsigned long combined_idx;
+	unsigned long uninitialized_var(buddy_idx);
 	struct page *buddy;
 
 	if (unlikely(PageCompound(page)))
@@ -513,7 +507,8 @@ static inline void __free_one_page(struct page *page,
 	VM_BUG_ON(bad_range(zone, page));
 
 	while (order < MAX_ORDER-1) {
-		buddy = __page_find_buddy(page, page_idx, order);
+		buddy_idx = __find_buddy_index(page_idx, order);
+		buddy = page + (buddy_idx - page_idx);
 		if (!page_is_buddy(page, buddy, order))
 			break;
 
@@ -521,7 +516,7 @@ static inline void __free_one_page(struct page *page,
 		list_del(&buddy->lru);
 		zone->free_area[order].nr_free--;
 		rmv_page_order(buddy);
-		combined_idx = __find_combined_index(page_idx, order);
+		combined_idx = buddy_idx & page_idx;
 		page = page + (combined_idx - page_idx);
 		page_idx = combined_idx;
 		order++;
@@ -538,9 +533,10 @@ static inline void __free_one_page(struct page *page,
 	 */
 	if ((order < MAX_ORDER-2) && pfn_valid_within(page_to_pfn(buddy))) {
 		struct page *higher_page, *higher_buddy;
-		combined_idx = __find_combined_index(page_idx, order);
-		higher_page = page + combined_idx - page_idx;
-		higher_buddy = __page_find_buddy(higher_page, combined_idx, order + 1);
+		combined_idx = buddy_idx & page_idx;
+		higher_page = page + (combined_idx - page_idx);
+		buddy_idx = __find_buddy_index(combined_idx, order + 1);
+		higher_buddy = page + (buddy_idx - combined_idx);
 		if (page_is_buddy(higher_page, higher_buddy, order + 1)) {
 			list_add_tail(&page->lru,
 				&zone->free_area[order].free_list[migratetype]);
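
Aside, for readers new to the buddy arithmetic in the hunks above: two buddies of a given order differ only in the bit selected by that order, so XOR yields the buddy's index and AND yields the index at which the merged, next-higher-order block starts. A minimal user-space sketch of the same arithmetic (illustration only, not part of the patch; the names here are invented):

#include <stdio.h>

/* Same idea as __find_buddy_index above: flip the bit that selects which
 * half of the next-higher-order block this block occupies. */
static unsigned long find_buddy_index(unsigned long page_idx, unsigned int order)
{
	return page_idx ^ (1UL << order);
}

int main(void)
{
	unsigned long page_idx = 12;	/* start of an order-2 block: pages 12..15 */
	unsigned int order = 2;
	unsigned long buddy_idx = find_buddy_index(page_idx, order);	/* 8 */

	/* Both indexes are multiples of 1 << order and differ only in that
	 * bit, so AND keeps the smaller one: the merged block's start. */
	unsigned long combined_idx = buddy_idx & page_idx;		/* 8 */

	printf("buddy=%lu combined=%lu\n", buddy_idx, combined_idx);
	return 0;
}
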
@@ -651,13 +647,10 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
 	trace_mm_page_free_direct(page, order);
 	kmemcheck_free_shadow(page, order);
 
-	for (i = 0; i < (1 << order); i++) {
-		struct page *pg = page + i;
-
-		if (PageAnon(pg))
-			pg->mapping = NULL;
-		bad += free_pages_check(pg);
-	}
+	if (PageAnon(page))
+		page->mapping = NULL;
+	for (i = 0; i < (1 << order); i++)
+		bad += free_pages_check(page + i);
 	if (bad)
 		return false;
 
@@ -1095,8 +1088,10 @@ static void drain_pages(unsigned int cpu)
 		pset = per_cpu_ptr(zone->pageset, cpu);
 
 		pcp = &pset->pcp;
-		free_pcppages_bulk(zone, pcp->count, pcp);
-		pcp->count = 0;
+		if (pcp->count) {
+			free_pcppages_bulk(zone, pcp->count, pcp);
+			pcp->count = 0;
+		}
 		local_irq_restore(flags);
 	}
 }
@@ -1460,24 +1455,24 @@ static inline int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 #endif /* CONFIG_FAIL_PAGE_ALLOC */
 
 /*
- * Return 1 if free pages are above 'mark'. This takes into account the order
+ * Return true if free pages are above 'mark'. This takes into account the order
  * of the allocation.
  */
-int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
-		      int classzone_idx, int alloc_flags)
+static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+		      int classzone_idx, int alloc_flags, long free_pages)
 {
 	/* free_pages my go negative - that's OK */
 	long min = mark;
-	long free_pages = zone_nr_free_pages(z) - (1 << order) + 1;
 	int o;
 
+	free_pages -= (1 << order) + 1;
 	if (alloc_flags & ALLOC_HIGH)
 		min -= min / 2;
 	if (alloc_flags & ALLOC_HARDER)
 		min -= min / 4;
 
 	if (free_pages <= min + z->lowmem_reserve[classzone_idx])
-		return 0;
+		return false;
 	for (o = 0; o < order; o++) {
 		/* At the next order, this order's pages become unavailable */
 		free_pages -= z->free_area[o].nr_free << o;
@@ -1486,9 +1481,28 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		min >>= 1;
 
 		if (free_pages <= min)
-			return 0;
+			return false;
 	}
-	return 1;
+	return true;
+}
+
+bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+		      int classzone_idx, int alloc_flags)
+{
+	return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
+					zone_page_state(z, NR_FREE_PAGES));
+}
+
+bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
+		      int classzone_idx, int alloc_flags)
+{
+	long free_pages = zone_page_state(z, NR_FREE_PAGES);
+
+	if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
+		free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
+
+	return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
+								free_pages);
 }
 
 #ifdef CONFIG_NUMA
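
Aside on the watermark rework above: __zone_watermark_ok now receives the free-page count as a parameter so that zone_watermark_ok_safe can pass a drift-corrected snapshot when the per-cpu counters may be stale. The core test is small enough to model on its own; a stand-alone sketch under assumed inputs (not the kernel function, and the ALLOC_HIGH/ALLOC_HARDER adjustments are left out):

#include <stdbool.h>

/* Model of the per-order watermark test: discount free blocks too small to
 * satisfy the request, and require progressively less headroom as the order
 * grows, as in the loop above. */
static bool watermark_ok_model(long free_pages, long mark, int order,
			       const long *nr_free, long lowmem_reserve)
{
	long min = mark;
	int o;

	free_pages -= (1 << order) + 1;		/* mirrors the patched code */
	if (free_pages <= min + lowmem_reserve)
		return false;
	for (o = 0; o < order; o++) {
		free_pages -= nr_free[o] << o;	/* too small for this request */
		min >>= 1;			/* relax the mark per order */
		if (free_pages <= min)
			return false;
	}
	return true;
}

int main(void)
{
	const long nr_free[3] = { 50, 10, 4 };	/* free blocks at orders 0..2 */

	/* e.g. 128 free pages, mark 32, an order-2 request, no lowmem reserve */
	return watermark_ok_model(128, 32, 2, nr_free, 0) ? 0 : 1;
}
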
@@ -1793,15 +1807,18 @@ static struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-	int migratetype, unsigned long *did_some_progress)
+	int migratetype, unsigned long *did_some_progress,
+	bool sync_migration)
 {
 	struct page *page;
 
 	if (!order || compaction_deferred(preferred_zone))
 		return NULL;
 
+	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
-						nodemask);
+						nodemask, sync_migration);
+	current->flags &= ~PF_MEMALLOC;
 	if (*did_some_progress != COMPACT_SKIPPED) {
 
 		/* Page migration frees to the PCP lists but we want merging */
@@ -1837,7 +1854,8 @@ static inline struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-	int migratetype, unsigned long *did_some_progress)
+	int migratetype, unsigned long *did_some_progress,
+	bool sync_migration)
 {
 	return NULL;
 }
@@ -1852,23 +1870,22 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 {
 	struct page *page = NULL;
 	struct reclaim_state reclaim_state;
-	struct task_struct *p = current;
 	bool drained = false;
 
 	cond_resched();
 
 	/* We now go into synchronous reclaim */
 	cpuset_memory_pressure_bump();
-	p->flags |= PF_MEMALLOC;
+	current->flags |= PF_MEMALLOC;
 	lockdep_set_current_reclaim_state(gfp_mask);
 	reclaim_state.reclaimed_slab = 0;
-	p->reclaim_state = &reclaim_state;
+	current->reclaim_state = &reclaim_state;
 
 	*did_some_progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask);
 
-	p->reclaim_state = NULL;
+	current->reclaim_state = NULL;
 	lockdep_clear_current_reclaim_state();
-	p->flags &= ~PF_MEMALLOC;
+	current->flags &= ~PF_MEMALLOC;
 
 	cond_resched();
 
@@ -1920,19 +1937,19 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
 
 static inline
 void wake_all_kswapd(unsigned int order, struct zonelist *zonelist,
-						enum zone_type high_zoneidx)
+						enum zone_type high_zoneidx,
+						enum zone_type classzone_idx)
 {
 	struct zoneref *z;
 	struct zone *zone;
 
 	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
-		wakeup_kswapd(zone, order);
+		wakeup_kswapd(zone, order, classzone_idx);
 }
 
 static inline int
 gfp_to_alloc_flags(gfp_t gfp_mask)
 {
-	struct task_struct *p = current;
 	int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
 	const gfp_t wait = gfp_mask & __GFP_WAIT;
 
@@ -1948,18 +1965,23 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 	alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
 
 	if (!wait) {
-		alloc_flags |= ALLOC_HARDER;
+		/*
+		 * Not worth trying to allocate harder for
+		 * __GFP_NOMEMALLOC even if it can't schedule.
+		 */
+		if (!(gfp_mask & __GFP_NOMEMALLOC))
+			alloc_flags |= ALLOC_HARDER;
 		/*
 		 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
 		 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 		 */
 		alloc_flags &= ~ALLOC_CPUSET;
-	} else if (unlikely(rt_task(p)) && !in_interrupt())
+	} else if (unlikely(rt_task(current)) && !in_interrupt())
 		alloc_flags |= ALLOC_HARDER;
 
 	if (likely(!(gfp_mask & __GFP_NOMEMALLOC))) {
 		if (!in_interrupt() &&
-		    ((p->flags & PF_MEMALLOC) ||
+		    ((current->flags & PF_MEMALLOC) ||
 		     unlikely(test_thread_flag(TIF_MEMDIE))))
 			alloc_flags |= ALLOC_NO_WATERMARKS;
 	}
@@ -1978,7 +2000,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	int alloc_flags;
 	unsigned long pages_reclaimed = 0;
 	unsigned long did_some_progress;
-	struct task_struct *p = current;
+	bool sync_migration = false;
 
 	/*
 	 * In the slowpath, we sanity check order to avoid ever trying to
@@ -2003,7 +2025,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		goto nopage;
 
 restart:
-	wake_all_kswapd(order, zonelist, high_zoneidx);
+	if (!(gfp_mask & __GFP_NO_KSWAPD))
+		wake_all_kswapd(order, zonelist, high_zoneidx,
+						zone_idx(preferred_zone));
 
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
@@ -2012,6 +2036,14 @@ restart:
 	 */
 	alloc_flags = gfp_to_alloc_flags(gfp_mask);
 
+	/*
+	 * Find the true preferred zone if the allocation is unconstrained by
+	 * cpusets.
+	 */
+	if (!(alloc_flags & ALLOC_CPUSET) && !nodemask)
+		first_zones_zonelist(zonelist, high_zoneidx, NULL,
+					&preferred_zone);
+
 	/* This is the last chance, in general, before the goto nopage. */
 	page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
 			high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -2034,21 +2066,26 @@ rebalance:
 		goto nopage;
 
 	/* Avoid recursion of direct reclaim */
-	if (p->flags & PF_MEMALLOC)
+	if (current->flags & PF_MEMALLOC)
 		goto nopage;
 
 	/* Avoid allocations with no watermarks from looping endlessly */
 	if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL))
 		goto nopage;
 
-	/* Try direct compaction */
+	/*
+	 * Try direct compaction. The first pass is asynchronous. Subsequent
+	 * attempts after direct reclaim are synchronous
+	 */
 	page = __alloc_pages_direct_compact(gfp_mask, order,
 					zonelist, high_zoneidx,
 					nodemask,
 					alloc_flags, preferred_zone,
-					migratetype, &did_some_progress);
+					migratetype, &did_some_progress,
+					sync_migration);
 	if (page)
 		goto got_pg;
+	sync_migration = true;
 
 	/* Try direct reclaim and then allocating */
 	page = __alloc_pages_direct_reclaim(gfp_mask, order,
@@ -2102,13 +2139,27 @@ rebalance:
 		/* Wait for some write requests to complete then retry */
 		wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
 		goto rebalance;
+	} else {
+		/*
+		 * High-order allocations do not necessarily loop after
+		 * direct reclaim and reclaim/compaction depends on compaction
+		 * being called after reclaim so call directly if necessary
+		 */
+		page = __alloc_pages_direct_compact(gfp_mask, order,
+					zonelist, high_zoneidx,
+					nodemask,
+					alloc_flags, preferred_zone,
+					migratetype, &did_some_progress,
+					sync_migration);
+		if (page)
+			goto got_pg;
 	}
 
 nopage:
 	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
 		printk(KERN_WARNING "%s: page allocation failure."
 			" order:%d, mode:0x%x\n",
-			p->comm, order, gfp_mask);
+			current->comm, order, gfp_mask);
 		dump_stack();
 		show_mem();
 	}
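
Aside on the compaction hunks taken together: the slow path now tries compaction asynchronously before direct reclaim, switches to synchronous migration for any later compaction attempt, and gives callers that will not loop one final synchronous try. A compressed control-flow sketch (all functions here are stand-ins, not kernel APIs):

#include <stdbool.h>
#include <stddef.h>

struct page;

/* Stand-ins for the real allocation steps. */
static struct page *try_compact(bool sync_migration) { (void)sync_migration; return NULL; }
static struct page *try_direct_reclaim(void) { return NULL; }
static bool should_retry(void) { return false; }

static struct page *slowpath_model(void)
{
	bool sync_migration = false;	/* first compaction pass is asynchronous */
	struct page *page;

	for (;;) {
		page = try_compact(sync_migration);
		if (page)
			break;
		sync_migration = true;	/* later passes migrate synchronously */

		page = try_direct_reclaim();
		if (page)
			break;

		if (!should_retry()) {
			/* callers that give up still get one synchronous try */
			page = try_compact(sync_migration);
			break;
		}
	}
	return page;
}

int main(void)
{
	return slowpath_model() ? 0 : 1;
}
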
@@ -2151,7 +2202,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 
 	get_mems_allowed();
 	/* The preferred zone is used for statistics later */
-	first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone);
+	first_zones_zonelist(zonelist, high_zoneidx,
+				nodemask ? : &cpuset_current_mems_allowed,
+				&preferred_zone);
 	if (!preferred_zone) {
 		put_mems_allowed();
 		return NULL;
@@ -2442,7 +2495,7 @@ void show_free_areas(void)
 			" all_unreclaimable? %s"
 			"\n",
 			zone->name,
-			K(zone_nr_free_pages(zone)),
+			K(zone_page_state(zone, NR_FREE_PAGES)),
 			K(min_wmark_pages(zone)),
 			K(low_wmark_pages(zone)),
 			K(high_wmark_pages(zone)),
@@ -2585,9 +2638,16 @@ static int __parse_numa_zonelist_order(char *s)
 
 static __init int setup_numa_zonelist_order(char *s)
 {
-	if (s)
-		return __parse_numa_zonelist_order(s);
-	return 0;
+	int ret;
+
+	if (!s)
+		return 0;
+
+	ret = __parse_numa_zonelist_order(s);
+	if (ret == 0)
+		strlcpy(numa_zonelist_order, s, NUMA_ZONELIST_ORDER_LEN);
+
+	return ret;
 }
 early_param("numa_zonelist_order", setup_numa_zonelist_order);
 
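
Usage note on the hunk above: once a string passed via the early parameter parses successfully, it is now also copied into numa_zonelist_order, so the chosen ordering is retained rather than silently dropped. On the boot command line this looks like, for example, numa_zonelist_order=zone (as far as I recall the parser accepts variants of "default", "node" and "zone"; see Documentation/kernel-parameters.txt for the authoritative list).
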
@@ -3639,13 +3699,45 @@ void __init free_bootmem_with_active_regions(int nid,
 }
 
 #ifdef CONFIG_HAVE_MEMBLOCK
+/*
+ * Basic iterator support. Return the last range of PFNs for a node
+ * Note: nid == MAX_NUMNODES returns last region regardless of node
+ */
+static int __meminit last_active_region_index_in_nid(int nid)
+{
+	int i;
+
+	for (i = nr_nodemap_entries - 1; i >= 0; i--)
+		if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
+			return i;
+
+	return -1;
+}
+
+/*
+ * Basic iterator support. Return the previous active range of PFNs for a node
+ * Note: nid == MAX_NUMNODES returns next region regardless of node
+ */
+static int __meminit previous_active_region_index_in_nid(int index, int nid)
+{
+	for (index = index - 1; index >= 0; index--)
+		if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
+			return index;
+
+	return -1;
+}
+
+#define for_each_active_range_index_in_nid_reverse(i, nid) \
+	for (i = last_active_region_index_in_nid(nid); i != -1; \
+		i = previous_active_region_index_in_nid(i, nid))
+
 u64 __init find_memory_core_early(int nid, u64 size, u64 align,
 					u64 goal, u64 limit)
 {
 	int i;
 
 	/* Need to go over early_node_map to find out good range for node */
-	for_each_active_range_index_in_nid(i, nid) {
+	for_each_active_range_index_in_nid_reverse(i, nid) {
 		u64 addr;
 		u64 ei_start, ei_last;
 		u64 final_start, final_end;
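
Aside on the iterators added above: this is ordinary walk-the-array-backwards machinery, wrapped in a macro so that find_memory_core_early can scan ranges from the tail. The same shape, detached from early_node_map (a self-contained sketch with made-up types, not kernel code):

#include <stdio.h>

struct mem_range { int nid; unsigned long start_pfn, end_pfn; };

static struct mem_range node_map[] = {
	{ 0,   0, 100 },
	{ 1, 100, 200 },
	{ 0, 200, 300 },
};
static const int nr_entries = sizeof(node_map) / sizeof(node_map[0]);

/* Index of the last entry belonging to nid (nid < 0 matches any), or -1. */
static int last_index_in_nid(int nid)
{
	int i;

	for (i = nr_entries - 1; i >= 0; i--)
		if (nid < 0 || node_map[i].nid == nid)
			return i;
	return -1;
}

/* Index of the previous matching entry before index, or -1. */
static int previous_index_in_nid(int index, int nid)
{
	for (index = index - 1; index >= 0; index--)
		if (nid < 0 || node_map[index].nid == nid)
			return index;
	return -1;
}

#define for_each_index_in_nid_reverse(i, nid) \
	for (i = last_index_in_nid(nid); i != -1; \
	     i = previous_index_in_nid(i, nid))

int main(void)
{
	int i;

	for_each_index_in_nid_reverse(i, 0)	/* visits index 2, then 0 */
		printf("range %d: %lu..%lu\n", i,
		       node_map[i].start_pfn, node_map[i].end_pfn);
	return 0;
}
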
@@ -3688,34 +3780,6 @@ int __init add_from_early_node_map(struct range *range, int az,
 	return nr_range;
 }
 
-#ifdef CONFIG_NO_BOOTMEM
-void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
-					u64 goal, u64 limit)
-{
-	void *ptr;
-	u64 addr;
-
-	if (limit > memblock.current_limit)
-		limit = memblock.current_limit;
-
-	addr = find_memory_core_early(nid, size, align, goal, limit);
-
-	if (addr == MEMBLOCK_ERROR)
-		return NULL;
-
-	ptr = phys_to_virt(addr);
-	memset(ptr, 0, size);
-	memblock_x86_reserve_range(addr, addr + size, "BOOTMEM");
-	/*
-	 * The min_count is set to 0 so that bootmem allocated blocks
-	 * are never reported as leaks.
-	 */
-	kmemleak_alloc(ptr, size, 0, 0);
-	return ptr;
-}
-#endif
-
-
 void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
 {
 	int i;
@@ -4014,7 +4078,7 @@ static void __init setup_usemap(struct pglist_data *pgdat,
 	zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize);
 }
 #else
-static void inline setup_usemap(struct pglist_data *pgdat,
+static inline void setup_usemap(struct pglist_data *pgdat,
 				struct zone *zone, unsigned long zonesize) {}
 #endif /* CONFIG_SPARSEMEM */
 
@@ -4749,15 +4813,6 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
 	dma_reserve = new_dma_reserve;
 }
 
-#ifndef CONFIG_NEED_MULTIPLE_NODES
-struct pglist_data __refdata contig_page_data = {
-#ifndef CONFIG_NO_BOOTMEM
-	.bdata = &bootmem_node_data[0]
-#endif
-};
-EXPORT_SYMBOL(contig_page_data);
-#endif
-
 void __init free_area_init(unsigned long *zones_size)
 {
 	free_area_init_node(0, zones_size,
@@ -5316,10 +5371,9 @@ __count_immobile_pages(struct zone *zone, struct page *page, int count)
 	for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) {
 		unsigned long check = pfn + iter;
 
-		if (!pfn_valid_within(check)) {
-			iter++;
+		if (!pfn_valid_within(check))
 			continue;
-		}
+
 		page = pfn_to_page(check);
 		if (!page_count(page)) {
 			if (PageBuddy(page))
@@ -5517,7 +5571,6 @@ static struct trace_print_flags pageflag_names[] = {
 	{1UL << PG_swapcache,		"swapcache"	},
 	{1UL << PG_mappedtodisk,	"mappedtodisk"	},
 	{1UL << PG_reclaim,		"reclaim"	},
-	{1UL << PG_buddy,		"buddy"		},
 	{1UL << PG_swapbacked,		"swapbacked"	},
 	{1UL << PG_unevictable,		"unevictable"	},
 #ifdef CONFIG_MMU
@@ -5565,7 +5618,7 @@ void dump_page(struct page *page)
 {
 	printk(KERN_ALERT
 	       "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
-		page, page_count(page), page_mapcount(page),
+		page, atomic_read(&page->_count), page_mapcount(page),
 		page->mapping, page->index);
 	dump_page_flags(page->flags);
 }