Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  259
1 files changed, 156 insertions, 103 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ff7e1587239..bd7625676a6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -357,6 +357,7 @@ void prep_compound_page(struct page *page, unsigned long order)
 	}
 }
 
+/* update __split_huge_page_refcount if you change this function */
 static int destroy_compound_page(struct page *page, unsigned long order)
 {
 	int i;
@@ -426,18 +427,10 @@ static inline void rmv_page_order(struct page *page)
  *
  * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
  */
-static inline struct page *
-__page_find_buddy(struct page *page, unsigned long page_idx, unsigned int order)
-{
-	unsigned long buddy_idx = page_idx ^ (1 << order);
-
-	return page + (buddy_idx - page_idx);
-}
-
 static inline unsigned long
-__find_combined_index(unsigned long page_idx, unsigned int order)
+__find_buddy_index(unsigned long page_idx, unsigned int order)
 {
-	return (page_idx & ~(1 << order));
+	return page_idx ^ (1 << order);
 }
 
 /*
@@ -448,8 +441,8 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
  * (c) a page and its buddy have the same order &&
  * (d) a page and its buddy are in the same zone.
  *
- * For recording whether a page is in the buddy system, we use PG_buddy.
- * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
+ * For recording whether a page is in the buddy system, we set ->_mapcount -2.
+ * Setting, clearing, and testing _mapcount -2 is serialized by zone->lock.
  *
  * For recording page's order, we use page_private(page).
  */
@@ -482,7 +475,7 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
  * as necessary, plus some accounting needed to play nicely with other
  * parts of the VM system.
  * At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_buddy. Page's
+ * free pages of length of (1 << order) and marked with _mapcount -2. Page's
  * order is recorded in page_private(page) field.
  * So when we are allocating or freeing one, we can derive the state of the
  * other. That is, if we allocate a small block, and both were
@@ -499,6 +492,7 @@ static inline void __free_one_page(struct page *page,
 {
 	unsigned long page_idx;
 	unsigned long combined_idx;
+	unsigned long uninitialized_var(buddy_idx);
 	struct page *buddy;
 
 	if (unlikely(PageCompound(page)))
@@ -513,7 +507,8 @@ static inline void __free_one_page(struct page *page,
 	VM_BUG_ON(bad_range(zone, page));
 
 	while (order < MAX_ORDER-1) {
-		buddy = __page_find_buddy(page, page_idx, order);
+		buddy_idx = __find_buddy_index(page_idx, order);
+		buddy = page + (buddy_idx - page_idx);
 		if (!page_is_buddy(page, buddy, order))
 			break;
 
@@ -521,7 +516,7 @@ static inline void __free_one_page(struct page *page,
 		list_del(&buddy->lru);
 		zone->free_area[order].nr_free--;
 		rmv_page_order(buddy);
-		combined_idx = __find_combined_index(page_idx, order);
+		combined_idx = buddy_idx & page_idx;
 		page = page + (combined_idx - page_idx);
 		page_idx = combined_idx;
 		order++;
@@ -538,9 +533,10 @@ static inline void __free_one_page(struct page *page,
 	 */
 	if ((order < MAX_ORDER-2) && pfn_valid_within(page_to_pfn(buddy))) {
 		struct page *higher_page, *higher_buddy;
-		combined_idx = __find_combined_index(page_idx, order);
-		higher_page = page + combined_idx - page_idx;
-		higher_buddy = __page_find_buddy(higher_page, combined_idx, order + 1);
+		combined_idx = buddy_idx & page_idx;
+		higher_page = page + (combined_idx - page_idx);
+		buddy_idx = __find_buddy_index(combined_idx, order + 1);
+		higher_buddy = page + (buddy_idx - combined_idx);
 		if (page_is_buddy(higher_page, higher_buddy, order + 1)) {
 			list_add_tail(&page->lru,
 				&zone->free_area[order].free_list[migratetype]);
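
Note (illustrative, not part of the patch): the open-coded buddy arithmetic above can be checked in isolation. A minimal userspace sketch with a made-up page_idx shows that the XOR in __find_buddy_index() selects the buddy block and that buddy_idx & page_idx yields the head of the merged pair, the same value the removed __find_combined_index() computed:

#include <stdio.h>

int main(void)
{
        unsigned long page_idx = 12;    /* hypothetical order-2 aligned index */
        unsigned int order = 2;
        unsigned long buddy_idx = page_idx ^ (1UL << order);   /* 12 ^ 4 = 8 */
        unsigned long combined_idx = buddy_idx & page_idx;      /* 8 & 12 = 8, head of merged block */

        printf("page_idx=%lu buddy_idx=%lu combined_idx=%lu\n",
               page_idx, buddy_idx, combined_idx);
        return 0;
}
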
@@ -651,13 +647,10 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
 	trace_mm_page_free_direct(page, order);
 	kmemcheck_free_shadow(page, order);
 
-	for (i = 0; i < (1 << order); i++) {
-		struct page *pg = page + i;
-
-		if (PageAnon(pg))
-			pg->mapping = NULL;
-		bad += free_pages_check(pg);
-	}
+	if (PageAnon(page))
+		page->mapping = NULL;
+	for (i = 0; i < (1 << order); i++)
+		bad += free_pages_check(page + i);
 	if (bad)
 		return false;
 
@@ -1095,8 +1088,10 @@ static void drain_pages(unsigned int cpu)
 		pset = per_cpu_ptr(zone->pageset, cpu);
 
 		pcp = &pset->pcp;
-		free_pcppages_bulk(zone, pcp->count, pcp);
-		pcp->count = 0;
+		if (pcp->count) {
+			free_pcppages_bulk(zone, pcp->count, pcp);
+			pcp->count = 0;
+		}
 		local_irq_restore(flags);
 	}
 }
@@ -1460,24 +1455,24 @@ static inline int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 #endif /* CONFIG_FAIL_PAGE_ALLOC */
 
 /*
- * Return 1 if free pages are above 'mark'. This takes into account the order
+ * Return true if free pages are above 'mark'. This takes into account the order
  * of the allocation.
  */
-int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
-		      int classzone_idx, int alloc_flags)
+static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+		      int classzone_idx, int alloc_flags, long free_pages)
 {
 	/* free_pages my go negative - that's OK */
 	long min = mark;
-	long free_pages = zone_nr_free_pages(z) - (1 << order) + 1;
 	int o;
 
+	free_pages -= (1 << order) + 1;
 	if (alloc_flags & ALLOC_HIGH)
 		min -= min / 2;
 	if (alloc_flags & ALLOC_HARDER)
 		min -= min / 4;
 
 	if (free_pages <= min + z->lowmem_reserve[classzone_idx])
-		return 0;
+		return false;
 	for (o = 0; o < order; o++) {
 		/* At the next order, this order's pages become unavailable */
 		free_pages -= z->free_area[o].nr_free << o;
@@ -1486,9 +1481,28 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		min >>= 1;
 
 		if (free_pages <= min)
-			return 0;
+			return false;
 	}
-	return 1;
+	return true;
+}
+
+bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+		      int classzone_idx, int alloc_flags)
+{
+	return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
+					zone_page_state(z, NR_FREE_PAGES));
+}
+
+bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
+		      int classzone_idx, int alloc_flags)
+{
+	long free_pages = zone_page_state(z, NR_FREE_PAGES);
+
+	if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
+		free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
+
+	return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
+					free_pages);
 }
 
 #ifdef CONFIG_NUMA
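
Note (illustrative, not part of the patch): __zone_watermark_ok() now receives free_pages from its caller, which is what lets zone_watermark_ok_safe() substitute a drift-corrected counter when the zone is near its percpu_drift_mark. The standalone sketch below, with a hypothetical watermark_ok() helper and made-up numbers, walks through the same check:

#include <stdbool.h>
#include <stdio.h>

#define MAX_ORDER 11

/* Simplified rendering of the check: charge the allocation's own pages,
 * then discount each lower order against a halving minimum. */
static bool watermark_ok(int order, long mark, long lowmem_reserve,
                         long free_pages, const long nr_free[MAX_ORDER])
{
        long min = mark;
        int o;

        free_pages -= (1 << order) + 1;
        if (free_pages <= min + lowmem_reserve)
                return false;
        for (o = 0; o < order; o++) {
                /* at the next order, this order's pages become unavailable */
                free_pages -= nr_free[o] << o;
                min >>= 1;
                if (free_pages <= min)
                        return false;
        }
        return true;
}

int main(void)
{
        long nr_free[MAX_ORDER] = { 300, 80, 20, 5 };   /* hypothetical per-order counts */

        printf("order-2 allocation ok? %d\n",
               watermark_ok(2, 128, 0, 600, nr_free));
        return 0;
}
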
@@ -1793,15 +1807,18 @@ static struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-	int migratetype, unsigned long *did_some_progress)
+	int migratetype, unsigned long *did_some_progress,
+	bool sync_migration)
 {
 	struct page *page;
 
 	if (!order || compaction_deferred(preferred_zone))
 		return NULL;
 
+	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
-						nodemask);
+						nodemask, sync_migration);
+	current->flags &= ~PF_MEMALLOC;
 	if (*did_some_progress != COMPACT_SKIPPED) {
 
 		/* Page migration frees to the PCP lists but we want merging */
@@ -1837,7 +1854,8 @@ static inline struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-	int migratetype, unsigned long *did_some_progress)
+	int migratetype, unsigned long *did_some_progress,
+	bool sync_migration)
 {
 	return NULL;
 }
@@ -1852,23 +1870,22 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 {
 	struct page *page = NULL;
 	struct reclaim_state reclaim_state;
-	struct task_struct *p = current;
 	bool drained = false;
 
 	cond_resched();
 
 	/* We now go into synchronous reclaim */
 	cpuset_memory_pressure_bump();
-	p->flags |= PF_MEMALLOC;
+	current->flags |= PF_MEMALLOC;
 	lockdep_set_current_reclaim_state(gfp_mask);
 	reclaim_state.reclaimed_slab = 0;
-	p->reclaim_state = &reclaim_state;
+	current->reclaim_state = &reclaim_state;
 
 	*did_some_progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask);
 
-	p->reclaim_state = NULL;
+	current->reclaim_state = NULL;
 	lockdep_clear_current_reclaim_state();
-	p->flags &= ~PF_MEMALLOC;
+	current->flags &= ~PF_MEMALLOC;
 
 	cond_resched();
 
@@ -1920,19 +1937,19 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
 
 static inline
 void wake_all_kswapd(unsigned int order, struct zonelist *zonelist,
-						enum zone_type high_zoneidx)
+						enum zone_type high_zoneidx,
+						enum zone_type classzone_idx)
 {
 	struct zoneref *z;
 	struct zone *zone;
 
 	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
-		wakeup_kswapd(zone, order);
+		wakeup_kswapd(zone, order, classzone_idx);
 }
 
 static inline int
 gfp_to_alloc_flags(gfp_t gfp_mask)
 {
-	struct task_struct *p = current;
 	int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
 	const gfp_t wait = gfp_mask & __GFP_WAIT;
 
@@ -1948,18 +1965,23 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 	alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
 
 	if (!wait) {
-		alloc_flags |= ALLOC_HARDER;
+		/*
+		 * Not worth trying to allocate harder for
+		 * __GFP_NOMEMALLOC even if it can't schedule.
+		 */
+		if (!(gfp_mask & __GFP_NOMEMALLOC))
+			alloc_flags |= ALLOC_HARDER;
 		/*
 		 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
 		 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 		 */
 		alloc_flags &= ~ALLOC_CPUSET;
-	} else if (unlikely(rt_task(p)) && !in_interrupt())
+	} else if (unlikely(rt_task(current)) && !in_interrupt())
 		alloc_flags |= ALLOC_HARDER;
 
 	if (likely(!(gfp_mask & __GFP_NOMEMALLOC))) {
 		if (!in_interrupt() &&
-		    ((p->flags & PF_MEMALLOC) ||
+		    ((current->flags & PF_MEMALLOC) ||
 		     unlikely(test_thread_flag(TIF_MEMDIE))))
 			alloc_flags |= ALLOC_NO_WATERMARKS;
 	}
@@ -1978,7 +2000,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	int alloc_flags;
 	unsigned long pages_reclaimed = 0;
 	unsigned long did_some_progress;
-	struct task_struct *p = current;
+	bool sync_migration = false;
 
 	/*
 	 * In the slowpath, we sanity check order to avoid ever trying to
@@ -2003,7 +2025,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		goto nopage;
 
 restart:
-	wake_all_kswapd(order, zonelist, high_zoneidx);
+	if (!(gfp_mask & __GFP_NO_KSWAPD))
+		wake_all_kswapd(order, zonelist, high_zoneidx,
+						zone_idx(preferred_zone));
 
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
@@ -2012,6 +2036,14 @@ restart:
 	 */
 	alloc_flags = gfp_to_alloc_flags(gfp_mask);
 
+	/*
+	 * Find the true preferred zone if the allocation is unconstrained by
+	 * cpusets.
+	 */
+	if (!(alloc_flags & ALLOC_CPUSET) && !nodemask)
+		first_zones_zonelist(zonelist, high_zoneidx, NULL,
+					&preferred_zone);
+
 	/* This is the last chance, in general, before the goto nopage. */
 	page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
 			high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -2034,21 +2066,26 @@ rebalance:
 		goto nopage;
 
 	/* Avoid recursion of direct reclaim */
-	if (p->flags & PF_MEMALLOC)
+	if (current->flags & PF_MEMALLOC)
 		goto nopage;
 
 	/* Avoid allocations with no watermarks from looping endlessly */
 	if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL))
 		goto nopage;
 
-	/* Try direct compaction */
+	/*
+	 * Try direct compaction. The first pass is asynchronous. Subsequent
+	 * attempts after direct reclaim are synchronous
+	 */
 	page = __alloc_pages_direct_compact(gfp_mask, order,
 					zonelist, high_zoneidx,
 					nodemask,
 					alloc_flags, preferred_zone,
-					migratetype, &did_some_progress);
+					migratetype, &did_some_progress,
+					sync_migration);
 	if (page)
 		goto got_pg;
+	sync_migration = true;
 
 	/* Try direct reclaim and then allocating */
 	page = __alloc_pages_direct_reclaim(gfp_mask, order,
@@ -2102,13 +2139,27 @@ rebalance:
 		/* Wait for some write requests to complete then retry */
 		wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
 		goto rebalance;
+	} else {
+		/*
+		 * High-order allocations do not necessarily loop after
+		 * direct reclaim and reclaim/compaction depends on compaction
+		 * being called after reclaim so call directly if necessary
+		 */
+		page = __alloc_pages_direct_compact(gfp_mask, order,
+					zonelist, high_zoneidx,
+					nodemask,
+					alloc_flags, preferred_zone,
+					migratetype, &did_some_progress,
+					sync_migration);
+		if (page)
+			goto got_pg;
 	}
 
 nopage:
 	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
 		printk(KERN_WARNING "%s: page allocation failure."
 			" order:%d, mode:0x%x\n",
-			p->comm, order, gfp_mask);
+			current->comm, order, gfp_mask);
 		dump_stack();
 		show_mem();
 	}
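
Note (illustrative, not part of the patch): the two hunks above arrange compaction around direct reclaim so that the first compaction pass is asynchronous, later passes are synchronous, and a high-order request that will not loop again still gets one final synchronous attempt. A rough control-flow sketch, with hypothetical try_compact(), try_reclaim() and should_retry() helpers that only print what would run:

#include <stdio.h>

/* Hypothetical stand-ins for the allocator steps; they always "fail" here
 * so the calling order is visible on stdout. */
static int try_compact(int order, int sync)
{
        printf("compact order=%d sync=%d\n", order, sync);
        return 0;
}

static int try_reclaim(int order)
{
        printf("reclaim order=%d\n", order);
        return 0;
}

/* Made-up retry policy; the real slowpath checks reclaim progress instead. */
static int should_retry(int order, int retries)
{
        return order <= 3 && retries < 2;
}

static int slowpath(int order)
{
        int sync_migration = 0;         /* first compaction pass is asynchronous */
        int retries = 0;

        do {
                if (try_compact(order, sync_migration))
                        return 1;
                sync_migration = 1;     /* later passes may block on migration */

                if (try_reclaim(order))
                        return 1;
        } while (should_retry(order, retries++));

        /* high-order: not looping again, give compaction one last synchronous try */
        return try_compact(order, sync_migration);
}

int main(void)
{
        slowpath(4);    /* order-4: one async pass, reclaim, then a sync pass */
        return 0;
}
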
@@ -2151,7 +2202,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 
 	get_mems_allowed();
 	/* The preferred zone is used for statistics later */
-	first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone);
+	first_zones_zonelist(zonelist, high_zoneidx,
+				nodemask ? : &cpuset_current_mems_allowed,
+				&preferred_zone);
 	if (!preferred_zone) {
 		put_mems_allowed();
 		return NULL;
@@ -2442,7 +2495,7 @@ void show_free_areas(void)
2442 " all_unreclaimable? %s" 2495 " all_unreclaimable? %s"
2443 "\n", 2496 "\n",
2444 zone->name, 2497 zone->name,
2445 K(zone_nr_free_pages(zone)), 2498 K(zone_page_state(zone, NR_FREE_PAGES)),
2446 K(min_wmark_pages(zone)), 2499 K(min_wmark_pages(zone)),
2447 K(low_wmark_pages(zone)), 2500 K(low_wmark_pages(zone)),
2448 K(high_wmark_pages(zone)), 2501 K(high_wmark_pages(zone)),
@@ -2585,9 +2638,16 @@ static int __parse_numa_zonelist_order(char *s)
 
 static __init int setup_numa_zonelist_order(char *s)
 {
-	if (s)
-		return __parse_numa_zonelist_order(s);
-	return 0;
+	int ret;
+
+	if (!s)
+		return 0;
+
+	ret = __parse_numa_zonelist_order(s);
+	if (ret == 0)
+		strlcpy(numa_zonelist_order, s, NUMA_ZONELIST_ORDER_LEN);
+
+	return ret;
 }
 early_param("numa_zonelist_order", setup_numa_zonelist_order);
 
@@ -3639,13 +3699,45 @@ void __init free_bootmem_with_active_regions(int nid,
 }
 
 #ifdef CONFIG_HAVE_MEMBLOCK
+/*
+ * Basic iterator support. Return the last range of PFNs for a node
+ * Note: nid == MAX_NUMNODES returns last region regardless of node
+ */
+static int __meminit last_active_region_index_in_nid(int nid)
+{
+	int i;
+
+	for (i = nr_nodemap_entries - 1; i >= 0; i--)
+		if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
+			return i;
+
+	return -1;
+}
+
+/*
+ * Basic iterator support. Return the previous active range of PFNs for a node
+ * Note: nid == MAX_NUMNODES returns next region regardless of node
+ */
+static int __meminit previous_active_region_index_in_nid(int index, int nid)
+{
+	for (index = index - 1; index >= 0; index--)
+		if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
+			return index;
+
+	return -1;
+}
+
+#define for_each_active_range_index_in_nid_reverse(i, nid) \
+	for (i = last_active_region_index_in_nid(nid); i != -1; \
+		i = previous_active_region_index_in_nid(i, nid))
+
 u64 __init find_memory_core_early(int nid, u64 size, u64 align,
 			u64 goal, u64 limit)
 {
 	int i;
 
 	/* Need to go over early_node_map to find out good range for node */
-	for_each_active_range_index_in_nid(i, nid) {
+	for_each_active_range_index_in_nid_reverse(i, nid) {
 		u64 addr;
 		u64 ei_start, ei_last;
 		u64 final_start, final_end;
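
Note (illustrative, not part of the patch): the new for_each_active_range_index_in_nid_reverse() walks early_node_map from the highest-indexed range downwards, filtered by node id. The self-contained sketch below, using a made-up node_map array in place of early_node_map, shows the same reverse walk:

#include <stdio.h>

struct range { int nid; unsigned long start_pfn, end_pfn; };

/* Hypothetical stand-in for early_node_map. */
static struct range node_map[] = {
        { 0, 0x000, 0x100 },
        { 1, 0x100, 0x180 },
        { 0, 0x200, 0x400 },
};
static const int nr_entries = sizeof(node_map) / sizeof(node_map[0]);

/* Last index whose nid matches (nid < 0 means "any node"), else -1. */
static int last_index_in_nid(int nid)
{
        int i;

        for (i = nr_entries - 1; i >= 0; i--)
                if (nid < 0 || node_map[i].nid == nid)
                        return i;
        return -1;
}

static int previous_index_in_nid(int index, int nid)
{
        for (index = index - 1; index >= 0; index--)
                if (nid < 0 || node_map[index].nid == nid)
                        return index;
        return -1;
}

#define for_each_index_in_nid_reverse(i, nid) \
        for (i = last_index_in_nid(nid); i != -1; \
             i = previous_index_in_nid(i, nid))

int main(void)
{
        int i;

        /* walk node 0's ranges highest-first, as find_memory_core_early() now does */
        for_each_index_in_nid_reverse(i, 0)
                printf("range %d: %#lx-%#lx\n", i,
                       node_map[i].start_pfn, node_map[i].end_pfn);
        return 0;
}
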
@@ -3688,34 +3780,6 @@ int __init add_from_early_node_map(struct range *range, int az,
 	return nr_range;
 }
 
-#ifdef CONFIG_NO_BOOTMEM
-void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
-					u64 goal, u64 limit)
-{
-	void *ptr;
-	u64 addr;
-
-	if (limit > memblock.current_limit)
-		limit = memblock.current_limit;
-
-	addr = find_memory_core_early(nid, size, align, goal, limit);
-
-	if (addr == MEMBLOCK_ERROR)
-		return NULL;
-
-	ptr = phys_to_virt(addr);
-	memset(ptr, 0, size);
-	memblock_x86_reserve_range(addr, addr + size, "BOOTMEM");
-	/*
-	 * The min_count is set to 0 so that bootmem allocated blocks
-	 * are never reported as leaks.
-	 */
-	kmemleak_alloc(ptr, size, 0, 0);
-	return ptr;
-}
-#endif
-
-
 void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
 {
 	int i;
@@ -4014,7 +4078,7 @@ static void __init setup_usemap(struct pglist_data *pgdat,
 	zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize);
 }
 #else
-static void inline setup_usemap(struct pglist_data *pgdat,
+static inline void setup_usemap(struct pglist_data *pgdat,
 				struct zone *zone, unsigned long zonesize) {}
 #endif /* CONFIG_SPARSEMEM */
 
@@ -4749,15 +4813,6 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
 	dma_reserve = new_dma_reserve;
 }
 
-#ifndef CONFIG_NEED_MULTIPLE_NODES
-struct pglist_data __refdata contig_page_data = {
-#ifndef CONFIG_NO_BOOTMEM
-	.bdata = &bootmem_node_data[0]
-#endif
- };
-EXPORT_SYMBOL(contig_page_data);
-#endif
-
 void __init free_area_init(unsigned long *zones_size)
 {
 	free_area_init_node(0, zones_size,
@@ -5316,10 +5371,9 @@ __count_immobile_pages(struct zone *zone, struct page *page, int count)
 	for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) {
 		unsigned long check = pfn + iter;
 
-		if (!pfn_valid_within(check)) {
-			iter++;
+		if (!pfn_valid_within(check))
 			continue;
-		}
+
 		page = pfn_to_page(check);
 		if (!page_count(page)) {
 			if (PageBuddy(page))
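
Note (illustrative, not part of the patch): the hunk above drops a manual iter++ before continue; combined with the for loop's own increment, the old pattern skipped the pfn that follows every invalid one. A tiny demonstration of the difference, with an even/odd test standing in for pfn_valid_within():

#include <stdio.h>

int main(void)
{
        int iter, visited_old = 0, visited_new = 0;

        /* old style: extra iter++ before continue skips the next element too */
        for (iter = 0; iter < 8; iter++) {
                if (iter % 2 == 0) {    /* stand-in for !pfn_valid_within() */
                        iter++;
                        continue;
                }
                visited_old++;
        }

        /* new style: plain continue, only the for loop increments */
        for (iter = 0; iter < 8; iter++) {
                if (iter % 2 == 0)
                        continue;
                visited_new++;
        }

        printf("old=%d new=%d\n", visited_old, visited_new);   /* old=0 new=4 */
        return 0;
}
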
@@ -5517,7 +5571,6 @@ static struct trace_print_flags pageflag_names[] = {
 	{1UL << PG_swapcache, "swapcache" },
 	{1UL << PG_mappedtodisk, "mappedtodisk" },
 	{1UL << PG_reclaim, "reclaim" },
-	{1UL << PG_buddy, "buddy" },
 	{1UL << PG_swapbacked, "swapbacked" },
 	{1UL << PG_unevictable, "unevictable" },
 #ifdef CONFIG_MMU
@@ -5565,7 +5618,7 @@ void dump_page(struct page *page)
 {
 	printk(KERN_ALERT
 	       "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
-	       page, page_count(page), page_mapcount(page),
+	       page, atomic_read(&page->_count), page_mapcount(page),
 	       page->mapping, page->index);
 	dump_page_flags(page->flags);
 }