author    Ingo Molnar <mingo@elte.hu>    2011-02-14 05:55:18 -0500
committer Ingo Molnar <mingo@elte.hu>    2011-02-14 05:55:18 -0500
commit    d2137d5af4259f50c19addb8246a186c9ffac325 (patch)
tree      2f7e309f9cf8ef2f2698532c226edda38021fe69 /mm/page_alloc.c
parent    f005fe12b90c5b9fe180a09209a893e09affa8aa (diff)
parent    795abaf1e4e188c4171e3cd3dbb11a9fcacaf505 (diff)
Merge branch 'linus' into x86/bootmem
Conflicts:
arch/x86/mm/numa_64.c
Merge reason: fix the conflict, update to latest -rc and pick up this
dependent fix from Yinghai:
e6d2e2b2b1e1: memblock: don't adjust size in memblock_find_base()
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  216
1 file changed, 138 insertions, 78 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 19413bfdef92..887ce3bd823d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -104,19 +104,24 @@ gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
  * only be modified with pm_mutex held, unless the suspend/hibernate code is
  * guaranteed not to run in parallel with that modification).
  */
-void set_gfp_allowed_mask(gfp_t mask)
+
+static gfp_t saved_gfp_mask;
+
+void pm_restore_gfp_mask(void)
 {
 	WARN_ON(!mutex_is_locked(&pm_mutex));
-	gfp_allowed_mask = mask;
+	if (saved_gfp_mask) {
+		gfp_allowed_mask = saved_gfp_mask;
+		saved_gfp_mask = 0;
+	}
 }
 
-gfp_t clear_gfp_allowed_mask(gfp_t mask)
+void pm_restrict_gfp_mask(void)
 {
-	gfp_t ret = gfp_allowed_mask;
-
 	WARN_ON(!mutex_is_locked(&pm_mutex));
-	gfp_allowed_mask &= ~mask;
-	return ret;
+	WARN_ON(saved_gfp_mask);
+	saved_gfp_mask = gfp_allowed_mask;
+	gfp_allowed_mask &= ~GFP_IOFS;
 }
 #endif /* CONFIG_PM_SLEEP */
 
@@ -352,6 +357,7 @@ void prep_compound_page(struct page *page, unsigned long order)
 	}
 }
 
+/* update __split_huge_page_refcount if you change this function */
 static int destroy_compound_page(struct page *page, unsigned long order)
 {
 	int i;
@@ -421,18 +427,10 @@ static inline void rmv_page_order(struct page *page)
  *
  * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
  */
-static inline struct page *
-__page_find_buddy(struct page *page, unsigned long page_idx, unsigned int order)
-{
-	unsigned long buddy_idx = page_idx ^ (1 << order);
-
-	return page + (buddy_idx - page_idx);
-}
-
 static inline unsigned long
-__find_combined_index(unsigned long page_idx, unsigned int order)
+__find_buddy_index(unsigned long page_idx, unsigned int order)
 {
-	return (page_idx & ~(1 << order));
+	return page_idx ^ (1 << order);
 }
 
 /*
@@ -443,8 +441,8 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
  * (c) a page and its buddy have the same order &&
  * (d) a page and its buddy are in the same zone.
  *
- * For recording whether a page is in the buddy system, we use PG_buddy.
- * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
+ * For recording whether a page is in the buddy system, we set ->_mapcount -2.
+ * Setting, clearing, and testing _mapcount -2 is serialized by zone->lock.
  *
  * For recording page's order, we use page_private(page).
  */
@@ -477,7 +475,7 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
 * as necessary, plus some accounting needed to play nicely with other
 * parts of the VM system.
 * At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_buddy. Page's
+ * free pages of length of (1 << order) and marked with _mapcount -2. Page's
 * order is recorded in page_private(page) field.
 * So when we are allocating or freeing one, we can derive the state of the
 * other. That is, if we allocate a small block, and both were
@@ -494,6 +492,7 @@ static inline void __free_one_page(struct page *page,
 {
 	unsigned long page_idx;
 	unsigned long combined_idx;
+	unsigned long uninitialized_var(buddy_idx);
 	struct page *buddy;
 
 	if (unlikely(PageCompound(page)))
@@ -508,7 +507,8 @@ static inline void __free_one_page(struct page *page,
 	VM_BUG_ON(bad_range(zone, page));
 
 	while (order < MAX_ORDER-1) {
-		buddy = __page_find_buddy(page, page_idx, order);
+		buddy_idx = __find_buddy_index(page_idx, order);
+		buddy = page + (buddy_idx - page_idx);
 		if (!page_is_buddy(page, buddy, order))
 			break;
 
@@ -516,7 +516,7 @@ static inline void __free_one_page(struct page *page,
 		list_del(&buddy->lru);
 		zone->free_area[order].nr_free--;
 		rmv_page_order(buddy);
-		combined_idx = __find_combined_index(page_idx, order);
+		combined_idx = buddy_idx & page_idx;
 		page = page + (combined_idx - page_idx);
 		page_idx = combined_idx;
 		order++;
@@ -533,9 +533,10 @@ static inline void __free_one_page(struct page *page,
 	 */
 	if ((order < MAX_ORDER-2) && pfn_valid_within(page_to_pfn(buddy))) {
 		struct page *higher_page, *higher_buddy;
-		combined_idx = __find_combined_index(page_idx, order);
-		higher_page = page + combined_idx - page_idx;
-		higher_buddy = __page_find_buddy(higher_page, combined_idx, order + 1);
+		combined_idx = buddy_idx & page_idx;
+		higher_page = page + (combined_idx - page_idx);
+		buddy_idx = __find_buddy_index(combined_idx, order + 1);
+		higher_buddy = page + (buddy_idx - combined_idx);
 		if (page_is_buddy(higher_page, higher_buddy, order + 1)) {
 			list_add_tail(&page->lru,
 				&zone->free_area[order].free_list[migratetype]);
@@ -646,13 +647,10 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
 	trace_mm_page_free_direct(page, order);
 	kmemcheck_free_shadow(page, order);
 
-	for (i = 0; i < (1 << order); i++) {
-		struct page *pg = page + i;
-
-		if (PageAnon(pg))
-			pg->mapping = NULL;
-		bad += free_pages_check(pg);
-	}
+	if (PageAnon(page))
+		page->mapping = NULL;
+	for (i = 0; i < (1 << order); i++)
+		bad += free_pages_check(page + i);
 	if (bad)
 		return false;
 
@@ -1090,8 +1088,10 @@ static void drain_pages(unsigned int cpu)
 		pset = per_cpu_ptr(zone->pageset, cpu);
 
 		pcp = &pset->pcp;
-		free_pcppages_bulk(zone, pcp->count, pcp);
-		pcp->count = 0;
+		if (pcp->count) {
+			free_pcppages_bulk(zone, pcp->count, pcp);
+			pcp->count = 0;
+		}
 		local_irq_restore(flags);
 	}
 }
@@ -1455,24 +1455,24 @@ static inline int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 #endif /* CONFIG_FAIL_PAGE_ALLOC */
 
 /*
- * Return 1 if free pages are above 'mark'. This takes into account the order
+ * Return true if free pages are above 'mark'. This takes into account the order
  * of the allocation.
  */
-int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
-		      int classzone_idx, int alloc_flags)
+static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+		      int classzone_idx, int alloc_flags, long free_pages)
 {
 	/* free_pages my go negative - that's OK */
 	long min = mark;
-	long free_pages = zone_nr_free_pages(z) - (1 << order) + 1;
 	int o;
 
+	free_pages -= (1 << order) + 1;
 	if (alloc_flags & ALLOC_HIGH)
 		min -= min / 2;
 	if (alloc_flags & ALLOC_HARDER)
 		min -= min / 4;
 
 	if (free_pages <= min + z->lowmem_reserve[classzone_idx])
-		return 0;
+		return false;
 	for (o = 0; o < order; o++) {
 		/* At the next order, this order's pages become unavailable */
 		free_pages -= z->free_area[o].nr_free << o;
@@ -1481,9 +1481,28 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		min >>= 1;
 
 		if (free_pages <= min)
-			return 0;
+			return false;
 	}
-	return 1;
+	return true;
+}
+
+bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+		      int classzone_idx, int alloc_flags)
+{
+	return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
+					zone_page_state(z, NR_FREE_PAGES));
+}
+
+bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
+		      int classzone_idx, int alloc_flags)
+{
+	long free_pages = zone_page_state(z, NR_FREE_PAGES);
+
+	if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
+		free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
+
+	return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
+								free_pages);
 }
 
 #ifdef CONFIG_NUMA
@@ -1788,15 +1807,18 @@ static struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-	int migratetype, unsigned long *did_some_progress)
+	int migratetype, unsigned long *did_some_progress,
+	bool sync_migration)
 {
 	struct page *page;
 
 	if (!order || compaction_deferred(preferred_zone))
 		return NULL;
 
+	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
-								nodemask);
+						nodemask, sync_migration);
+	current->flags &= ~PF_MEMALLOC;
 	if (*did_some_progress != COMPACT_SKIPPED) {
 
 		/* Page migration frees to the PCP lists but we want merging */
@@ -1832,7 +1854,8 @@ static inline struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-	int migratetype, unsigned long *did_some_progress)
+	int migratetype, unsigned long *did_some_progress,
+	bool sync_migration)
 {
 	return NULL;
 }
@@ -1847,23 +1870,22 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 {
 	struct page *page = NULL;
 	struct reclaim_state reclaim_state;
-	struct task_struct *p = current;
 	bool drained = false;
 
 	cond_resched();
 
 	/* We now go into synchronous reclaim */
 	cpuset_memory_pressure_bump();
-	p->flags |= PF_MEMALLOC;
+	current->flags |= PF_MEMALLOC;
 	lockdep_set_current_reclaim_state(gfp_mask);
 	reclaim_state.reclaimed_slab = 0;
-	p->reclaim_state = &reclaim_state;
+	current->reclaim_state = &reclaim_state;
 
 	*did_some_progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask);
 
-	p->reclaim_state = NULL;
+	current->reclaim_state = NULL;
 	lockdep_clear_current_reclaim_state();
-	p->flags &= ~PF_MEMALLOC;
+	current->flags &= ~PF_MEMALLOC;
 
 	cond_resched();
 
@@ -1915,19 +1937,19 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
 
 static inline
 void wake_all_kswapd(unsigned int order, struct zonelist *zonelist,
-						enum zone_type high_zoneidx)
+						enum zone_type high_zoneidx,
+						enum zone_type classzone_idx)
 {
 	struct zoneref *z;
 	struct zone *zone;
 
 	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
-		wakeup_kswapd(zone, order);
+		wakeup_kswapd(zone, order, classzone_idx);
 }
 
 static inline int
 gfp_to_alloc_flags(gfp_t gfp_mask)
 {
-	struct task_struct *p = current;
 	int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
 	const gfp_t wait = gfp_mask & __GFP_WAIT;
 
@@ -1943,18 +1965,23 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 	alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
 
 	if (!wait) {
-		alloc_flags |= ALLOC_HARDER;
+		/*
+		 * Not worth trying to allocate harder for
+		 * __GFP_NOMEMALLOC even if it can't schedule.
+		 */
+		if (!(gfp_mask & __GFP_NOMEMALLOC))
+			alloc_flags |= ALLOC_HARDER;
 		/*
 		 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
 		 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 		 */
 		alloc_flags &= ~ALLOC_CPUSET;
-	} else if (unlikely(rt_task(p)) && !in_interrupt())
+	} else if (unlikely(rt_task(current)) && !in_interrupt())
 		alloc_flags |= ALLOC_HARDER;
 
 	if (likely(!(gfp_mask & __GFP_NOMEMALLOC))) {
 		if (!in_interrupt() &&
-		    ((p->flags & PF_MEMALLOC) ||
+		    ((current->flags & PF_MEMALLOC) ||
 		     unlikely(test_thread_flag(TIF_MEMDIE))))
 			alloc_flags |= ALLOC_NO_WATERMARKS;
 	}
@@ -1973,7 +2000,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	int alloc_flags;
 	unsigned long pages_reclaimed = 0;
 	unsigned long did_some_progress;
-	struct task_struct *p = current;
+	bool sync_migration = false;
 
 	/*
 	 * In the slowpath, we sanity check order to avoid ever trying to
@@ -1998,7 +2025,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		goto nopage;
 
 restart:
-	wake_all_kswapd(order, zonelist, high_zoneidx);
+	if (!(gfp_mask & __GFP_NO_KSWAPD))
+		wake_all_kswapd(order, zonelist, high_zoneidx,
+						zone_idx(preferred_zone));
 
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
@@ -2007,6 +2036,14 @@ restart:
 	 */
 	alloc_flags = gfp_to_alloc_flags(gfp_mask);
 
+	/*
+	 * Find the true preferred zone if the allocation is unconstrained by
+	 * cpusets.
+	 */
+	if (!(alloc_flags & ALLOC_CPUSET) && !nodemask)
+		first_zones_zonelist(zonelist, high_zoneidx, NULL,
+					&preferred_zone);
+
 	/* This is the last chance, in general, before the goto nopage. */
 	page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
 			high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -2029,21 +2066,26 @@ rebalance:
 		goto nopage;
 
 	/* Avoid recursion of direct reclaim */
-	if (p->flags & PF_MEMALLOC)
+	if (current->flags & PF_MEMALLOC)
 		goto nopage;
 
 	/* Avoid allocations with no watermarks from looping endlessly */
 	if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL))
 		goto nopage;
 
-	/* Try direct compaction */
+	/*
+	 * Try direct compaction. The first pass is asynchronous. Subsequent
+	 * attempts after direct reclaim are synchronous
+	 */
 	page = __alloc_pages_direct_compact(gfp_mask, order,
 					zonelist, high_zoneidx,
 					nodemask,
 					alloc_flags, preferred_zone,
-					migratetype, &did_some_progress);
+					migratetype, &did_some_progress,
+					sync_migration);
 	if (page)
 		goto got_pg;
+	sync_migration = true;
 
 	/* Try direct reclaim and then allocating */
 	page = __alloc_pages_direct_reclaim(gfp_mask, order,
@@ -2097,13 +2139,27 @@ rebalance:
 		/* Wait for some write requests to complete then retry */
 		wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
 		goto rebalance;
+	} else {
+		/*
+		 * High-order allocations do not necessarily loop after
+		 * direct reclaim and reclaim/compaction depends on compaction
+		 * being called after reclaim so call directly if necessary
+		 */
+		page = __alloc_pages_direct_compact(gfp_mask, order,
+					zonelist, high_zoneidx,
+					nodemask,
+					alloc_flags, preferred_zone,
+					migratetype, &did_some_progress,
+					sync_migration);
+		if (page)
+			goto got_pg;
 	}
 
 nopage:
 	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
 		printk(KERN_WARNING "%s: page allocation failure."
 			" order:%d, mode:0x%x\n",
-			p->comm, order, gfp_mask);
+			current->comm, order, gfp_mask);
 		dump_stack();
 		show_mem();
 	}
@@ -2146,7 +2202,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 
 	get_mems_allowed();
 	/* The preferred zone is used for statistics later */
-	first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone);
+	first_zones_zonelist(zonelist, high_zoneidx,
+				nodemask ? : &cpuset_current_mems_allowed,
+				&preferred_zone);
 	if (!preferred_zone) {
 		put_mems_allowed();
 		return NULL;
@@ -2437,7 +2495,7 @@ void show_free_areas(void)
 			" all_unreclaimable? %s"
 			"\n",
 			zone->name,
-			K(zone_nr_free_pages(zone)),
+			K(zone_page_state(zone, NR_FREE_PAGES)),
 			K(min_wmark_pages(zone)),
 			K(low_wmark_pages(zone)),
 			K(high_wmark_pages(zone)),
@@ -2580,9 +2638,16 @@ static int __parse_numa_zonelist_order(char *s)
 
 static __init int setup_numa_zonelist_order(char *s)
 {
-	if (s)
-		return __parse_numa_zonelist_order(s);
-	return 0;
+	int ret;
+
+	if (!s)
+		return 0;
+
+	ret = __parse_numa_zonelist_order(s);
+	if (ret == 0)
+		strlcpy(numa_zonelist_order, s, NUMA_ZONELIST_ORDER_LEN);
+
+	return ret;
 }
 early_param("numa_zonelist_order", setup_numa_zonelist_order);
 
@@ -3008,14 +3073,6 @@ static __init_refok int __build_all_zonelists(void *data)
 		build_zonelist_cache(pgdat);
 	}
 
-#ifdef CONFIG_MEMORY_HOTPLUG
-	/* Setup real pagesets for the new zone */
-	if (data) {
-		struct zone *zone = data;
-		setup_zone_pageset(zone);
-	}
-#endif
-
 	/*
 	 * Initialize the boot_pagesets that are going to be used
 	 * for bootstrapping processors. The real pagesets for
@@ -3064,7 +3121,11 @@ void build_all_zonelists(void *data)
 	} else {
 		/* we have to stop all cpus to guarantee there is no user
 		   of zonelist */
-		stop_machine(__build_all_zonelists, data, NULL);
+#ifdef CONFIG_MEMORY_HOTPLUG
+		if (data)
+			setup_zone_pageset((struct zone *)data);
+#endif
+		stop_machine(__build_all_zonelists, NULL, NULL);
 		/* cpuset refresh routine should be here */
 	}
 	vm_total_pages = nr_free_pagecache_pages();
@@ -4045,7 +4106,7 @@ static void __init setup_usemap(struct pglist_data *pgdat,
 		zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize);
 }
 #else
-static void inline setup_usemap(struct pglist_data *pgdat,
+static inline void setup_usemap(struct pglist_data *pgdat,
 				struct zone *zone, unsigned long zonesize) {}
 #endif /* CONFIG_SPARSEMEM */
 
@@ -5548,7 +5609,6 @@ static struct trace_print_flags pageflag_names[] = {
 	{1UL << PG_swapcache, "swapcache" },
 	{1UL << PG_mappedtodisk, "mappedtodisk" },
 	{1UL << PG_reclaim, "reclaim" },
-	{1UL << PG_buddy, "buddy" },
 	{1UL << PG_swapbacked, "swapbacked" },
 	{1UL << PG_unevictable, "unevictable" },
 #ifdef CONFIG_MMU
@@ -5596,7 +5656,7 @@ void dump_page(struct page *page)
 {
 	printk(KERN_ALERT
 		"page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
-		page, page_count(page), page_mapcount(page),
+		page, atomic_read(&page->_count), page_mapcount(page),
 		page->mapping, page->index);
 	dump_page_flags(page->flags);
 }