Diffstat (limited to 'mm/page_alloc.c'):
 mm/page_alloc.c | 280
 1 file changed, 158 insertions(+), 122 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f3d603cef2c0..f9e450c6b6e4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -65,6 +65,7 @@
 #include <linux/page_owner.h>
 #include <linux/kthread.h>
 #include <linux/memcontrol.h>
+#include <linux/ftrace.h>
 
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -1090,14 +1091,10 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 {
 	int migratetype = 0;
 	int batch_free = 0;
-	unsigned long nr_scanned, flags;
 	bool isolated_pageblocks;
 
-	spin_lock_irqsave(&zone->lock, flags);
+	spin_lock(&zone->lock);
 	isolated_pageblocks = has_isolate_pageblock(zone);
-	nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
-	if (nr_scanned)
-		__mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
 
 	while (count) {
 		struct page *page;
@@ -1142,7 +1139,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			trace_mm_page_pcpu_drain(page, 0, mt);
 		} while (--count && --batch_free && !list_empty(list));
 	}
-	spin_unlock_irqrestore(&zone->lock, flags);
+	spin_unlock(&zone->lock);
 }
 
 static void free_one_page(struct zone *zone,
@@ -1150,19 +1147,13 @@ static void free_one_page(struct zone *zone,
 				unsigned int order,
 				int migratetype)
 {
-	unsigned long nr_scanned, flags;
-	spin_lock_irqsave(&zone->lock, flags);
-	__count_vm_events(PGFREE, 1 << order);
-	nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
-	if (nr_scanned)
-		__mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
-
+	spin_lock(&zone->lock);
 	if (unlikely(has_isolate_pageblock(zone) ||
 		is_migrate_isolate(migratetype))) {
 		migratetype = get_pfnblock_migratetype(page, pfn);
 	}
 	__free_one_page(page, pfn, zone, order, migratetype);
-	spin_unlock_irqrestore(&zone->lock, flags);
+	spin_unlock(&zone->lock);
 }
 
 static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -1240,6 +1231,7 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
 
 static void __free_pages_ok(struct page *page, unsigned int order)
 {
+	unsigned long flags;
 	int migratetype;
 	unsigned long pfn = page_to_pfn(page);
 
@@ -1247,7 +1239,10 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 		return;
 
 	migratetype = get_pfnblock_migratetype(page, pfn);
+	local_irq_save(flags);
+	__count_vm_events(PGFREE, 1 << order);
 	free_one_page(page_zone(page), page, pfn, order, migratetype);
+	local_irq_restore(flags);
 }
 
 static void __init __free_pages_boot_core(struct page *page, unsigned int order)
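Note on the hunks above: interrupt handling moves out of the zone-lock helpers and into the caller. free_pcppages_bulk() and free_one_page() now take a plain spin_lock(&zone->lock) and rely on the caller having already disabled IRQs, while __free_pages_ok() does the local_irq_save()/local_irq_restore() and the PGFREE accounting itself. A condensed sketch of the resulting calling convention (not the full functions; all names are the ones already visible in the diff):

	/* callee: assumes IRQs are already off, only takes the zone lock */
	static void free_one_page(struct zone *zone, struct page *page,
				  unsigned long pfn, unsigned int order,
				  int migratetype)
	{
		spin_lock(&zone->lock);
		__free_one_page(page, pfn, zone, order, migratetype);
		spin_unlock(&zone->lock);
	}

	/* caller: owns the IRQ state and the vmstat accounting */
	static void __free_pages_ok(struct page *page, unsigned int order)
	{
		unsigned long flags;
		unsigned long pfn = page_to_pfn(page);
		int migratetype = get_pfnblock_migratetype(page, pfn);

		local_irq_save(flags);
		__count_vm_events(PGFREE, 1 << order);
		free_one_page(page_zone(page), page, pfn, order, migratetype);
		local_irq_restore(flags);
	}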
@@ -1695,10 +1690,10 @@ static inline int check_new_page(struct page *page)
 	return 1;
 }
 
-static inline bool free_pages_prezeroed(bool poisoned)
+static inline bool free_pages_prezeroed(void)
 {
 	return IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) &&
-		page_poisoning_enabled() && poisoned;
+		page_poisoning_enabled();
 }
 
 #ifdef CONFIG_DEBUG_VM
@@ -1752,17 +1747,10 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags
 						unsigned int alloc_flags)
 {
 	int i;
-	bool poisoned = true;
-
-	for (i = 0; i < (1 << order); i++) {
-		struct page *p = page + i;
-		if (poisoned)
-			poisoned &= page_is_poisoned(p);
-	}
 
 	post_alloc_hook(page, order, gfp_flags);
 
-	if (!free_pages_prezeroed(poisoned) && (gfp_flags & __GFP_ZERO))
+	if (!free_pages_prezeroed() && (gfp_flags & __GFP_ZERO))
 		for (i = 0; i < (1 << order); i++)
 			clear_highpage(page + i);
 
@@ -1844,9 +1832,9 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
  * Note that start_page and end_pages are not aligned on a pageblock
  * boundary. If alignment is required, use move_freepages_block()
  */
-int move_freepages(struct zone *zone,
+static int move_freepages(struct zone *zone,
 			struct page *start_page, struct page *end_page,
-			int migratetype)
+			int migratetype, int *num_movable)
 {
 	struct page *page;
 	unsigned int order;
@@ -1863,6 +1851,9 @@ int move_freepages(struct zone *zone,
 	VM_BUG_ON(page_zone(start_page) != page_zone(end_page));
 #endif
 
+	if (num_movable)
+		*num_movable = 0;
+
 	for (page = start_page; page <= end_page;) {
 		if (!pfn_valid_within(page_to_pfn(page))) {
 			page++;
@@ -1873,6 +1864,15 @@ int move_freepages(struct zone *zone,
 		VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
 
 		if (!PageBuddy(page)) {
+			/*
+			 * We assume that pages that could be isolated for
+			 * migration are movable. But we don't actually try
+			 * isolating, as that would be expensive.
+			 */
+			if (num_movable &&
+					(PageLRU(page) || __PageMovable(page)))
+				(*num_movable)++;
+
 			page++;
 			continue;
 		}
@@ -1888,7 +1888,7 @@ int move_freepages(struct zone *zone,
 }
 
 int move_freepages_block(struct zone *zone, struct page *page,
-				int migratetype)
+				int migratetype, int *num_movable)
 {
 	unsigned long start_pfn, end_pfn;
 	struct page *start_page, *end_page;
@@ -1905,7 +1905,8 @@ int move_freepages_block(struct zone *zone, struct page *page,
 	if (!zone_spans_pfn(zone, end_pfn))
 		return 0;
 
-	return move_freepages(zone, start_page, end_page, migratetype);
+	return move_freepages(zone, start_page, end_page, migratetype,
+								num_movable);
 }
 
 static void change_pageblock_range(struct page *pageblock_page,
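move_freepages() and move_freepages_block() now take an optional int *num_movable out-parameter: when it is non-NULL, the walk also counts allocated (non-buddy) pages that look movable (PageLRU() or __PageMovable()), which steal_suitable_fallback() consumes below. Callers that only want the free pages moved pass NULL, as later hunks in this diff do. Roughly, the two call styles:

	int free_pages, movable_pages;

	/* caller wants to know how many allocated pages look movable */
	free_pages = move_freepages_block(zone, page, start_type,
						&movable_pages);

	/* caller only wants the free pages moved; counting is skipped */
	move_freepages_block(zone, page, MIGRATE_HIGHATOMIC, NULL);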
@@ -1955,28 +1956,79 @@ static bool can_steal_fallback(unsigned int order, int start_mt)
 /*
  * This function implements actual steal behaviour. If order is large enough,
  * we can steal whole pageblock. If not, we first move freepages in this
- * pageblock and check whether half of pages are moved or not. If half of
- * pages are moved, we can change migratetype of pageblock and permanently
- * use it's pages as requested migratetype in the future.
+ * pageblock to our migratetype and determine how many already-allocated pages
+ * are there in the pageblock with a compatible migratetype. If at least half
+ * of pages are free or compatible, we can change migratetype of the pageblock
+ * itself, so pages freed in the future will be put on the correct free list.
  */
 static void steal_suitable_fallback(struct zone *zone, struct page *page,
-					int start_type)
+					int start_type, bool whole_block)
 {
 	unsigned int current_order = page_order(page);
-	int pages;
+	struct free_area *area;
+	int free_pages, movable_pages, alike_pages;
+	int old_block_type;
+
+	old_block_type = get_pageblock_migratetype(page);
+
+	/*
+	 * This can happen due to races and we want to prevent broken
+	 * highatomic accounting.
+	 */
+	if (is_migrate_highatomic(old_block_type))
+		goto single_page;
 
 	/* Take ownership for orders >= pageblock_order */
 	if (current_order >= pageblock_order) {
 		change_pageblock_range(page, current_order, start_type);
-		return;
+		goto single_page;
+	}
+
+	/* We are not allowed to try stealing from the whole block */
+	if (!whole_block)
+		goto single_page;
+
+	free_pages = move_freepages_block(zone, page, start_type,
+						&movable_pages);
+	/*
+	 * Determine how many pages are compatible with our allocation.
+	 * For movable allocation, it's the number of movable pages which
+	 * we just obtained. For other types it's a bit more tricky.
+	 */
+	if (start_type == MIGRATE_MOVABLE) {
+		alike_pages = movable_pages;
+	} else {
+		/*
+		 * If we are falling back a RECLAIMABLE or UNMOVABLE allocation
+		 * to MOVABLE pageblock, consider all non-movable pages as
+		 * compatible. If it's UNMOVABLE falling back to RECLAIMABLE or
+		 * vice versa, be conservative since we can't distinguish the
+		 * exact migratetype of non-movable pages.
+		 */
+		if (old_block_type == MIGRATE_MOVABLE)
+			alike_pages = pageblock_nr_pages
+						- (free_pages + movable_pages);
+		else
+			alike_pages = 0;
 	}
 
-	pages = move_freepages_block(zone, page, start_type);
+	/* moving whole block can fail due to zone boundary conditions */
+	if (!free_pages)
+		goto single_page;
 
-	/* Claim the whole block if over half of it is free */
-	if (pages >= (1 << (pageblock_order-1)) ||
+	/*
+	 * If a sufficient number of pages in the block are either free or of
+	 * comparable migratability as our allocation, claim the whole block.
+	 */
+	if (free_pages + alike_pages >= (1 << (pageblock_order-1)) ||
 			page_group_by_mobility_disabled)
 		set_pageblock_migratetype(page, start_type);
+
+	return;
+
+single_page:
+	area = &zone->free_area[current_order];
+	list_move(&page->lru, &area->free_list[start_type]);
 }
 
 /*
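For a sense of the new claim heuristic: with a pageblock_order of 9 (an assumption here; the value is architecture and config dependent), a pageblock holds 512 pages and the claim threshold 1 << (pageblock_order - 1) is 256. The numbers below are purely hypothetical, just to show the arithmetic for an UNMOVABLE request stealing from a MOVABLE block:

	int free_pages = 100;	/* free pages just moved to our free list */
	int movable_pages = 80;	/* allocated pages that look movable */

	/*
	 * old_block_type == MIGRATE_MOVABLE, so everything that is neither
	 * free nor movable counts as compatible with the UNMOVABLE request.
	 */
	int alike_pages = 512 - (free_pages + movable_pages);	/* 332 */

	/*
	 * free_pages + alike_pages = 432 >= 256, so the whole pageblock is
	 * claimed and its migratetype set to the requested type.
	 */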
@@ -2042,11 +2094,11 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
 
 	/* Yoink! */
 	mt = get_pageblock_migratetype(page);
-	if (mt != MIGRATE_HIGHATOMIC &&
-			!is_migrate_isolate(mt) && !is_migrate_cma(mt)) {
+	if (!is_migrate_highatomic(mt) && !is_migrate_isolate(mt)
+	    && !is_migrate_cma(mt)) {
 		zone->nr_reserved_highatomic += pageblock_nr_pages;
 		set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
-		move_freepages_block(zone, page, MIGRATE_HIGHATOMIC);
+		move_freepages_block(zone, page, MIGRATE_HIGHATOMIC, NULL);
 	}
 
 out_unlock:
@@ -2100,8 +2152,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
 		 * from highatomic to ac->migratetype. So we should
 		 * adjust the count once.
 		 */
-		if (get_pageblock_migratetype(page) ==
-					MIGRATE_HIGHATOMIC) {
+		if (is_migrate_highatomic_page(page)) {
 			/*
 			 * It should never happen but changes to
 			 * locking could inadvertently allow a per-cpu
@@ -2124,7 +2175,8 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
 		 * may increase.
 		 */
 		set_pageblock_migratetype(page, ac->migratetype);
-		ret = move_freepages_block(zone, page, ac->migratetype);
+		ret = move_freepages_block(zone, page, ac->migratetype,
+									NULL);
 		if (ret) {
 			spin_unlock_irqrestore(&zone->lock, flags);
 			return ret;
@@ -2136,8 +2188,13 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
 	return false;
 }
 
-/* Remove an element from the buddy allocator from the fallback list */
-static inline struct page *
+/*
+ * Try finding a free buddy page on the fallback list and put it on the free
+ * list of requested migratetype, possibly along with other pages from the same
+ * block, depending on fragmentation avoidance heuristics. Returns true if
+ * fallback was found so that __rmqueue_smallest() can grab it.
+ */
+static inline bool
 __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
 {
 	struct free_area *area;
@@ -2158,33 +2215,17 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
 
 		page = list_first_entry(&area->free_list[fallback_mt],
 						struct page, lru);
-		if (can_steal &&
-			get_pageblock_migratetype(page) != MIGRATE_HIGHATOMIC)
-			steal_suitable_fallback(zone, page, start_migratetype);
-
-		/* Remove the page from the freelists */
-		area->nr_free--;
-		list_del(&page->lru);
-		rmv_page_order(page);
 
-		expand(zone, page, order, current_order, area,
-					start_migratetype);
-		/*
-		 * The pcppage_migratetype may differ from pageblock's
-		 * migratetype depending on the decisions in
-		 * find_suitable_fallback(). This is OK as long as it does not
-		 * differ for MIGRATE_CMA pageblocks. Those can be used as
-		 * fallback only via special __rmqueue_cma_fallback() function
-		 */
-		set_pcppage_migratetype(page, start_migratetype);
+		steal_suitable_fallback(zone, page, start_migratetype,
+								can_steal);
 
 		trace_mm_page_alloc_extfrag(page, order, current_order,
 				start_migratetype, fallback_mt);
 
-		return page;
+		return true;
 	}
 
-	return NULL;
+	return false;
 }
 
 /*
@@ -2196,13 +2237,14 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order,
 {
 	struct page *page;
 
+retry:
 	page = __rmqueue_smallest(zone, order, migratetype);
 	if (unlikely(!page)) {
 		if (migratetype == MIGRATE_MOVABLE)
 			page = __rmqueue_cma_fallback(zone, order);
 
-		if (!page)
-			page = __rmqueue_fallback(zone, order, migratetype);
+		if (!page && __rmqueue_fallback(zone, order, migratetype))
+			goto retry;
 	}
 
 	trace_mm_page_alloc_zone_locked(page, order, migratetype);
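__rmqueue_fallback() no longer removes and splits a page itself; it only moves suitable pages onto free_list[start_migratetype] (via steal_suitable_fallback()) and reports success, and __rmqueue() then loops back to __rmqueue_smallest(). The apparent intent is that the split done by expand() now always happens on the smallest suitable page of the requested migratetype rather than on whichever page was picked from the fallback list. Condensed from the hunk above:

	retry:
		page = __rmqueue_smallest(zone, order, migratetype);
		if (unlikely(!page)) {
			if (migratetype == MIGRATE_MOVABLE)
				page = __rmqueue_cma_fallback(zone, order);

			/* fallback only refills free_list[migratetype] ... */
			if (!page && __rmqueue_fallback(zone, order, migratetype))
				goto retry;	/* ... so retry and split the smallest */
		}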
@@ -2219,9 +2261,8 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 			int migratetype, bool cold)
 {
 	int i, alloced = 0;
-	unsigned long flags;
 
-	spin_lock_irqsave(&zone->lock, flags);
+	spin_lock(&zone->lock);
 	for (i = 0; i < count; ++i) {
 		struct page *page = __rmqueue(zone, order, migratetype);
 		if (unlikely(page == NULL))
@@ -2257,7 +2298,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 	 * pages added to the pcp list.
 	 */
 	__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
-	spin_unlock_irqrestore(&zone->lock, flags);
+	spin_unlock(&zone->lock);
 	return alloced;
 }
 
@@ -2485,25 +2526,22 @@ void free_hot_cold_page(struct page *page, bool cold)
 {
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
+	unsigned long flags;
 	unsigned long pfn = page_to_pfn(page);
 	int migratetype;
 
-	if (in_interrupt()) {
-		__free_pages_ok(page, 0);
-		return;
-	}
-
 	if (!free_pcp_prepare(page))
 		return;
 
 	migratetype = get_pfnblock_migratetype(page, pfn);
 	set_pcppage_migratetype(page, migratetype);
-	preempt_disable();
+	local_irq_save(flags);
+	__count_vm_event(PGFREE);
 
 	/*
 	 * We only track unmovable, reclaimable and movable on pcp lists.
 	 * Free ISOLATE pages back to the allocator because they are being
-	 * offlined but treat RESERVE as movable pages so we can get those
+	 * offlined but treat HIGHATOMIC as movable pages so we can get those
 	 * areas back if necessary. Otherwise, we may have to free
 	 * excessively into the page allocator
 	 */
@@ -2515,7 +2553,6 @@ void free_hot_cold_page(struct page *page, bool cold)
 		migratetype = MIGRATE_MOVABLE;
 	}
 
-	__count_vm_event(PGFREE);
 	pcp = &this_cpu_ptr(zone->pageset)->pcp;
 	if (!cold)
 		list_add(&page->lru, &pcp->lists[migratetype]);
@@ -2529,7 +2566,7 @@ void free_hot_cold_page(struct page *page, bool cold)
 	}
 
 out:
-	preempt_enable();
+	local_irq_restore(flags);
 }
 
 /*
@@ -2614,7 +2651,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
 	for (; page < endpage; page += pageblock_nr_pages) {
 		int mt = get_pageblock_migratetype(page);
 		if (!is_migrate_isolate(mt) && !is_migrate_cma(mt)
-		    && mt != MIGRATE_HIGHATOMIC)
+		    && !is_migrate_highatomic(mt))
 			set_pageblock_migratetype(page,
 						  MIGRATE_MOVABLE);
 	}
@@ -2654,8 +2691,6 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
 {
 	struct page *page;
 
-	VM_BUG_ON(in_interrupt());
-
 	do {
 		if (list_empty(list)) {
 			pcp->count += rmqueue_bulk(zone, 0,
@@ -2686,8 +2721,9 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 	struct list_head *list;
 	bool cold = ((gfp_flags & __GFP_COLD) != 0);
 	struct page *page;
+	unsigned long flags;
 
-	preempt_disable();
+	local_irq_save(flags);
 	pcp = &this_cpu_ptr(zone->pageset)->pcp;
 	list = &pcp->lists[migratetype];
 	page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list);
@@ -2695,7 +2731,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
 		zone_statistics(preferred_zone, zone);
 	}
-	preempt_enable();
+	local_irq_restore(flags);
 	return page;
 }
 
@@ -2711,7 +2747,7 @@ struct page *rmqueue(struct zone *preferred_zone,
 	unsigned long flags;
 	struct page *page;
 
-	if (likely(order == 0) && !in_interrupt()) {
+	if (likely(order == 0)) {
 		page = rmqueue_pcplist(preferred_zone, zone, order,
 				gfp_flags, migratetype);
 		goto out;
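This group of hunks looks like a revert of the earlier scheme in which the per-cpu (pcp) lists were used only from IRQ-safe contexts under a bare preempt_disable(): the pcp free and allocation paths go back to local_irq_save()/local_irq_restore(), the in_interrupt() bailouts and the VM_BUG_ON(in_interrupt()) disappear, and order-0 requests use the pcp lists unconditionally again. A condensed sketch of the resulting critical sections, using only calls visible in the hunks above:

	/* free side (free_hot_cold_page), condensed */
	local_irq_save(flags);
	__count_vm_event(PGFREE);
	pcp = &this_cpu_ptr(zone->pageset)->pcp;
	list_add(&page->lru, &pcp->lists[migratetype]);
	local_irq_restore(flags);

	/* allocation side (rmqueue_pcplist), condensed */
	local_irq_save(flags);
	pcp = &this_cpu_ptr(zone->pageset)->pcp;
	list = &pcp->lists[migratetype];
	page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list);
	local_irq_restore(flags);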
@@ -3113,8 +3149,7 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
 	static DEFINE_RATELIMIT_STATE(nopage_rs, DEFAULT_RATELIMIT_INTERVAL,
 				      DEFAULT_RATELIMIT_BURST);
 
-	if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) ||
-	    debug_guardpage_minorder() > 0)
+	if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
 		return;
 
 	pr_warn("%s: ", current->comm);
@@ -3248,14 +3283,15 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 		enum compact_priority prio, enum compact_result *compact_result)
 {
 	struct page *page;
+	unsigned int noreclaim_flag;
 
 	if (!order)
 		return NULL;
 
-	current->flags |= PF_MEMALLOC;
+	noreclaim_flag = memalloc_noreclaim_save();
 	*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
 									prio);
-	current->flags &= ~PF_MEMALLOC;
+	memalloc_noreclaim_restore(noreclaim_flag);
 
 	if (*compact_result <= COMPACT_INACTIVE)
 		return NULL;
@@ -3402,12 +3438,13 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
 {
 	struct reclaim_state reclaim_state;
 	int progress;
+	unsigned int noreclaim_flag;
 
 	cond_resched();
 
 	/* We now go into synchronous reclaim */
 	cpuset_memory_pressure_bump();
-	current->flags |= PF_MEMALLOC;
+	noreclaim_flag = memalloc_noreclaim_save();
 	lockdep_set_current_reclaim_state(gfp_mask);
 	reclaim_state.reclaimed_slab = 0;
 	current->reclaim_state = &reclaim_state;
@@ -3417,7 +3454,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
 
 	current->reclaim_state = NULL;
 	lockdep_clear_current_reclaim_state();
-	current->flags &= ~PF_MEMALLOC;
+	memalloc_noreclaim_restore(noreclaim_flag);
 
 	cond_resched();
 
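The open-coded PF_MEMALLOC manipulation in the compaction and reclaim entry points is replaced by memalloc_noreclaim_save()/memalloc_noreclaim_restore(). The helpers set PF_MEMALLOC but hand back the previous state, so a nested noreclaim section cannot accidentally clear the flag for an outer one. A minimal sketch of the contract as used here:

	unsigned int noreclaim_flag;

	noreclaim_flag = memalloc_noreclaim_save();	/* sets PF_MEMALLOC, returns old bit */

	/* work that must not recurse into direct reclaim goes here */

	memalloc_noreclaim_restore(noreclaim_flag);	/* puts the saved bit back */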
@@ -3525,19 +3562,12 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
 }
 
 /*
- * Maximum number of reclaim retries without any progress before OOM killer
- * is consider as the only way to move forward.
- */
-#define MAX_RECLAIM_RETRIES 16
-
-/*
  * Checks whether it makes sense to retry the reclaim to make a forward progress
  * for the given allocation request.
- * The reclaim feedback represented by did_some_progress (any progress during
- * the last reclaim round) and no_progress_loops (number of reclaim rounds without
- * any progress in a row) is considered as well as the reclaimable pages on the
- * applicable zone list (with a backoff mechanism which is a function of
- * no_progress_loops).
+ *
+ * We give up when we either have tried MAX_RECLAIM_RETRIES in a row
+ * without success, or when we couldn't even meet the watermark if we
+ * reclaimed all remaining pages on the LRU lists.
  *
  * Returns true if a retry is viable or false to enter the oom path.
  */
@@ -3582,13 +3612,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
 		bool wmark;
 
 		available = reclaimable = zone_reclaimable_pages(zone);
-		available -= DIV_ROUND_UP((*no_progress_loops) * available,
-					  MAX_RECLAIM_RETRIES);
 		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
 
 		/*
-		 * Would the allocation succeed if we reclaimed the whole
-		 * available?
+		 * Would the allocation succeed if we reclaimed all
+		 * reclaimable pages?
 		 */
 		wmark = __zone_watermark_ok(zone, order, min_wmark,
 				ac_classzone_idx(ac), alloc_flags, available);
@@ -3639,6 +3667,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 					struct alloc_context *ac)
 {
 	bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
+	const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;
 	struct page *page = NULL;
 	unsigned int alloc_flags;
 	unsigned long did_some_progress;
@@ -3706,12 +3735,17 @@ retry_cpuset:
 
 	/*
 	 * For costly allocations, try direct compaction first, as it's likely
-	 * that we have enough base pages and don't need to reclaim. Don't try
-	 * that for allocations that are allowed to ignore watermarks, as the
-	 * ALLOC_NO_WATERMARKS attempt didn't yet happen.
+	 * that we have enough base pages and don't need to reclaim. For non-
+	 * movable high-order allocations, do that as well, as compaction will
+	 * try prevent permanent fragmentation by migrating from blocks of the
+	 * same migratetype.
+	 * Don't try this for allocations that are allowed to ignore
+	 * watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen.
 	 */
-	if (can_direct_reclaim && order > PAGE_ALLOC_COSTLY_ORDER &&
-		!gfp_pfmemalloc_allowed(gfp_mask)) {
+	if (can_direct_reclaim &&
+			(costly_order ||
+			   (order > 0 && ac->migratetype != MIGRATE_MOVABLE))
+			&& !gfp_pfmemalloc_allowed(gfp_mask)) {
 		page = __alloc_pages_direct_compact(gfp_mask, order,
 						alloc_flags, ac,
 						INIT_COMPACT_PRIORITY,
@@ -3723,7 +3757,7 @@ retry_cpuset:
 	 * Checks for costly allocations with __GFP_NORETRY, which
 	 * includes THP page fault allocations
 	 */
-	if (gfp_mask & __GFP_NORETRY) {
+	if (costly_order && (gfp_mask & __GFP_NORETRY)) {
 		/*
 		 * If compaction is deferred for high-order allocations,
 		 * it is because sync compaction recently failed. If
@@ -3774,7 +3808,7 @@ retry:
 
 	/* Make sure we know about allocations which stall for too long */
 	if (time_after(jiffies, alloc_start + stall_timeout)) {
-		warn_alloc(gfp_mask, ac->nodemask,
+		warn_alloc(gfp_mask & ~__GFP_NOWARN, ac->nodemask,
 			"page allocation stalls for %ums, order:%u",
 			jiffies_to_msecs(jiffies-alloc_start), order);
 		stall_timeout += 10 * HZ;
@@ -3804,7 +3838,7 @@ retry:
 	 * Do not retry costly high order allocations unless they are
 	 * __GFP_REPEAT
 	 */
-	if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
+	if (costly_order && !(gfp_mask & __GFP_REPEAT))
 		goto nopage;
 
 	if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
@@ -3974,10 +4008,12 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 		goto out;
 
 	/*
-	 * Runtime PM, block IO and its error handling path can deadlock
-	 * because I/O on the device might not complete.
+	 * Apply scoped allocation constraints. This is mainly about GFP_NOFS
+	 * resp. GFP_NOIO which has to be inherited for all allocation requests
+	 * from a particular context which has been marked by
+	 * memalloc_no{fs,io}_{save,restore}.
 	 */
-	alloc_mask = memalloc_noio_flags(gfp_mask);
+	alloc_mask = current_gfp_context(gfp_mask);
 	ac.spread_dirty_pages = false;
 
 	/*
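memalloc_noio_flags() is replaced by current_gfp_context(), which applies the task's scoped allocation constraints to the passed mask: if the caller is inside a memalloc_noio_save() or memalloc_nofs_save() section, __GFP_IO and/or __GFP_FS are stripped from gfp_mask before the allocation proceeds. A filesystem-side sketch of how such a scope is expected to be used (the nofs helper names come from the same scoped-allocation series, not from this hunk):

	unsigned int nofs_flag;

	nofs_flag = memalloc_nofs_save();

	/*
	 * Any allocation in here, even a plain GFP_KERNEL one, behaves like
	 * GFP_NOFS because __alloc_pages_nodemask() now filters the mask
	 * through current_gfp_context().
	 */

	memalloc_nofs_restore(nofs_flag);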
@@ -4250,7 +4286,8 @@ EXPORT_SYMBOL(free_pages_exact);
  * nr_free_zone_pages() counts the number of counts pages which are beyond the
  * high watermark within all zones at or below a given zone index. For each
  * zone, the number of pages is calculated as:
- *     managed_pages - high_pages
+ *
+ *     nr_free_zone_pages = managed_pages - high_pages
  */
 static unsigned long nr_free_zone_pages(int offset)
 {
@@ -4512,7 +4549,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 #endif
 			" writeback_tmp:%lukB"
 			" unstable:%lukB"
-			" pages_scanned:%lu"
 			" all_unreclaimable? %s"
 			"\n",
 			pgdat->node_id,
@@ -4535,8 +4571,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 #endif
 			K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
 			K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
-			node_page_state(pgdat, NR_PAGES_SCANNED),
-			!pgdat_reclaimable(pgdat) ? "yes" : "no");
+			pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
+				"yes" : "no");
 	}
 
 	for_each_populated_zone(zone) {
@@ -7431,7 +7467,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 		.zone = page_zone(pfn_to_page(start)),
 		.mode = MIGRATE_SYNC,
 		.ignore_skip_hint = true,
-		.gfp_mask = memalloc_noio_flags(gfp_mask),
+		.gfp_mask = current_gfp_context(gfp_mask),
 	};
 	INIT_LIST_HEAD(&cc.migratepages);
 