Diffstat (limited to 'mm/page_alloc.c')
 -rw-r--r--  mm/page_alloc.c  280
 1 file changed, 158 insertions(+), 122 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f3d603cef2c0..f9e450c6b6e4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -65,6 +65,7 @@
 #include <linux/page_owner.h>
 #include <linux/kthread.h>
 #include <linux/memcontrol.h>
+#include <linux/ftrace.h>
 
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -1090,14 +1091,10 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 {
         int migratetype = 0;
         int batch_free = 0;
-        unsigned long nr_scanned, flags;
         bool isolated_pageblocks;
 
-        spin_lock_irqsave(&zone->lock, flags);
+        spin_lock(&zone->lock);
         isolated_pageblocks = has_isolate_pageblock(zone);
-        nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
-        if (nr_scanned)
-                __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
 
         while (count) {
                 struct page *page;
@@ -1142,7 +1139,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
                         trace_mm_page_pcpu_drain(page, 0, mt);
                 } while (--count && --batch_free && !list_empty(list));
         }
-        spin_unlock_irqrestore(&zone->lock, flags);
+        spin_unlock(&zone->lock);
 }
 
 static void free_one_page(struct zone *zone,
@@ -1150,19 +1147,13 @@ static void free_one_page(struct zone *zone,
                                 unsigned int order,
                                 int migratetype)
 {
-        unsigned long nr_scanned, flags;
-        spin_lock_irqsave(&zone->lock, flags);
-        __count_vm_events(PGFREE, 1 << order);
-        nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
-        if (nr_scanned)
-                __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
-
+        spin_lock(&zone->lock);
         if (unlikely(has_isolate_pageblock(zone) ||
                      is_migrate_isolate(migratetype))) {
                 migratetype = get_pfnblock_migratetype(page, pfn);
         }
         __free_one_page(page, pfn, zone, order, migratetype);
-        spin_unlock_irqrestore(&zone->lock, flags);
+        spin_unlock(&zone->lock);
 }
 
 static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -1240,6 +1231,7 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
 
 static void __free_pages_ok(struct page *page, unsigned int order)
 {
+        unsigned long flags;
         int migratetype;
         unsigned long pfn = page_to_pfn(page);
 
@@ -1247,7 +1239,10 @@ static void __free_pages_ok(struct page *page, unsigned int order)
                 return;
 
         migratetype = get_pfnblock_migratetype(page, pfn);
+        local_irq_save(flags);
+        __count_vm_events(PGFREE, 1 << order);
         free_one_page(page_zone(page), page, pfn, order, migratetype);
+        local_irq_restore(flags);
 }
 
 static void __init __free_pages_boot_core(struct page *page, unsigned int order)
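
Taken together, the hunks above move interrupt disabling out of the zone-lock helpers and into their callers: free_one_page() and free_pcppages_bulk() now take zone->lock with a plain spin_lock(), while __free_pages_ok() wraps the whole operation, including the PGFREE counter update that relies on IRQs being off, in a single local_irq_save()/local_irq_restore() section. A condensed sketch of the resulting pattern, collapsed into one illustrative function that reuses page_alloc.c internals (not literal kernel code, and the "_sketch" name is made up):

static void free_page_to_buddy_sketch(struct zone *zone, struct page *page,
                                      unsigned long pfn, unsigned int order,
                                      int migratetype)
{
        unsigned long flags;

        local_irq_save(flags);                  /* one IRQ-off section in the caller */
        __count_vm_events(PGFREE, 1 << order);  /* per-cpu counters need IRQs off */
        spin_lock(&zone->lock);                 /* plain lock: IRQs are already disabled */
        __free_one_page(page, pfn, zone, order, migratetype);
        spin_unlock(&zone->lock);
        local_irq_restore(flags);
}
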
@@ -1695,10 +1690,10 @@ static inline int check_new_page(struct page *page)
         return 1;
 }
 
-static inline bool free_pages_prezeroed(bool poisoned)
+static inline bool free_pages_prezeroed(void)
 {
         return IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) &&
-                page_poisoning_enabled() && poisoned;
+                page_poisoning_enabled();
 }
 
 #ifdef CONFIG_DEBUG_VM
@@ -1752,17 +1747,10 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags
                                                         unsigned int alloc_flags)
 {
         int i;
-        bool poisoned = true;
-
-        for (i = 0; i < (1 << order); i++) {
-                struct page *p = page + i;
-                if (poisoned)
-                        poisoned &= page_is_poisoned(p);
-        }
 
         post_alloc_hook(page, order, gfp_flags);
 
-        if (!free_pages_prezeroed(poisoned) && (gfp_flags & __GFP_ZERO))
+        if (!free_pages_prezeroed() && (gfp_flags & __GFP_ZERO))
                 for (i = 0; i < (1 << order); i++)
                         clear_highpage(page + i);
 
@@ -1844,9 +1832,9 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
  * Note that start_page and end_pages are not aligned on a pageblock
  * boundary. If alignment is required, use move_freepages_block()
  */
-int move_freepages(struct zone *zone,
+static int move_freepages(struct zone *zone,
                           struct page *start_page, struct page *end_page,
-                          int migratetype)
+                          int migratetype, int *num_movable)
 {
         struct page *page;
         unsigned int order;
@@ -1863,6 +1851,9 @@ int move_freepages(struct zone *zone,
         VM_BUG_ON(page_zone(start_page) != page_zone(end_page));
 #endif
 
+        if (num_movable)
+                *num_movable = 0;
+
         for (page = start_page; page <= end_page;) {
                 if (!pfn_valid_within(page_to_pfn(page))) {
                         page++;
@@ -1873,6 +1864,15 @@ int move_freepages(struct zone *zone,
                 VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
 
                 if (!PageBuddy(page)) {
+                        /*
+                         * We assume that pages that could be isolated for
+                         * migration are movable. But we don't actually try
+                         * isolating, as that would be expensive.
+                         */
+                        if (num_movable &&
+                                        (PageLRU(page) || __PageMovable(page)))
+                                (*num_movable)++;
+
                         page++;
                         continue;
                 }
@@ -1888,7 +1888,7 @@ int move_freepages(struct zone *zone,
 }
 
 int move_freepages_block(struct zone *zone, struct page *page,
-                                int migratetype)
+                                int migratetype, int *num_movable)
 {
         unsigned long start_pfn, end_pfn;
         struct page *start_page, *end_page;
@@ -1905,7 +1905,8 @@ int move_freepages_block(struct zone *zone, struct page *page,
         if (!zone_spans_pfn(zone, end_pfn))
                 return 0;
 
-        return move_freepages(zone, start_page, end_page, migratetype);
+        return move_freepages(zone, start_page, end_page, migratetype,
+                                                                num_movable);
 }
 
 static void change_pageblock_range(struct page *pageblock_page,
@@ -1955,28 +1956,79 @@ static bool can_steal_fallback(unsigned int order, int start_mt)
 /*
  * This function implements actual steal behaviour. If order is large enough,
  * we can steal whole pageblock. If not, we first move freepages in this
- * pageblock and check whether half of pages are moved or not. If half of
- * pages are moved, we can change migratetype of pageblock and permanently
- * use it's pages as requested migratetype in the future.
+ * pageblock to our migratetype and determine how many already-allocated pages
+ * there are in the pageblock with a compatible migratetype. If at least half
+ * of the pages are free or compatible, we can change migratetype of the
+ * pageblock itself, so pages freed in the future will be put on the correct free list.
  */
 static void steal_suitable_fallback(struct zone *zone, struct page *page,
-                                        int start_type)
+                                        int start_type, bool whole_block)
 {
         unsigned int current_order = page_order(page);
-        int pages;
+        struct free_area *area;
+        int free_pages, movable_pages, alike_pages;
+        int old_block_type;
+
+        old_block_type = get_pageblock_migratetype(page);
+
+        /*
+         * This can happen due to races and we want to prevent broken
+         * highatomic accounting.
+         */
+        if (is_migrate_highatomic(old_block_type))
+                goto single_page;
 
         /* Take ownership for orders >= pageblock_order */
         if (current_order >= pageblock_order) {
                 change_pageblock_range(page, current_order, start_type);
-                return;
+                goto single_page;
+        }
+
+        /* We are not allowed to try stealing from the whole block */
+        if (!whole_block)
+                goto single_page;
+
+        free_pages = move_freepages_block(zone, page, start_type,
+                                                &movable_pages);
+        /*
+         * Determine how many pages are compatible with our allocation.
+         * For movable allocation, it's the number of movable pages which
+         * we just obtained. For other types it's a bit more tricky.
+         */
+        if (start_type == MIGRATE_MOVABLE) {
+                alike_pages = movable_pages;
+        } else {
+                /*
+                 * If we are falling back a RECLAIMABLE or UNMOVABLE allocation
+                 * to MOVABLE pageblock, consider all non-movable pages as
+                 * compatible. If it's UNMOVABLE falling back to RECLAIMABLE or
+                 * vice versa, be conservative since we can't distinguish the
+                 * exact migratetype of non-movable pages.
+                 */
+                if (old_block_type == MIGRATE_MOVABLE)
+                        alike_pages = pageblock_nr_pages
+                                                - (free_pages + movable_pages);
+                else
+                        alike_pages = 0;
         }
 
-        pages = move_freepages_block(zone, page, start_type);
+        /* moving whole block can fail due to zone boundary conditions */
+        if (!free_pages)
+                goto single_page;
 
-        /* Claim the whole block if over half of it is free */
-        if (pages >= (1 << (pageblock_order-1)) ||
+        /*
+         * If a sufficient number of pages in the block are either free or of
+         * comparable migratability as our allocation, claim the whole block.
+         */
+        if (free_pages + alike_pages >= (1 << (pageblock_order-1)) ||
                         page_group_by_mobility_disabled)
                 set_pageblock_migratetype(page, start_type);
+
+        return;
+
+single_page:
+        area = &zone->free_area[current_order];
+        list_move(&page->lru, &area->free_list[start_type]);
 }
 
 /*
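
The claim condition in the rewritten steal_suitable_fallback() is easiest to see with concrete numbers. A small standalone illustration follows; the 512-page pageblock assumes pageblock_order == 9 (e.g. x86-64 with 4 KB pages and 2 MB pageblocks), and the free/alike counts are invented for the example:

#include <stdio.h>

int main(void)
{
        const int pageblock_order = 9;
        const int pageblock_nr_pages = 1 << pageblock_order;   /* 512 pages per block */
        const int threshold = 1 << (pageblock_order - 1);      /* 256: half a block */

        int free_pages = 180;   /* free pages just moved to our free list */
        int alike_pages = 90;   /* allocated pages judged compatible with our migratetype */

        if (free_pages + alike_pages >= threshold)
                printf("claim the whole %d-page block (%d >= %d)\n",
                       pageblock_nr_pages, free_pages + alike_pages, threshold);
        else
                printf("steal only the single page (%d < %d)\n",
                       free_pages + alike_pages, threshold);
        return 0;
}
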
@@ -2042,11 +2094,11 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
 
         /* Yoink! */
         mt = get_pageblock_migratetype(page);
-        if (mt != MIGRATE_HIGHATOMIC &&
-                        !is_migrate_isolate(mt) && !is_migrate_cma(mt)) {
+        if (!is_migrate_highatomic(mt) && !is_migrate_isolate(mt)
+                        && !is_migrate_cma(mt)) {
                 zone->nr_reserved_highatomic += pageblock_nr_pages;
                 set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
-                move_freepages_block(zone, page, MIGRATE_HIGHATOMIC);
+                move_freepages_block(zone, page, MIGRATE_HIGHATOMIC, NULL);
         }
 
 out_unlock:
@@ -2100,8 +2152,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
                          * from highatomic to ac->migratetype. So we should
                          * adjust the count once.
                          */
-                        if (get_pageblock_migratetype(page) ==
-                                                MIGRATE_HIGHATOMIC) {
+                        if (is_migrate_highatomic_page(page)) {
                                 /*
                                  * It should never happen but changes to
                                  * locking could inadvertently allow a per-cpu
@@ -2124,7 +2175,8 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
                          * may increase.
                          */
                         set_pageblock_migratetype(page, ac->migratetype);
-                        ret = move_freepages_block(zone, page, ac->migratetype);
+                        ret = move_freepages_block(zone, page, ac->migratetype,
+                                                                NULL);
                         if (ret) {
                                 spin_unlock_irqrestore(&zone->lock, flags);
                                 return ret;
@@ -2136,8 +2188,13 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
         return false;
 }
 
-/* Remove an element from the buddy allocator from the fallback list */
-static inline struct page *
+/*
+ * Try finding a free buddy page on the fallback list and put it on the free
+ * list of requested migratetype, possibly along with other pages from the same
+ * block, depending on fragmentation avoidance heuristics. Returns true if
+ * fallback was found so that __rmqueue_smallest() can grab it.
+ */
+static inline bool
 __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
 {
         struct free_area *area;
@@ -2158,33 +2215,17 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
 
                 page = list_first_entry(&area->free_list[fallback_mt],
                                                 struct page, lru);
-                if (can_steal &&
-                        get_pageblock_migratetype(page) != MIGRATE_HIGHATOMIC)
-                        steal_suitable_fallback(zone, page, start_migratetype);
-
-                /* Remove the page from the freelists */
-                area->nr_free--;
-                list_del(&page->lru);
-                rmv_page_order(page);
 
-                expand(zone, page, order, current_order, area,
-                                        start_migratetype);
-                /*
-                 * The pcppage_migratetype may differ from pageblock's
-                 * migratetype depending on the decisions in
-                 * find_suitable_fallback(). This is OK as long as it does not
-                 * differ for MIGRATE_CMA pageblocks. Those can be used as
-                 * fallback only via special __rmqueue_cma_fallback() function
-                 */
-                set_pcppage_migratetype(page, start_migratetype);
+                steal_suitable_fallback(zone, page, start_migratetype,
+                                                                can_steal);
 
                 trace_mm_page_alloc_extfrag(page, order, current_order,
                                 start_migratetype, fallback_mt);
 
-                return page;
+                return true;
         }
 
-        return NULL;
+        return false;
 }
 
 /*
@@ -2196,13 +2237,14 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order,
 {
         struct page *page;
 
+retry:
         page = __rmqueue_smallest(zone, order, migratetype);
         if (unlikely(!page)) {
                 if (migratetype == MIGRATE_MOVABLE)
                         page = __rmqueue_cma_fallback(zone, order);
 
-                if (!page)
-                        page = __rmqueue_fallback(zone, order, migratetype);
+                if (!page && __rmqueue_fallback(zone, order, migratetype))
+                        goto retry;
         }
 
         trace_mm_page_alloc_zone_locked(page, order, migratetype);
@@ -2219,9 +2261,8 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
                         int migratetype, bool cold)
 {
         int i, alloced = 0;
-        unsigned long flags;
 
-        spin_lock_irqsave(&zone->lock, flags);
+        spin_lock(&zone->lock);
         for (i = 0; i < count; ++i) {
                 struct page *page = __rmqueue(zone, order, migratetype);
                 if (unlikely(page == NULL))
@@ -2257,7 +2298,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
          * pages added to the pcp list.
          */
         __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
-        spin_unlock_irqrestore(&zone->lock, flags);
+        spin_unlock(&zone->lock);
         return alloced;
 }
 
@@ -2485,25 +2526,22 @@ void free_hot_cold_page(struct page *page, bool cold)
 {
         struct zone *zone = page_zone(page);
         struct per_cpu_pages *pcp;
+        unsigned long flags;
         unsigned long pfn = page_to_pfn(page);
         int migratetype;
 
-        if (in_interrupt()) {
-                __free_pages_ok(page, 0);
-                return;
-        }
-
         if (!free_pcp_prepare(page))
                 return;
 
         migratetype = get_pfnblock_migratetype(page, pfn);
         set_pcppage_migratetype(page, migratetype);
-        preempt_disable();
+        local_irq_save(flags);
+        __count_vm_event(PGFREE);
 
         /*
          * We only track unmovable, reclaimable and movable on pcp lists.
          * Free ISOLATE pages back to the allocator because they are being
-         * offlined but treat RESERVE as movable pages so we can get those
+         * offlined but treat HIGHATOMIC as movable pages so we can get those
          * areas back if necessary. Otherwise, we may have to free
          * excessively into the page allocator
          */
@@ -2515,7 +2553,6 @@ void free_hot_cold_page(struct page *page, bool cold)
                 migratetype = MIGRATE_MOVABLE;
         }
 
-        __count_vm_event(PGFREE);
         pcp = &this_cpu_ptr(zone->pageset)->pcp;
         if (!cold)
                 list_add(&page->lru, &pcp->lists[migratetype]);
@@ -2529,7 +2566,7 @@ void free_hot_cold_page(struct page *page, bool cold)
         }
 
 out:
-        preempt_enable();
+        local_irq_restore(flags);
 }
 
 /*
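
The per-cpu (pcp) free path above drops the preempt_disable()/in_interrupt() scheme and goes back to disabling interrupts around the per-cpu list manipulation: with only preemption disabled, an interrupt arriving on the same CPU could re-enter the allocator and corrupt pcp->lists, which is why the previous code had to bounce IRQ-context frees to __free_pages_ok(). A rough sketch of the access pattern that results (illustrative only, using the file's internal types; the "_sketch" name is made up):

static void free_to_pcp_sketch(struct zone *zone, struct page *page,
                               int migratetype, bool cold)
{
        struct per_cpu_pages *pcp;
        unsigned long flags;

        local_irq_save(flags);          /* keeps IRQ handlers off this CPU's pcp lists */
        __count_vm_event(PGFREE);
        pcp = &this_cpu_ptr(zone->pageset)->pcp;
        if (!cold)
                list_add(&page->lru, &pcp->lists[migratetype]);
        else
                list_add_tail(&page->lru, &pcp->lists[migratetype]);
        pcp->count++;
        local_irq_restore(flags);
}
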
@@ -2614,7 +2651,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
                 for (; page < endpage; page += pageblock_nr_pages) {
                         int mt = get_pageblock_migratetype(page);
                         if (!is_migrate_isolate(mt) && !is_migrate_cma(mt)
-                                && mt != MIGRATE_HIGHATOMIC)
+                                && !is_migrate_highatomic(mt))
                                 set_pageblock_migratetype(page,
                                                         MIGRATE_MOVABLE);
                 }
@@ -2654,8 +2691,6 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
 {
         struct page *page;
 
-        VM_BUG_ON(in_interrupt());
-
         do {
                 if (list_empty(list)) {
                         pcp->count += rmqueue_bulk(zone, 0,
@@ -2686,8 +2721,9 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
         struct list_head *list;
         bool cold = ((gfp_flags & __GFP_COLD) != 0);
         struct page *page;
+        unsigned long flags;
 
-        preempt_disable();
+        local_irq_save(flags);
         pcp = &this_cpu_ptr(zone->pageset)->pcp;
         list = &pcp->lists[migratetype];
         page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list);
@@ -2695,7 +2731,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
                 __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
                 zone_statistics(preferred_zone, zone);
         }
-        preempt_enable();
+        local_irq_restore(flags);
         return page;
 }
 
@@ -2711,7 +2747,7 @@ struct page *rmqueue(struct zone *preferred_zone,
         unsigned long flags;
         struct page *page;
 
-        if (likely(order == 0) && !in_interrupt()) {
+        if (likely(order == 0)) {
                 page = rmqueue_pcplist(preferred_zone, zone, order,
                                         gfp_flags, migratetype);
                 goto out;
@@ -3113,8 +3149,7 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
         static DEFINE_RATELIMIT_STATE(nopage_rs, DEFAULT_RATELIMIT_INTERVAL,
                                       DEFAULT_RATELIMIT_BURST);
 
-        if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) ||
-            debug_guardpage_minorder() > 0)
+        if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
                 return;
 
         pr_warn("%s: ", current->comm);
@@ -3248,14 +3283,15 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
                 enum compact_priority prio, enum compact_result *compact_result)
 {
         struct page *page;
+        unsigned int noreclaim_flag;
 
         if (!order)
                 return NULL;
 
-        current->flags |= PF_MEMALLOC;
+        noreclaim_flag = memalloc_noreclaim_save();
         *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
                                                                         prio);
-        current->flags &= ~PF_MEMALLOC;
+        memalloc_noreclaim_restore(noreclaim_flag);
 
         if (*compact_result <= COMPACT_INACTIVE)
                 return NULL;
@@ -3402,12 +3438,13 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
 {
         struct reclaim_state reclaim_state;
         int progress;
+        unsigned int noreclaim_flag;
 
         cond_resched();
 
         /* We now go into synchronous reclaim */
         cpuset_memory_pressure_bump();
-        current->flags |= PF_MEMALLOC;
+        noreclaim_flag = memalloc_noreclaim_save();
         lockdep_set_current_reclaim_state(gfp_mask);
         reclaim_state.reclaimed_slab = 0;
         current->reclaim_state = &reclaim_state;
@@ -3417,7 +3454,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
 
         current->reclaim_state = NULL;
         lockdep_clear_current_reclaim_state();
-        current->flags &= ~PF_MEMALLOC;
+        memalloc_noreclaim_restore(noreclaim_flag);
 
         cond_resched();
 
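
Both call sites above stop open-coding PF_MEMALLOC in current->flags and use the memalloc_noreclaim_save()/memalloc_noreclaim_restore() pair instead. The save/restore shape matters for nesting: clearing the flag unconditionally at the end would wipe out a PF_MEMALLOC that an outer caller had already set. A simplified sketch of the semantics these helpers are expected to have (the real definitions live in the scheduler headers; the "_sketch" names mark this as an illustration, not a copy):

static inline unsigned int memalloc_noreclaim_save_sketch(void)
{
        unsigned int flags = current->flags & PF_MEMALLOC;

        current->flags |= PF_MEMALLOC;  /* allocations below must not recurse into reclaim */
        return flags;                   /* remember whether the flag was already set */
}

static inline void memalloc_noreclaim_restore_sketch(unsigned int flags)
{
        current->flags = (current->flags & ~PF_MEMALLOC) | flags;  /* put back previous state */
}
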
@@ -3525,19 +3562,12 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
 }
 
 /*
- * Maximum number of reclaim retries without any progress before OOM killer
- * is consider as the only way to move forward.
- */
-#define MAX_RECLAIM_RETRIES 16
-
-/*
  * Checks whether it makes sense to retry the reclaim to make a forward progress
  * for the given allocation request.
- * The reclaim feedback represented by did_some_progress (any progress during
- * the last reclaim round) and no_progress_loops (number of reclaim rounds without
- * any progress in a row) is considered as well as the reclaimable pages on the
- * applicable zone list (with a backoff mechanism which is a function of
- * no_progress_loops).
+ *
+ * We give up when we either have tried MAX_RECLAIM_RETRIES in a row
+ * without success, or when we couldn't even meet the watermark if we
+ * reclaimed all remaining pages on the LRU lists.
  *
  * Returns true if a retry is viable or false to enter the oom path.
  */
@@ -3582,13 +3612,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
                 bool wmark;
 
                 available = reclaimable = zone_reclaimable_pages(zone);
-                available -= DIV_ROUND_UP((*no_progress_loops) * available,
-                                          MAX_RECLAIM_RETRIES);
                 available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
 
                 /*
-                 * Would the allocation succeed if we reclaimed the whole
-                 * available?
+                 * Would the allocation succeed if we reclaimed all
+                 * reclaimable pages?
                  */
                 wmark = __zone_watermark_ok(zone, order, min_wmark,
                                 ac_classzone_idx(ac), alloc_flags, available);
@@ -3639,6 +3667,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
                                                 struct alloc_context *ac)
 {
         bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
+        const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;
         struct page *page = NULL;
         unsigned int alloc_flags;
         unsigned long did_some_progress;
@@ -3706,12 +3735,17 @@ retry_cpuset:
 
         /*
          * For costly allocations, try direct compaction first, as it's likely
-         * that we have enough base pages and don't need to reclaim. Don't try
-         * that for allocations that are allowed to ignore watermarks, as the
-         * ALLOC_NO_WATERMARKS attempt didn't yet happen.
+         * that we have enough base pages and don't need to reclaim. For non-
+         * movable high-order allocations, do that as well, as compaction will
+         * try to prevent permanent fragmentation by migrating from blocks of
+         * the same migratetype.
+         * Don't try this for allocations that are allowed to ignore
+         * watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen.
          */
-        if (can_direct_reclaim && order > PAGE_ALLOC_COSTLY_ORDER &&
-                !gfp_pfmemalloc_allowed(gfp_mask)) {
+        if (can_direct_reclaim &&
+                        (costly_order ||
+                           (order > 0 && ac->migratetype != MIGRATE_MOVABLE))
+                        && !gfp_pfmemalloc_allowed(gfp_mask)) {
                 page = __alloc_pages_direct_compact(gfp_mask, order,
                                                 alloc_flags, ac,
                                                 INIT_COMPACT_PRIORITY,
@@ -3723,7 +3757,7 @@ retry_cpuset:
                  * Checks for costly allocations with __GFP_NORETRY, which
                  * includes THP page fault allocations
                  */
-                if (gfp_mask & __GFP_NORETRY) {
+                if (costly_order && (gfp_mask & __GFP_NORETRY)) {
                         /*
                          * If compaction is deferred for high-order allocations,
                          * it is because sync compaction recently failed. If
@@ -3774,7 +3808,7 @@ retry:
 
         /* Make sure we know about allocations which stall for too long */
         if (time_after(jiffies, alloc_start + stall_timeout)) {
-                warn_alloc(gfp_mask, ac->nodemask,
+                warn_alloc(gfp_mask & ~__GFP_NOWARN, ac->nodemask,
                         "page allocation stalls for %ums, order:%u",
                         jiffies_to_msecs(jiffies-alloc_start), order);
                 stall_timeout += 10 * HZ;
@@ -3804,7 +3838,7 @@ retry:
          * Do not retry costly high order allocations unless they are
          * __GFP_REPEAT
          */
-        if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
+        if (costly_order && !(gfp_mask & __GFP_REPEAT))
                 goto nopage;
 
         if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
@@ -3974,10 +4008,12 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
                 goto out;
 
         /*
-         * Runtime PM, block IO and its error handling path can deadlock
-         * because I/O on the device might not complete.
+         * Apply scoped allocation constraints. This is mainly about GFP_NOFS
+         * and GFP_NOIO, which have to be inherited for all allocation requests
+         * from a particular context which has been marked by
+         * memalloc_no{fs,io}_{save,restore}.
          */
-        alloc_mask = memalloc_noio_flags(gfp_mask);
+        alloc_mask = current_gfp_context(gfp_mask);
         ac.spread_dirty_pages = false;
 
         /*
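
The new comment refers to the scoped GFP API: current_gfp_context() filters __GFP_FS/__GFP_IO out of the caller's mask while the task is inside a memalloc_nofs_save() or memalloc_noio_save() section, so the constraint is applied once here rather than at every call site. A hedged illustration of how a caller would open such a scope (a sketch under the assumption that the helpers are declared in <linux/sched/mm.h>; the function name is invented):

#include <linux/sched/mm.h>
#include <linux/slab.h>

static void *alloc_while_holding_fs_locks(size_t size)
{
        unsigned int nofs_flag;
        void *p;

        nofs_flag = memalloc_nofs_save();   /* allocations in this scope behave as if __GFP_FS was cleared */
        p = kmalloc(size, GFP_KERNEL);      /* the page allocator strips __GFP_FS via current_gfp_context() */
        memalloc_nofs_restore(nofs_flag);

        return p;
}
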
@@ -4250,7 +4286,8 @@ EXPORT_SYMBOL(free_pages_exact);
  * nr_free_zone_pages() counts the number of counts pages which are beyond the
  * high watermark within all zones at or below a given zone index. For each
  * zone, the number of pages is calculated as:
- *     managed_pages - high_pages
+ *
+ *     nr_free_zone_pages = managed_pages - high_pages
  */
 static unsigned long nr_free_zone_pages(int offset)
 {
@@ -4512,7 +4549,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 #endif
                         " writeback_tmp:%lukB"
                         " unstable:%lukB"
-                        " pages_scanned:%lu"
                         " all_unreclaimable? %s"
                         "\n",
                         pgdat->node_id,
@@ -4535,8 +4571,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 #endif
                         K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
                         K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
-                        node_page_state(pgdat, NR_PAGES_SCANNED),
-                        !pgdat_reclaimable(pgdat) ? "yes" : "no");
+                        pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
+                                "yes" : "no");
         }
 
         for_each_populated_zone(zone) {
@@ -7431,7 +7467,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
                 .zone = page_zone(pfn_to_page(start)),
                 .mode = MIGRATE_SYNC,
                 .ignore_skip_hint = true,
-                .gfp_mask = memalloc_noio_flags(gfp_mask),
+                .gfp_mask = current_gfp_context(gfp_mask),
         };
         INIT_LIST_HEAD(&cc.migratepages);
 