Diffstat (limited to 'mm/page_alloc.c')
| -rw-r--r-- | mm/page_alloc.c | 404 |
1 file changed, 281 insertions(+), 123 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e95b5b7c9c3d..cde5dac6229a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
| @@ -16,6 +16,7 @@ | |||
| 16 | 16 | ||
| 17 | #include <linux/stddef.h> | 17 | #include <linux/stddef.h> |
| 18 | #include <linux/mm.h> | 18 | #include <linux/mm.h> |
| 19 | #include <linux/highmem.h> | ||
| 19 | #include <linux/swap.h> | 20 | #include <linux/swap.h> |
| 20 | #include <linux/interrupt.h> | 21 | #include <linux/interrupt.h> |
| 21 | #include <linux/pagemap.h> | 22 | #include <linux/pagemap.h> |
| @@ -96,8 +97,12 @@ int _node_numa_mem_[MAX_NUMNODES]; | |||
| 96 | #endif | 97 | #endif |
| 97 | 98 | ||
| 98 | /* work_structs for global per-cpu drains */ | 99 | /* work_structs for global per-cpu drains */ |
| 100 | struct pcpu_drain { | ||
| 101 | struct zone *zone; | ||
| 102 | struct work_struct work; | ||
| 103 | }; | ||
| 99 | DEFINE_MUTEX(pcpu_drain_mutex); | 104 | DEFINE_MUTEX(pcpu_drain_mutex); |
| 100 | DEFINE_PER_CPU(struct work_struct, pcpu_drain); | 105 | DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain); |
| 101 | 106 | ||
| 102 | #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY | 107 | #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY |
| 103 | volatile unsigned long latent_entropy __latent_entropy; | 108 | volatile unsigned long latent_entropy __latent_entropy; |
| @@ -121,10 +126,8 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = { | |||
| 121 | }; | 126 | }; |
| 122 | EXPORT_SYMBOL(node_states); | 127 | EXPORT_SYMBOL(node_states); |
| 123 | 128 | ||
| 124 | /* Protect totalram_pages and zone->managed_pages */ | 129 | atomic_long_t _totalram_pages __read_mostly; |
| 125 | static DEFINE_SPINLOCK(managed_page_count_lock); | 130 | EXPORT_SYMBOL(_totalram_pages); |
| 126 | |||
| 127 | unsigned long totalram_pages __read_mostly; | ||
| 128 | unsigned long totalreserve_pages __read_mostly; | 131 | unsigned long totalreserve_pages __read_mostly; |
| 129 | unsigned long totalcma_pages __read_mostly; | 132 | unsigned long totalcma_pages __read_mostly; |
| 130 | 133 | ||
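For context on the hunk above: totalram_pages stops being a plain counter guarded by managed_page_count_lock and becomes atomic_long_t _totalram_pages behind accessor helpers, which is why the lock disappears here and why later hunks call totalram_pages(), totalram_pages_add() and friends. A minimal sketch of what those header-side helpers look like (they live outside mm/page_alloc.c and are not part of this diff, so take the exact bodies as an assumption inferred from the call sites below):

	/* Sketch only; the real helpers sit in the mm headers, not in this file. */
	extern atomic_long_t _totalram_pages;

	static inline unsigned long totalram_pages(void)
	{
		/* Lockless read replaces the old spinlock-protected counter. */
		return (unsigned long)atomic_long_read(&_totalram_pages);
	}

	static inline void totalram_pages_add(long count)
	{
		atomic_long_add(count, &_totalram_pages);
	}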
| @@ -237,7 +240,7 @@ static char * const zone_names[MAX_NR_ZONES] = { | |||
| 237 | #endif | 240 | #endif |
| 238 | }; | 241 | }; |
| 239 | 242 | ||
| 240 | char * const migratetype_names[MIGRATE_TYPES] = { | 243 | const char * const migratetype_names[MIGRATE_TYPES] = { |
| 241 | "Unmovable", | 244 | "Unmovable", |
| 242 | "Movable", | 245 | "Movable", |
| 243 | "Reclaimable", | 246 | "Reclaimable", |
| @@ -263,20 +266,21 @@ compound_page_dtor * const compound_page_dtors[] = { | |||
| 263 | 266 | ||
| 264 | int min_free_kbytes = 1024; | 267 | int min_free_kbytes = 1024; |
| 265 | int user_min_free_kbytes = -1; | 268 | int user_min_free_kbytes = -1; |
| 269 | int watermark_boost_factor __read_mostly = 15000; | ||
| 266 | int watermark_scale_factor = 10; | 270 | int watermark_scale_factor = 10; |
| 267 | 271 | ||
| 268 | static unsigned long nr_kernel_pages __meminitdata; | 272 | static unsigned long nr_kernel_pages __initdata; |
| 269 | static unsigned long nr_all_pages __meminitdata; | 273 | static unsigned long nr_all_pages __initdata; |
| 270 | static unsigned long dma_reserve __meminitdata; | 274 | static unsigned long dma_reserve __initdata; |
| 271 | 275 | ||
| 272 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP | 276 | #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP |
| 273 | static unsigned long arch_zone_lowest_possible_pfn[MAX_NR_ZONES] __meminitdata; | 277 | static unsigned long arch_zone_lowest_possible_pfn[MAX_NR_ZONES] __initdata; |
| 274 | static unsigned long arch_zone_highest_possible_pfn[MAX_NR_ZONES] __meminitdata; | 278 | static unsigned long arch_zone_highest_possible_pfn[MAX_NR_ZONES] __initdata; |
| 275 | static unsigned long required_kernelcore __initdata; | 279 | static unsigned long required_kernelcore __initdata; |
| 276 | static unsigned long required_kernelcore_percent __initdata; | 280 | static unsigned long required_kernelcore_percent __initdata; |
| 277 | static unsigned long required_movablecore __initdata; | 281 | static unsigned long required_movablecore __initdata; |
| 278 | static unsigned long required_movablecore_percent __initdata; | 282 | static unsigned long required_movablecore_percent __initdata; |
| 279 | static unsigned long zone_movable_pfn[MAX_NUMNODES] __meminitdata; | 283 | static unsigned long zone_movable_pfn[MAX_NUMNODES] __initdata; |
| 280 | static bool mirrored_kernelcore __meminitdata; | 284 | static bool mirrored_kernelcore __meminitdata; |
| 281 | 285 | ||
| 282 | /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ | 286 | /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ |
| @@ -294,6 +298,32 @@ EXPORT_SYMBOL(nr_online_nodes); | |||
| 294 | int page_group_by_mobility_disabled __read_mostly; | 298 | int page_group_by_mobility_disabled __read_mostly; |
| 295 | 299 | ||
| 296 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | 300 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
| 301 | /* | ||
| 302 | * During boot we initialize deferred pages on-demand, as needed, but once | ||
| 303 | * page_alloc_init_late() has finished, the deferred pages are all initialized, | ||
| 304 | * and we can permanently disable that path. | ||
| 305 | */ | ||
| 306 | static DEFINE_STATIC_KEY_TRUE(deferred_pages); | ||
| 307 | |||
| 308 | /* | ||
| 309 | * Calling kasan_free_pages() only after deferred memory initialization | ||
| 310 | * has completed. Poisoning pages during deferred memory init will greatly | ||
| 311 | * lengthen the process and cause problem in large memory systems as the | ||
| 312 | * deferred pages initialization is done with interrupt disabled. | ||
| 313 | * | ||
| 314 | * Assuming that there will be no reference to those newly initialized | ||
| 315 | * pages before they are ever allocated, this should have no effect on | ||
| 316 | * KASAN memory tracking as the poison will be properly inserted at page | ||
| 317 | * allocation time. The only corner case is when pages are allocated by | ||
| 318 | * on-demand allocation and then freed again before the deferred pages | ||
| 319 | * initialization is done, but this is not likely to happen. | ||
| 320 | */ | ||
| 321 | static inline void kasan_free_nondeferred_pages(struct page *page, int order) | ||
| 322 | { | ||
| 323 | if (!static_branch_unlikely(&deferred_pages)) | ||
| 324 | kasan_free_pages(page, order); | ||
| 325 | } | ||
| 326 | |||
| 297 | /* Returns true if the struct page for the pfn is uninitialised */ | 327 | /* Returns true if the struct page for the pfn is uninitialised */ |
| 298 | static inline bool __meminit early_page_uninitialised(unsigned long pfn) | 328 | static inline bool __meminit early_page_uninitialised(unsigned long pfn) |
| 299 | { | 329 | { |
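The deferred_pages static key that now gates kasan_free_nondeferred_pages() above is the same key the deferred struct-page code flips once boot-time initialisation finishes; after that, static_branch_unlikely() compiles down to a patched jump, so the extra check costs essentially nothing on the free path. Roughly how the gate gets turned off, sketched from the surrounding (unchanged) code and therefore an assumption rather than part of this hunk:

	/* Sketch: late boot, after all deferred struct pages are initialised. */
	void __init page_alloc_init_late(void)
	{
		/* ... wait for the per-node deferred_init_memmap() threads ... */
		static_branch_disable(&deferred_pages);
		/* From here on, kasan_free_nondeferred_pages() always poisons. */
	}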
| @@ -326,8 +356,13 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn) | |||
| 326 | /* Always populate low zones for address-constrained allocations */ | 356 | /* Always populate low zones for address-constrained allocations */ |
| 327 | if (end_pfn < pgdat_end_pfn(NODE_DATA(nid))) | 357 | if (end_pfn < pgdat_end_pfn(NODE_DATA(nid))) |
| 328 | return false; | 358 | return false; |
| 359 | |||
| 360 | /* | ||
| 361 | * We start only with one section of pages, more pages are added as | ||
| 362 | * needed until the rest of deferred pages are initialized. | ||
| 363 | */ | ||
| 329 | nr_initialised++; | 364 | nr_initialised++; |
| 330 | if ((nr_initialised > NODE_DATA(nid)->static_init_pgcnt) && | 365 | if ((nr_initialised > PAGES_PER_SECTION) && |
| 331 | (pfn & (PAGES_PER_SECTION - 1)) == 0) { | 366 | (pfn & (PAGES_PER_SECTION - 1)) == 0) { |
| 332 | NODE_DATA(nid)->first_deferred_pfn = pfn; | 367 | NODE_DATA(nid)->first_deferred_pfn = pfn; |
| 333 | return true; | 368 | return true; |
| @@ -335,6 +370,8 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn) | |||
| 335 | return false; | 370 | return false; |
| 336 | } | 371 | } |
| 337 | #else | 372 | #else |
| 373 | #define kasan_free_nondeferred_pages(p, o) kasan_free_pages(p, o) | ||
| 374 | |||
| 338 | static inline bool early_page_uninitialised(unsigned long pfn) | 375 | static inline bool early_page_uninitialised(unsigned long pfn) |
| 339 | { | 376 | { |
| 340 | return false; | 377 | return false; |
| @@ -426,6 +463,7 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags, | |||
| 426 | unsigned long old_word, word; | 463 | unsigned long old_word, word; |
| 427 | 464 | ||
| 428 | BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4); | 465 | BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4); |
| 466 | BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits)); | ||
| 429 | 467 | ||
| 430 | bitmap = get_pageblock_bitmap(page, pfn); | 468 | bitmap = get_pageblock_bitmap(page, pfn); |
| 431 | bitidx = pfn_to_bitidx(page, pfn); | 469 | bitidx = pfn_to_bitidx(page, pfn); |
| @@ -1037,7 +1075,7 @@ static __always_inline bool free_pages_prepare(struct page *page, | |||
| 1037 | arch_free_page(page, order); | 1075 | arch_free_page(page, order); |
| 1038 | kernel_poison_pages(page, 1 << order, 0); | 1076 | kernel_poison_pages(page, 1 << order, 0); |
| 1039 | kernel_map_pages(page, 1 << order, 0); | 1077 | kernel_map_pages(page, 1 << order, 0); |
| 1040 | kasan_free_pages(page, order); | 1078 | kasan_free_nondeferred_pages(page, order); |
| 1041 | 1079 | ||
| 1042 | return true; | 1080 | return true; |
| 1043 | } | 1081 | } |
| @@ -1183,6 +1221,7 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn, | |||
| 1183 | init_page_count(page); | 1221 | init_page_count(page); |
| 1184 | page_mapcount_reset(page); | 1222 | page_mapcount_reset(page); |
| 1185 | page_cpupid_reset_last(page); | 1223 | page_cpupid_reset_last(page); |
| 1224 | page_kasan_tag_reset(page); | ||
| 1186 | 1225 | ||
| 1187 | INIT_LIST_HEAD(&page->lru); | 1226 | INIT_LIST_HEAD(&page->lru); |
| 1188 | #ifdef WANT_PAGE_VIRTUAL | 1227 | #ifdef WANT_PAGE_VIRTUAL |
| @@ -1279,7 +1318,7 @@ static void __init __free_pages_boot_core(struct page *page, unsigned int order) | |||
| 1279 | __ClearPageReserved(p); | 1318 | __ClearPageReserved(p); |
| 1280 | set_page_count(p, 0); | 1319 | set_page_count(p, 0); |
| 1281 | 1320 | ||
| 1282 | page_zone(page)->managed_pages += nr_pages; | 1321 | atomic_long_add(nr_pages, &page_zone(page)->managed_pages); |
| 1283 | set_page_refcounted(page); | 1322 | set_page_refcounted(page); |
| 1284 | __free_pages(page, order); | 1323 | __free_pages(page, order); |
| 1285 | } | 1324 | } |
| @@ -1606,13 +1645,6 @@ static int __init deferred_init_memmap(void *data) | |||
| 1606 | } | 1645 | } |
| 1607 | 1646 | ||
| 1608 | /* | 1647 | /* |
| 1609 | * During boot we initialize deferred pages on-demand, as needed, but once | ||
| 1610 | * page_alloc_init_late() has finished, the deferred pages are all initialized, | ||
| 1611 | * and we can permanently disable that path. | ||
| 1612 | */ | ||
| 1613 | static DEFINE_STATIC_KEY_TRUE(deferred_pages); | ||
| 1614 | |||
| 1615 | /* | ||
| 1616 | * If this zone has deferred pages, try to grow it by initializing enough | 1648 | * If this zone has deferred pages, try to grow it by initializing enough |
| 1617 | * deferred pages to satisfy the allocation specified by order, rounded up to | 1649 | * deferred pages to satisfy the allocation specified by order, rounded up to |
| 1618 | * the nearest PAGES_PER_SECTION boundary. So we're adding memory in increments | 1650 | * the nearest PAGES_PER_SECTION boundary. So we're adding memory in increments |
| @@ -1981,8 +2013,8 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, | |||
| 1981 | */ | 2013 | */ |
| 1982 | static int fallbacks[MIGRATE_TYPES][4] = { | 2014 | static int fallbacks[MIGRATE_TYPES][4] = { |
| 1983 | [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_TYPES }, | 2015 | [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_TYPES }, |
| 1984 | [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_TYPES }, | ||
| 1985 | [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES }, | 2016 | [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES }, |
| 2017 | [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_TYPES }, | ||
| 1986 | #ifdef CONFIG_CMA | 2018 | #ifdef CONFIG_CMA |
| 1987 | [MIGRATE_CMA] = { MIGRATE_TYPES }, /* Never used */ | 2019 | [MIGRATE_CMA] = { MIGRATE_TYPES }, /* Never used */ |
| 1988 | #endif | 2020 | #endif |
| @@ -2129,6 +2161,21 @@ static bool can_steal_fallback(unsigned int order, int start_mt) | |||
| 2129 | return false; | 2161 | return false; |
| 2130 | } | 2162 | } |
| 2131 | 2163 | ||
| 2164 | static inline void boost_watermark(struct zone *zone) | ||
| 2165 | { | ||
| 2166 | unsigned long max_boost; | ||
| 2167 | |||
| 2168 | if (!watermark_boost_factor) | ||
| 2169 | return; | ||
| 2170 | |||
| 2171 | max_boost = mult_frac(zone->_watermark[WMARK_HIGH], | ||
| 2172 | watermark_boost_factor, 10000); | ||
| 2173 | max_boost = max(pageblock_nr_pages, max_boost); | ||
| 2174 | |||
| 2175 | zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages, | ||
| 2176 | max_boost); | ||
| 2177 | } | ||
| 2178 | |||
| 2132 | /* | 2179 | /* |
| 2133 | * This function implements actual steal behaviour. If order is large enough, | 2180 | * This function implements actual steal behaviour. If order is large enough, |
| 2134 | * we can steal whole pageblock. If not, we first move freepages in this | 2181 | * we can steal whole pageblock. If not, we first move freepages in this |
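A worked example of the boost arithmetic above, assuming the new default watermark_boost_factor of 15000, a zone whose high watermark is 4096 pages, and pageblock_nr_pages of 512 (2 MB pageblocks with 4 KB base pages; both zone values are illustrative assumptions):

	max_boost = mult_frac(4096, 15000, 10000) = 4096 * 15000 / 10000 = 6144 pages
	watermark_boost after successive fallbacks: 512 -> 1024 -> ... -> 6144 (clamped at max_boost)

Setting watermark_boost_factor to 0 disables the mechanism entirely, since boost_watermark() returns before touching zone->watermark_boost.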
| @@ -2138,7 +2185,7 @@ static bool can_steal_fallback(unsigned int order, int start_mt) | |||
| 2138 | * itself, so pages freed in the future will be put on the correct free list. | 2185 | * itself, so pages freed in the future will be put on the correct free list. |
| 2139 | */ | 2186 | */ |
| 2140 | static void steal_suitable_fallback(struct zone *zone, struct page *page, | 2187 | static void steal_suitable_fallback(struct zone *zone, struct page *page, |
| 2141 | int start_type, bool whole_block) | 2188 | unsigned int alloc_flags, int start_type, bool whole_block) |
| 2142 | { | 2189 | { |
| 2143 | unsigned int current_order = page_order(page); | 2190 | unsigned int current_order = page_order(page); |
| 2144 | struct free_area *area; | 2191 | struct free_area *area; |
| @@ -2160,6 +2207,15 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, | |||
| 2160 | goto single_page; | 2207 | goto single_page; |
| 2161 | } | 2208 | } |
| 2162 | 2209 | ||
| 2210 | /* | ||
| 2211 | * Boost watermarks to increase reclaim pressure to reduce the | ||
| 2212 | * likelihood of future fallbacks. Wake kswapd now as the node | ||
| 2213 | * may be balanced overall and kswapd will not wake naturally. | ||
| 2214 | */ | ||
| 2215 | boost_watermark(zone); | ||
| 2216 | if (alloc_flags & ALLOC_KSWAPD) | ||
| 2217 | wakeup_kswapd(zone, 0, 0, zone_idx(zone)); | ||
| 2218 | |||
| 2163 | /* We are not allowed to try stealing from the whole block */ | 2219 | /* We are not allowed to try stealing from the whole block */ |
| 2164 | if (!whole_block) | 2220 | if (!whole_block) |
| 2165 | goto single_page; | 2221 | goto single_page; |
| @@ -2258,7 +2314,7 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone, | |||
| 2258 | * Limit the number reserved to 1 pageblock or roughly 1% of a zone. | 2314 | * Limit the number reserved to 1 pageblock or roughly 1% of a zone. |
| 2259 | * Check is race-prone but harmless. | 2315 | * Check is race-prone but harmless. |
| 2260 | */ | 2316 | */ |
| 2261 | max_managed = (zone->managed_pages / 100) + pageblock_nr_pages; | 2317 | max_managed = (zone_managed_pages(zone) / 100) + pageblock_nr_pages; |
| 2262 | if (zone->nr_reserved_highatomic >= max_managed) | 2318 | if (zone->nr_reserved_highatomic >= max_managed) |
| 2263 | return; | 2319 | return; |
| 2264 | 2320 | ||
| @@ -2375,20 +2431,30 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, | |||
| 2375 | * condition simpler. | 2431 | * condition simpler. |
| 2376 | */ | 2432 | */ |
| 2377 | static __always_inline bool | 2433 | static __always_inline bool |
| 2378 | __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) | 2434 | __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, |
| 2435 | unsigned int alloc_flags) | ||
| 2379 | { | 2436 | { |
| 2380 | struct free_area *area; | 2437 | struct free_area *area; |
| 2381 | int current_order; | 2438 | int current_order; |
| 2439 | int min_order = order; | ||
| 2382 | struct page *page; | 2440 | struct page *page; |
| 2383 | int fallback_mt; | 2441 | int fallback_mt; |
| 2384 | bool can_steal; | 2442 | bool can_steal; |
| 2385 | 2443 | ||
| 2386 | /* | 2444 | /* |
| 2445 | * Do not steal pages from freelists belonging to other pageblocks | ||
| 2446 | * i.e. orders < pageblock_order. If there are no local zones free, | ||
| 2447 | * the zonelists will be reiterated without ALLOC_NOFRAGMENT. | ||
| 2448 | */ | ||
| 2449 | if (alloc_flags & ALLOC_NOFRAGMENT) | ||
| 2450 | min_order = pageblock_order; | ||
| 2451 | |||
| 2452 | /* | ||
| 2387 | * Find the largest available free page in the other list. This roughly | 2453 | * Find the largest available free page in the other list. This roughly |
| 2388 | * approximates finding the pageblock with the most free pages, which | 2454 | * approximates finding the pageblock with the most free pages, which |
| 2389 | * would be too costly to do exactly. | 2455 | * would be too costly to do exactly. |
| 2390 | */ | 2456 | */ |
| 2391 | for (current_order = MAX_ORDER - 1; current_order >= order; | 2457 | for (current_order = MAX_ORDER - 1; current_order >= min_order; |
| 2392 | --current_order) { | 2458 | --current_order) { |
| 2393 | area = &(zone->free_area[current_order]); | 2459 | area = &(zone->free_area[current_order]); |
| 2394 | fallback_mt = find_suitable_fallback(area, current_order, | 2460 | fallback_mt = find_suitable_fallback(area, current_order, |
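To make the min_order clamp above concrete: with pageblock_order = 9 (an assumption; that is 512 pages on common configurations) and an order-3 request carrying ALLOC_NOFRAGMENT, __rmqueue_fallback() only scans free_area[9] through free_area[MAX_ORDER - 1], so a fallback either steals an entire pageblock or finds nothing. In the "finds nothing" case the allocation is retried through the zonelist without ALLOC_NOFRAGMENT (see the get_page_from_freelist() changes later in this diff) rather than splintering an order-3 chunk out of another migratetype's pageblock.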
| @@ -2433,7 +2499,8 @@ do_steal: | |||
| 2433 | page = list_first_entry(&area->free_list[fallback_mt], | 2499 | page = list_first_entry(&area->free_list[fallback_mt], |
| 2434 | struct page, lru); | 2500 | struct page, lru); |
| 2435 | 2501 | ||
| 2436 | steal_suitable_fallback(zone, page, start_migratetype, can_steal); | 2502 | steal_suitable_fallback(zone, page, alloc_flags, start_migratetype, |
| 2503 | can_steal); | ||
| 2437 | 2504 | ||
| 2438 | trace_mm_page_alloc_extfrag(page, order, current_order, | 2505 | trace_mm_page_alloc_extfrag(page, order, current_order, |
| 2439 | start_migratetype, fallback_mt); | 2506 | start_migratetype, fallback_mt); |
| @@ -2447,7 +2514,8 @@ do_steal: | |||
| 2447 | * Call me with the zone->lock already held. | 2514 | * Call me with the zone->lock already held. |
| 2448 | */ | 2515 | */ |
| 2449 | static __always_inline struct page * | 2516 | static __always_inline struct page * |
| 2450 | __rmqueue(struct zone *zone, unsigned int order, int migratetype) | 2517 | __rmqueue(struct zone *zone, unsigned int order, int migratetype, |
| 2518 | unsigned int alloc_flags) | ||
| 2451 | { | 2519 | { |
| 2452 | struct page *page; | 2520 | struct page *page; |
| 2453 | 2521 | ||
| @@ -2457,7 +2525,8 @@ retry: | |||
| 2457 | if (migratetype == MIGRATE_MOVABLE) | 2525 | if (migratetype == MIGRATE_MOVABLE) |
| 2458 | page = __rmqueue_cma_fallback(zone, order); | 2526 | page = __rmqueue_cma_fallback(zone, order); |
| 2459 | 2527 | ||
| 2460 | if (!page && __rmqueue_fallback(zone, order, migratetype)) | 2528 | if (!page && __rmqueue_fallback(zone, order, migratetype, |
| 2529 | alloc_flags)) | ||
| 2461 | goto retry; | 2530 | goto retry; |
| 2462 | } | 2531 | } |
| 2463 | 2532 | ||
| @@ -2472,13 +2541,14 @@ retry: | |||
| 2472 | */ | 2541 | */ |
| 2473 | static int rmqueue_bulk(struct zone *zone, unsigned int order, | 2542 | static int rmqueue_bulk(struct zone *zone, unsigned int order, |
| 2474 | unsigned long count, struct list_head *list, | 2543 | unsigned long count, struct list_head *list, |
| 2475 | int migratetype) | 2544 | int migratetype, unsigned int alloc_flags) |
| 2476 | { | 2545 | { |
| 2477 | int i, alloced = 0; | 2546 | int i, alloced = 0; |
| 2478 | 2547 | ||
| 2479 | spin_lock(&zone->lock); | 2548 | spin_lock(&zone->lock); |
| 2480 | for (i = 0; i < count; ++i) { | 2549 | for (i = 0; i < count; ++i) { |
| 2481 | struct page *page = __rmqueue(zone, order, migratetype); | 2550 | struct page *page = __rmqueue(zone, order, migratetype, |
| 2551 | alloc_flags); | ||
| 2482 | if (unlikely(page == NULL)) | 2552 | if (unlikely(page == NULL)) |
| 2483 | break; | 2553 | break; |
| 2484 | 2554 | ||
| @@ -2592,6 +2662,10 @@ void drain_local_pages(struct zone *zone) | |||
| 2592 | 2662 | ||
| 2593 | static void drain_local_pages_wq(struct work_struct *work) | 2663 | static void drain_local_pages_wq(struct work_struct *work) |
| 2594 | { | 2664 | { |
| 2665 | struct pcpu_drain *drain; | ||
| 2666 | |||
| 2667 | drain = container_of(work, struct pcpu_drain, work); | ||
| 2668 | |||
| 2595 | /* | 2669 | /* |
| 2596 | * drain_all_pages doesn't use proper cpu hotplug protection so | 2670 | * drain_all_pages doesn't use proper cpu hotplug protection so |
| 2597 | * we can race with cpu offline when the WQ can move this from | 2671 | * we can race with cpu offline when the WQ can move this from |
| @@ -2600,7 +2674,7 @@ static void drain_local_pages_wq(struct work_struct *work) | |||
| 2600 | * a different one. | 2674 | * a different one. |
| 2601 | */ | 2675 | */ |
| 2602 | preempt_disable(); | 2676 | preempt_disable(); |
| 2603 | drain_local_pages(NULL); | 2677 | drain_local_pages(drain->zone); |
| 2604 | preempt_enable(); | 2678 | preempt_enable(); |
| 2605 | } | 2679 | } |
| 2606 | 2680 | ||
| @@ -2671,12 +2745,14 @@ void drain_all_pages(struct zone *zone) | |||
| 2671 | } | 2745 | } |
| 2672 | 2746 | ||
| 2673 | for_each_cpu(cpu, &cpus_with_pcps) { | 2747 | for_each_cpu(cpu, &cpus_with_pcps) { |
| 2674 | struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu); | 2748 | struct pcpu_drain *drain = per_cpu_ptr(&pcpu_drain, cpu); |
| 2675 | INIT_WORK(work, drain_local_pages_wq); | 2749 | |
| 2676 | queue_work_on(cpu, mm_percpu_wq, work); | 2750 | drain->zone = zone; |
| 2751 | INIT_WORK(&drain->work, drain_local_pages_wq); | ||
| 2752 | queue_work_on(cpu, mm_percpu_wq, &drain->work); | ||
| 2677 | } | 2753 | } |
| 2678 | for_each_cpu(cpu, &cpus_with_pcps) | 2754 | for_each_cpu(cpu, &cpus_with_pcps) |
| 2679 | flush_work(per_cpu_ptr(&pcpu_drain, cpu)); | 2755 | flush_work(&per_cpu_ptr(&pcpu_drain, cpu)->work); |
| 2680 | 2756 | ||
| 2681 | mutex_unlock(&pcpu_drain_mutex); | 2757 | mutex_unlock(&pcpu_drain_mutex); |
| 2682 | } | 2758 | } |
| @@ -2934,6 +3010,7 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z) | |||
| 2934 | 3010 | ||
| 2935 | /* Remove page from the per-cpu list, caller must protect the list */ | 3011 | /* Remove page from the per-cpu list, caller must protect the list */ |
| 2936 | static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype, | 3012 | static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype, |
| 3013 | unsigned int alloc_flags, | ||
| 2937 | struct per_cpu_pages *pcp, | 3014 | struct per_cpu_pages *pcp, |
| 2938 | struct list_head *list) | 3015 | struct list_head *list) |
| 2939 | { | 3016 | { |
| @@ -2943,7 +3020,7 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype, | |||
| 2943 | if (list_empty(list)) { | 3020 | if (list_empty(list)) { |
| 2944 | pcp->count += rmqueue_bulk(zone, 0, | 3021 | pcp->count += rmqueue_bulk(zone, 0, |
| 2945 | pcp->batch, list, | 3022 | pcp->batch, list, |
| 2946 | migratetype); | 3023 | migratetype, alloc_flags); |
| 2947 | if (unlikely(list_empty(list))) | 3024 | if (unlikely(list_empty(list))) |
| 2948 | return NULL; | 3025 | return NULL; |
| 2949 | } | 3026 | } |
| @@ -2959,7 +3036,8 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype, | |||
| 2959 | /* Lock and remove page from the per-cpu list */ | 3036 | /* Lock and remove page from the per-cpu list */ |
| 2960 | static struct page *rmqueue_pcplist(struct zone *preferred_zone, | 3037 | static struct page *rmqueue_pcplist(struct zone *preferred_zone, |
| 2961 | struct zone *zone, unsigned int order, | 3038 | struct zone *zone, unsigned int order, |
| 2962 | gfp_t gfp_flags, int migratetype) | 3039 | gfp_t gfp_flags, int migratetype, |
| 3040 | unsigned int alloc_flags) | ||
| 2963 | { | 3041 | { |
| 2964 | struct per_cpu_pages *pcp; | 3042 | struct per_cpu_pages *pcp; |
| 2965 | struct list_head *list; | 3043 | struct list_head *list; |
| @@ -2969,7 +3047,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, | |||
| 2969 | local_irq_save(flags); | 3047 | local_irq_save(flags); |
| 2970 | pcp = &this_cpu_ptr(zone->pageset)->pcp; | 3048 | pcp = &this_cpu_ptr(zone->pageset)->pcp; |
| 2971 | list = &pcp->lists[migratetype]; | 3049 | list = &pcp->lists[migratetype]; |
| 2972 | page = __rmqueue_pcplist(zone, migratetype, pcp, list); | 3050 | page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list); |
| 2973 | if (page) { | 3051 | if (page) { |
| 2974 | __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); | 3052 | __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); |
| 2975 | zone_statistics(preferred_zone, zone); | 3053 | zone_statistics(preferred_zone, zone); |
| @@ -2992,7 +3070,7 @@ struct page *rmqueue(struct zone *preferred_zone, | |||
| 2992 | 3070 | ||
| 2993 | if (likely(order == 0)) { | 3071 | if (likely(order == 0)) { |
| 2994 | page = rmqueue_pcplist(preferred_zone, zone, order, | 3072 | page = rmqueue_pcplist(preferred_zone, zone, order, |
| 2995 | gfp_flags, migratetype); | 3073 | gfp_flags, migratetype, alloc_flags); |
| 2996 | goto out; | 3074 | goto out; |
| 2997 | } | 3075 | } |
| 2998 | 3076 | ||
| @@ -3011,7 +3089,7 @@ struct page *rmqueue(struct zone *preferred_zone, | |||
| 3011 | trace_mm_page_alloc_zone_locked(page, order, migratetype); | 3089 | trace_mm_page_alloc_zone_locked(page, order, migratetype); |
| 3012 | } | 3090 | } |
| 3013 | if (!page) | 3091 | if (!page) |
| 3014 | page = __rmqueue(zone, order, migratetype); | 3092 | page = __rmqueue(zone, order, migratetype, alloc_flags); |
| 3015 | } while (page && check_new_pages(page, order)); | 3093 | } while (page && check_new_pages(page, order)); |
| 3016 | spin_unlock(&zone->lock); | 3094 | spin_unlock(&zone->lock); |
| 3017 | if (!page) | 3095 | if (!page) |
| @@ -3053,7 +3131,7 @@ static int __init setup_fail_page_alloc(char *str) | |||
| 3053 | } | 3131 | } |
| 3054 | __setup("fail_page_alloc=", setup_fail_page_alloc); | 3132 | __setup("fail_page_alloc=", setup_fail_page_alloc); |
| 3055 | 3133 | ||
| 3056 | static bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) | 3134 | static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) |
| 3057 | { | 3135 | { |
| 3058 | if (order < fail_page_alloc.min_order) | 3136 | if (order < fail_page_alloc.min_order) |
| 3059 | return false; | 3137 | return false; |
| @@ -3103,13 +3181,19 @@ late_initcall(fail_page_alloc_debugfs); | |||
| 3103 | 3181 | ||
| 3104 | #else /* CONFIG_FAIL_PAGE_ALLOC */ | 3182 | #else /* CONFIG_FAIL_PAGE_ALLOC */ |
| 3105 | 3183 | ||
| 3106 | static inline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) | 3184 | static inline bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) |
| 3107 | { | 3185 | { |
| 3108 | return false; | 3186 | return false; |
| 3109 | } | 3187 | } |
| 3110 | 3188 | ||
| 3111 | #endif /* CONFIG_FAIL_PAGE_ALLOC */ | 3189 | #endif /* CONFIG_FAIL_PAGE_ALLOC */ |
| 3112 | 3190 | ||
| 3191 | static noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) | ||
| 3192 | { | ||
| 3193 | return __should_fail_alloc_page(gfp_mask, order); | ||
| 3194 | } | ||
| 3195 | ALLOW_ERROR_INJECTION(should_fail_alloc_page, TRUE); | ||
| 3196 | |||
| 3113 | /* | 3197 | /* |
| 3114 | * Return true if free base pages are above 'mark'. For high-order checks it | 3198 | * Return true if free base pages are above 'mark'. For high-order checks it |
| 3115 | * will return true of the order-0 watermark is reached and there is at least | 3199 | * will return true of the order-0 watermark is reached and there is at least |
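Splitting should_fail_alloc_page() into a noinline wrapper around __should_fail_alloc_page() and tagging it with ALLOW_ERROR_INJECTION makes the function visible to the kernel's function-level error-injection machinery, so its boolean result can be overridden at runtime (for instance via BPF-based error injection) even when CONFIG_FAIL_PAGE_ALLOC is disabled; the classic fault-injection knobs keep working through __should_fail_alloc_page(). For reference, the long-standing boot-time interface this complements is roughly fail_page_alloc=<interval>,<probability>,<space>,<times>; treat that syntax as an assumption here, since it is documented with the fault-injection framework rather than in this file.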
| @@ -3254,6 +3338,40 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) | |||
| 3254 | #endif /* CONFIG_NUMA */ | 3338 | #endif /* CONFIG_NUMA */ |
| 3255 | 3339 | ||
| 3256 | /* | 3340 | /* |
| 3341 | * The restriction on ZONE_DMA32 as being a suitable zone to use to avoid | ||
| 3342 | * fragmentation is subtle. If the preferred zone was HIGHMEM then | ||
| 3343 | * premature use of a lower zone may cause lowmem pressure problems that | ||
| 3344 | * are worse than fragmentation. If the next zone is ZONE_DMA then it is | ||
| 3345 | * probably too small. It only makes sense to spread allocations to avoid | ||
| 3346 | * fragmentation between the Normal and DMA32 zones. | ||
| 3347 | */ | ||
| 3348 | static inline unsigned int | ||
| 3349 | alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask) | ||
| 3350 | { | ||
| 3351 | unsigned int alloc_flags = 0; | ||
| 3352 | |||
| 3353 | if (gfp_mask & __GFP_KSWAPD_RECLAIM) | ||
| 3354 | alloc_flags |= ALLOC_KSWAPD; | ||
| 3355 | |||
| 3356 | #ifdef CONFIG_ZONE_DMA32 | ||
| 3357 | if (zone_idx(zone) != ZONE_NORMAL) | ||
| 3358 | goto out; | ||
| 3359 | |||
| 3360 | /* | ||
| 3361 | * If ZONE_DMA32 exists, assume it is the one after ZONE_NORMAL and | ||
| 3362 | * the pointer is within zone->zone_pgdat->node_zones[]. Also assume | ||
| 3363 | * on UMA that if Normal is populated then so is DMA32. | ||
| 3364 | */ | ||
| 3365 | BUILD_BUG_ON(ZONE_NORMAL - ZONE_DMA32 != 1); | ||
| 3366 | if (nr_online_nodes > 1 && !populated_zone(--zone)) | ||
| 3367 | goto out; | ||
| 3368 | |||
| 3369 | out: | ||
| 3370 | #endif /* CONFIG_ZONE_DMA32 */ | ||
| 3371 | return alloc_flags; | ||
| 3372 | } | ||
| 3373 | |||
| 3374 | /* | ||
| 3257 | * get_page_from_freelist goes through the zonelist trying to allocate | 3375 | * get_page_from_freelist goes through the zonelist trying to allocate |
| 3258 | * a page. | 3376 | * a page. |
| 3259 | */ | 3377 | */ |
| @@ -3261,14 +3379,18 @@ static struct page * | |||
| 3261 | get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, | 3379 | get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, |
| 3262 | const struct alloc_context *ac) | 3380 | const struct alloc_context *ac) |
| 3263 | { | 3381 | { |
| 3264 | struct zoneref *z = ac->preferred_zoneref; | 3382 | struct zoneref *z; |
| 3265 | struct zone *zone; | 3383 | struct zone *zone; |
| 3266 | struct pglist_data *last_pgdat_dirty_limit = NULL; | 3384 | struct pglist_data *last_pgdat_dirty_limit = NULL; |
| 3385 | bool no_fallback; | ||
| 3267 | 3386 | ||
| 3387 | retry: | ||
| 3268 | /* | 3388 | /* |
| 3269 | * Scan zonelist, looking for a zone with enough free. | 3389 | * Scan zonelist, looking for a zone with enough free. |
| 3270 | * See also __cpuset_node_allowed() comment in kernel/cpuset.c. | 3390 | * See also __cpuset_node_allowed() comment in kernel/cpuset.c. |
| 3271 | */ | 3391 | */ |
| 3392 | no_fallback = alloc_flags & ALLOC_NOFRAGMENT; | ||
| 3393 | z = ac->preferred_zoneref; | ||
| 3272 | for_next_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx, | 3394 | for_next_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx, |
| 3273 | ac->nodemask) { | 3395 | ac->nodemask) { |
| 3274 | struct page *page; | 3396 | struct page *page; |
| @@ -3307,7 +3429,23 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, | |||
| 3307 | } | 3429 | } |
| 3308 | } | 3430 | } |
| 3309 | 3431 | ||
| 3310 | mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK]; | 3432 | if (no_fallback && nr_online_nodes > 1 && |
| 3433 | zone != ac->preferred_zoneref->zone) { | ||
| 3434 | int local_nid; | ||
| 3435 | |||
| 3436 | /* | ||
| 3437 | * If moving to a remote node, retry but allow | ||
| 3438 | * fragmenting fallbacks. Locality is more important | ||
| 3439 | * than fragmentation avoidance. | ||
| 3440 | */ | ||
| 3441 | local_nid = zone_to_nid(ac->preferred_zoneref->zone); | ||
| 3442 | if (zone_to_nid(zone) != local_nid) { | ||
| 3443 | alloc_flags &= ~ALLOC_NOFRAGMENT; | ||
| 3444 | goto retry; | ||
| 3445 | } | ||
| 3446 | } | ||
| 3447 | |||
| 3448 | mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK); | ||
| 3311 | if (!zone_watermark_fast(zone, order, mark, | 3449 | if (!zone_watermark_fast(zone, order, mark, |
| 3312 | ac_classzone_idx(ac), alloc_flags)) { | 3450 | ac_classzone_idx(ac), alloc_flags)) { |
| 3313 | int ret; | 3451 | int ret; |
| @@ -3374,6 +3512,15 @@ try_this_zone: | |||
| 3374 | } | 3512 | } |
| 3375 | } | 3513 | } |
| 3376 | 3514 | ||
| 3515 | /* | ||
| 3516 | * It's possible on a UMA machine to get through all zones that are | ||
| 3517 | * fragmented. If avoiding fragmentation, reset and try again. | ||
| 3518 | */ | ||
| 3519 | if (no_fallback) { | ||
| 3520 | alloc_flags &= ~ALLOC_NOFRAGMENT; | ||
| 3521 | goto retry; | ||
| 3522 | } | ||
| 3523 | |||
| 3377 | return NULL; | 3524 | return NULL; |
| 3378 | } | 3525 | } |
| 3379 | 3526 | ||
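The switch from zone->watermark[...] to wmark_pages(zone, ...) above pairs with the rename to _watermark[] later in this diff: the accessor is expected to fold the transient boost into whichever watermark is being checked, roughly zone->_watermark[i] + zone->watermark_boost (the real definition lives in the zone header, not in this file, so treat that expression as an assumption). The practical effect is that a zone which recently had to fall back across migratetypes temporarily demands more free pages before zone_watermark_fast() passes, nudging kswapd to keep reclaiming until the boost has been consumed.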
| @@ -3413,13 +3560,13 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...) | |||
| 3413 | va_start(args, fmt); | 3560 | va_start(args, fmt); |
| 3414 | vaf.fmt = fmt; | 3561 | vaf.fmt = fmt; |
| 3415 | vaf.va = &args; | 3562 | vaf.va = &args; |
| 3416 | pr_warn("%s: %pV, mode:%#x(%pGg), nodemask=%*pbl\n", | 3563 | pr_warn("%s: %pV, mode:%#x(%pGg), nodemask=%*pbl", |
| 3417 | current->comm, &vaf, gfp_mask, &gfp_mask, | 3564 | current->comm, &vaf, gfp_mask, &gfp_mask, |
| 3418 | nodemask_pr_args(nodemask)); | 3565 | nodemask_pr_args(nodemask)); |
| 3419 | va_end(args); | 3566 | va_end(args); |
| 3420 | 3567 | ||
| 3421 | cpuset_print_current_mems_allowed(); | 3568 | cpuset_print_current_mems_allowed(); |
| 3422 | 3569 | pr_cont("\n"); | |
| 3423 | dump_stack(); | 3570 | dump_stack(); |
| 3424 | warn_alloc_show_mem(gfp_mask, nodemask); | 3571 | warn_alloc_show_mem(gfp_mask, nodemask); |
| 3425 | } | 3572 | } |
| @@ -3861,6 +4008,9 @@ gfp_to_alloc_flags(gfp_t gfp_mask) | |||
| 3861 | } else if (unlikely(rt_task(current)) && !in_interrupt()) | 4008 | } else if (unlikely(rt_task(current)) && !in_interrupt()) |
| 3862 | alloc_flags |= ALLOC_HARDER; | 4009 | alloc_flags |= ALLOC_HARDER; |
| 3863 | 4010 | ||
| 4011 | if (gfp_mask & __GFP_KSWAPD_RECLAIM) | ||
| 4012 | alloc_flags |= ALLOC_KSWAPD; | ||
| 4013 | |||
| 3864 | #ifdef CONFIG_CMA | 4014 | #ifdef CONFIG_CMA |
| 3865 | if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) | 4015 | if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) |
| 3866 | alloc_flags |= ALLOC_CMA; | 4016 | alloc_flags |= ALLOC_CMA; |
| @@ -4092,7 +4242,7 @@ retry_cpuset: | |||
| 4092 | if (!ac->preferred_zoneref->zone) | 4242 | if (!ac->preferred_zoneref->zone) |
| 4093 | goto nopage; | 4243 | goto nopage; |
| 4094 | 4244 | ||
| 4095 | if (gfp_mask & __GFP_KSWAPD_RECLAIM) | 4245 | if (alloc_flags & ALLOC_KSWAPD) |
| 4096 | wake_all_kswapds(order, gfp_mask, ac); | 4246 | wake_all_kswapds(order, gfp_mask, ac); |
| 4097 | 4247 | ||
| 4098 | /* | 4248 | /* |
| @@ -4150,7 +4300,7 @@ retry_cpuset: | |||
| 4150 | 4300 | ||
| 4151 | retry: | 4301 | retry: |
| 4152 | /* Ensure kswapd doesn't accidentally go to sleep as long as we loop */ | 4302 | /* Ensure kswapd doesn't accidentally go to sleep as long as we loop */ |
| 4153 | if (gfp_mask & __GFP_KSWAPD_RECLAIM) | 4303 | if (alloc_flags & ALLOC_KSWAPD) |
| 4154 | wake_all_kswapds(order, gfp_mask, ac); | 4304 | wake_all_kswapds(order, gfp_mask, ac); |
| 4155 | 4305 | ||
| 4156 | reserve_flags = __gfp_pfmemalloc_flags(gfp_mask); | 4306 | reserve_flags = __gfp_pfmemalloc_flags(gfp_mask); |
| @@ -4369,6 +4519,12 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, | |||
| 4369 | 4519 | ||
| 4370 | finalise_ac(gfp_mask, &ac); | 4520 | finalise_ac(gfp_mask, &ac); |
| 4371 | 4521 | ||
| 4522 | /* | ||
| 4523 | * Forbid the first pass from falling back to types that fragment | ||
| 4524 | * memory until all local zones are considered. | ||
| 4525 | */ | ||
| 4526 | alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone, gfp_mask); | ||
| 4527 | |||
| 4372 | /* First allocation attempt */ | 4528 | /* First allocation attempt */ |
| 4373 | page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac); | 4529 | page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac); |
| 4374 | if (likely(page)) | 4530 | if (likely(page)) |
| @@ -4427,16 +4583,19 @@ unsigned long get_zeroed_page(gfp_t gfp_mask) | |||
| 4427 | } | 4583 | } |
| 4428 | EXPORT_SYMBOL(get_zeroed_page); | 4584 | EXPORT_SYMBOL(get_zeroed_page); |
| 4429 | 4585 | ||
| 4430 | void __free_pages(struct page *page, unsigned int order) | 4586 | static inline void free_the_page(struct page *page, unsigned int order) |
| 4431 | { | 4587 | { |
| 4432 | if (put_page_testzero(page)) { | 4588 | if (order == 0) /* Via pcp? */ |
| 4433 | if (order == 0) | 4589 | free_unref_page(page); |
| 4434 | free_unref_page(page); | 4590 | else |
| 4435 | else | 4591 | __free_pages_ok(page, order); |
| 4436 | __free_pages_ok(page, order); | ||
| 4437 | } | ||
| 4438 | } | 4592 | } |
| 4439 | 4593 | ||
| 4594 | void __free_pages(struct page *page, unsigned int order) | ||
| 4595 | { | ||
| 4596 | if (put_page_testzero(page)) | ||
| 4597 | free_the_page(page, order); | ||
| 4598 | } | ||
| 4440 | EXPORT_SYMBOL(__free_pages); | 4599 | EXPORT_SYMBOL(__free_pages); |
| 4441 | 4600 | ||
| 4442 | void free_pages(unsigned long addr, unsigned int order) | 4601 | void free_pages(unsigned long addr, unsigned int order) |
| @@ -4485,14 +4644,8 @@ void __page_frag_cache_drain(struct page *page, unsigned int count) | |||
| 4485 | { | 4644 | { |
| 4486 | VM_BUG_ON_PAGE(page_ref_count(page) == 0, page); | 4645 | VM_BUG_ON_PAGE(page_ref_count(page) == 0, page); |
| 4487 | 4646 | ||
| 4488 | if (page_ref_sub_and_test(page, count)) { | 4647 | if (page_ref_sub_and_test(page, count)) |
| 4489 | unsigned int order = compound_order(page); | 4648 | free_the_page(page, compound_order(page)); |
| 4490 | |||
| 4491 | if (order == 0) | ||
| 4492 | free_unref_page(page); | ||
| 4493 | else | ||
| 4494 | __free_pages_ok(page, order); | ||
| 4495 | } | ||
| 4496 | } | 4649 | } |
| 4497 | EXPORT_SYMBOL(__page_frag_cache_drain); | 4650 | EXPORT_SYMBOL(__page_frag_cache_drain); |
| 4498 | 4651 | ||
| @@ -4558,7 +4711,7 @@ void page_frag_free(void *addr) | |||
| 4558 | struct page *page = virt_to_head_page(addr); | 4711 | struct page *page = virt_to_head_page(addr); |
| 4559 | 4712 | ||
| 4560 | if (unlikely(put_page_testzero(page))) | 4713 | if (unlikely(put_page_testzero(page))) |
| 4561 | __free_pages_ok(page, compound_order(page)); | 4714 | free_the_page(page, compound_order(page)); |
| 4562 | } | 4715 | } |
| 4563 | EXPORT_SYMBOL(page_frag_free); | 4716 | EXPORT_SYMBOL(page_frag_free); |
| 4564 | 4717 | ||
| @@ -4660,7 +4813,7 @@ static unsigned long nr_free_zone_pages(int offset) | |||
| 4660 | struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL); | 4813 | struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL); |
| 4661 | 4814 | ||
| 4662 | for_each_zone_zonelist(zone, z, zonelist, offset) { | 4815 | for_each_zone_zonelist(zone, z, zonelist, offset) { |
| 4663 | unsigned long size = zone->managed_pages; | 4816 | unsigned long size = zone_managed_pages(zone); |
| 4664 | unsigned long high = high_wmark_pages(zone); | 4817 | unsigned long high = high_wmark_pages(zone); |
| 4665 | if (size > high) | 4818 | if (size > high) |
| 4666 | sum += size - high; | 4819 | sum += size - high; |
| @@ -4712,7 +4865,7 @@ long si_mem_available(void) | |||
| 4712 | pages[lru] = global_node_page_state(NR_LRU_BASE + lru); | 4865 | pages[lru] = global_node_page_state(NR_LRU_BASE + lru); |
| 4713 | 4866 | ||
| 4714 | for_each_zone(zone) | 4867 | for_each_zone(zone) |
| 4715 | wmark_low += zone->watermark[WMARK_LOW]; | 4868 | wmark_low += low_wmark_pages(zone); |
| 4716 | 4869 | ||
| 4717 | /* | 4870 | /* |
| 4718 | * Estimate the amount of memory available for userspace allocations, | 4871 | * Estimate the amount of memory available for userspace allocations, |
| @@ -4746,11 +4899,11 @@ EXPORT_SYMBOL_GPL(si_mem_available); | |||
| 4746 | 4899 | ||
| 4747 | void si_meminfo(struct sysinfo *val) | 4900 | void si_meminfo(struct sysinfo *val) |
| 4748 | { | 4901 | { |
| 4749 | val->totalram = totalram_pages; | 4902 | val->totalram = totalram_pages(); |
| 4750 | val->sharedram = global_node_page_state(NR_SHMEM); | 4903 | val->sharedram = global_node_page_state(NR_SHMEM); |
| 4751 | val->freeram = global_zone_page_state(NR_FREE_PAGES); | 4904 | val->freeram = global_zone_page_state(NR_FREE_PAGES); |
| 4752 | val->bufferram = nr_blockdev_pages(); | 4905 | val->bufferram = nr_blockdev_pages(); |
| 4753 | val->totalhigh = totalhigh_pages; | 4906 | val->totalhigh = totalhigh_pages(); |
| 4754 | val->freehigh = nr_free_highpages(); | 4907 | val->freehigh = nr_free_highpages(); |
| 4755 | val->mem_unit = PAGE_SIZE; | 4908 | val->mem_unit = PAGE_SIZE; |
| 4756 | } | 4909 | } |
| @@ -4767,7 +4920,7 @@ void si_meminfo_node(struct sysinfo *val, int nid) | |||
| 4767 | pg_data_t *pgdat = NODE_DATA(nid); | 4920 | pg_data_t *pgdat = NODE_DATA(nid); |
| 4768 | 4921 | ||
| 4769 | for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) | 4922 | for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) |
| 4770 | managed_pages += pgdat->node_zones[zone_type].managed_pages; | 4923 | managed_pages += zone_managed_pages(&pgdat->node_zones[zone_type]); |
| 4771 | val->totalram = managed_pages; | 4924 | val->totalram = managed_pages; |
| 4772 | val->sharedram = node_page_state(pgdat, NR_SHMEM); | 4925 | val->sharedram = node_page_state(pgdat, NR_SHMEM); |
| 4773 | val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES); | 4926 | val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES); |
| @@ -4776,7 +4929,7 @@ void si_meminfo_node(struct sysinfo *val, int nid) | |||
| 4776 | struct zone *zone = &pgdat->node_zones[zone_type]; | 4929 | struct zone *zone = &pgdat->node_zones[zone_type]; |
| 4777 | 4930 | ||
| 4778 | if (is_highmem(zone)) { | 4931 | if (is_highmem(zone)) { |
| 4779 | managed_highpages += zone->managed_pages; | 4932 | managed_highpages += zone_managed_pages(zone); |
| 4780 | free_highpages += zone_page_state(zone, NR_FREE_PAGES); | 4933 | free_highpages += zone_page_state(zone, NR_FREE_PAGES); |
| 4781 | } | 4934 | } |
| 4782 | } | 4935 | } |
| @@ -4983,7 +5136,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) | |||
| 4983 | K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)), | 5136 | K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)), |
| 4984 | K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)), | 5137 | K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)), |
| 4985 | K(zone->present_pages), | 5138 | K(zone->present_pages), |
| 4986 | K(zone->managed_pages), | 5139 | K(zone_managed_pages(zone)), |
| 4987 | K(zone_page_state(zone, NR_MLOCK)), | 5140 | K(zone_page_state(zone, NR_MLOCK)), |
| 4988 | zone_page_state(zone, NR_KERNEL_STACK_KB), | 5141 | zone_page_state(zone, NR_KERNEL_STACK_KB), |
| 4989 | K(zone_page_state(zone, NR_PAGETABLE)), | 5142 | K(zone_page_state(zone, NR_PAGETABLE)), |
| @@ -5655,7 +5808,7 @@ static int zone_batchsize(struct zone *zone) | |||
| 5655 | * The per-cpu-pages pools are set to around 1000th of the | 5808 | * The per-cpu-pages pools are set to around 1000th of the |
| 5656 | * size of the zone. | 5809 | * size of the zone. |
| 5657 | */ | 5810 | */ |
| 5658 | batch = zone->managed_pages / 1024; | 5811 | batch = zone_managed_pages(zone) / 1024; |
| 5659 | /* But no more than a meg. */ | 5812 | /* But no more than a meg. */ |
| 5660 | if (batch * PAGE_SIZE > 1024 * 1024) | 5813 | if (batch * PAGE_SIZE > 1024 * 1024) |
| 5661 | batch = (1024 * 1024) / PAGE_SIZE; | 5814 | batch = (1024 * 1024) / PAGE_SIZE; |
| @@ -5736,7 +5889,6 @@ static void pageset_init(struct per_cpu_pageset *p) | |||
| 5736 | memset(p, 0, sizeof(*p)); | 5889 | memset(p, 0, sizeof(*p)); |
| 5737 | 5890 | ||
| 5738 | pcp = &p->pcp; | 5891 | pcp = &p->pcp; |
| 5739 | pcp->count = 0; | ||
| 5740 | for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++) | 5892 | for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++) |
| 5741 | INIT_LIST_HEAD(&pcp->lists[migratetype]); | 5893 | INIT_LIST_HEAD(&pcp->lists[migratetype]); |
| 5742 | } | 5894 | } |
| @@ -5766,7 +5918,7 @@ static void pageset_set_high_and_batch(struct zone *zone, | |||
| 5766 | { | 5918 | { |
| 5767 | if (percpu_pagelist_fraction) | 5919 | if (percpu_pagelist_fraction) |
| 5768 | pageset_set_high(pcp, | 5920 | pageset_set_high(pcp, |
| 5769 | (zone->managed_pages / | 5921 | (zone_managed_pages(zone) / |
| 5770 | percpu_pagelist_fraction)); | 5922 | percpu_pagelist_fraction)); |
| 5771 | else | 5923 | else |
| 5772 | pageset_set_batch(pcp, zone_batchsize(zone)); | 5924 | pageset_set_batch(pcp, zone_batchsize(zone)); |
| @@ -5920,7 +6072,7 @@ void __init sparse_memory_present_with_active_regions(int nid) | |||
| 5920 | * with no available memory, a warning is printed and the start and end | 6072 | * with no available memory, a warning is printed and the start and end |
| 5921 | * PFNs will be 0. | 6073 | * PFNs will be 0. |
| 5922 | */ | 6074 | */ |
| 5923 | void __meminit get_pfn_range_for_nid(unsigned int nid, | 6075 | void __init get_pfn_range_for_nid(unsigned int nid, |
| 5924 | unsigned long *start_pfn, unsigned long *end_pfn) | 6076 | unsigned long *start_pfn, unsigned long *end_pfn) |
| 5925 | { | 6077 | { |
| 5926 | unsigned long this_start_pfn, this_end_pfn; | 6078 | unsigned long this_start_pfn, this_end_pfn; |
| @@ -5969,7 +6121,7 @@ static void __init find_usable_zone_for_movable(void) | |||
| 5969 | * highest usable zone for ZONE_MOVABLE. This preserves the assumption that | 6121 | * highest usable zone for ZONE_MOVABLE. This preserves the assumption that |
| 5970 | * zones within a node are in order of monotonic increases memory addresses | 6122 | * zones within a node are in order of monotonic increases memory addresses |
| 5971 | */ | 6123 | */ |
| 5972 | static void __meminit adjust_zone_range_for_zone_movable(int nid, | 6124 | static void __init adjust_zone_range_for_zone_movable(int nid, |
| 5973 | unsigned long zone_type, | 6125 | unsigned long zone_type, |
| 5974 | unsigned long node_start_pfn, | 6126 | unsigned long node_start_pfn, |
| 5975 | unsigned long node_end_pfn, | 6127 | unsigned long node_end_pfn, |
| @@ -6000,7 +6152,7 @@ static void __meminit adjust_zone_range_for_zone_movable(int nid, | |||
| 6000 | * Return the number of pages a zone spans in a node, including holes | 6152 | * Return the number of pages a zone spans in a node, including holes |
| 6001 | * present_pages = zone_spanned_pages_in_node() - zone_absent_pages_in_node() | 6153 | * present_pages = zone_spanned_pages_in_node() - zone_absent_pages_in_node() |
| 6002 | */ | 6154 | */ |
| 6003 | static unsigned long __meminit zone_spanned_pages_in_node(int nid, | 6155 | static unsigned long __init zone_spanned_pages_in_node(int nid, |
| 6004 | unsigned long zone_type, | 6156 | unsigned long zone_type, |
| 6005 | unsigned long node_start_pfn, | 6157 | unsigned long node_start_pfn, |
| 6006 | unsigned long node_end_pfn, | 6158 | unsigned long node_end_pfn, |
| @@ -6035,7 +6187,7 @@ static unsigned long __meminit zone_spanned_pages_in_node(int nid, | |||
| 6035 | * Return the number of holes in a range on a node. If nid is MAX_NUMNODES, | 6187 | * Return the number of holes in a range on a node. If nid is MAX_NUMNODES, |
| 6036 | * then all holes in the requested range will be accounted for. | 6188 | * then all holes in the requested range will be accounted for. |
| 6037 | */ | 6189 | */ |
| 6038 | unsigned long __meminit __absent_pages_in_range(int nid, | 6190 | unsigned long __init __absent_pages_in_range(int nid, |
| 6039 | unsigned long range_start_pfn, | 6191 | unsigned long range_start_pfn, |
| 6040 | unsigned long range_end_pfn) | 6192 | unsigned long range_end_pfn) |
| 6041 | { | 6193 | { |
| @@ -6065,7 +6217,7 @@ unsigned long __init absent_pages_in_range(unsigned long start_pfn, | |||
| 6065 | } | 6217 | } |
| 6066 | 6218 | ||
| 6067 | /* Return the number of page frames in holes in a zone on a node */ | 6219 | /* Return the number of page frames in holes in a zone on a node */ |
| 6068 | static unsigned long __meminit zone_absent_pages_in_node(int nid, | 6220 | static unsigned long __init zone_absent_pages_in_node(int nid, |
| 6069 | unsigned long zone_type, | 6221 | unsigned long zone_type, |
| 6070 | unsigned long node_start_pfn, | 6222 | unsigned long node_start_pfn, |
| 6071 | unsigned long node_end_pfn, | 6223 | unsigned long node_end_pfn, |
| @@ -6117,7 +6269,7 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid, | |||
| 6117 | } | 6269 | } |
| 6118 | 6270 | ||
| 6119 | #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ | 6271 | #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
| 6120 | static inline unsigned long __meminit zone_spanned_pages_in_node(int nid, | 6272 | static inline unsigned long __init zone_spanned_pages_in_node(int nid, |
| 6121 | unsigned long zone_type, | 6273 | unsigned long zone_type, |
| 6122 | unsigned long node_start_pfn, | 6274 | unsigned long node_start_pfn, |
| 6123 | unsigned long node_end_pfn, | 6275 | unsigned long node_end_pfn, |
| @@ -6136,7 +6288,7 @@ static inline unsigned long __meminit zone_spanned_pages_in_node(int nid, | |||
| 6136 | return zones_size[zone_type]; | 6288 | return zones_size[zone_type]; |
| 6137 | } | 6289 | } |
| 6138 | 6290 | ||
| 6139 | static inline unsigned long __meminit zone_absent_pages_in_node(int nid, | 6291 | static inline unsigned long __init zone_absent_pages_in_node(int nid, |
| 6140 | unsigned long zone_type, | 6292 | unsigned long zone_type, |
| 6141 | unsigned long node_start_pfn, | 6293 | unsigned long node_start_pfn, |
| 6142 | unsigned long node_end_pfn, | 6294 | unsigned long node_end_pfn, |
| @@ -6150,7 +6302,7 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid, | |||
| 6150 | 6302 | ||
| 6151 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ | 6303 | #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ |
| 6152 | 6304 | ||
| 6153 | static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, | 6305 | static void __init calculate_node_totalpages(struct pglist_data *pgdat, |
| 6154 | unsigned long node_start_pfn, | 6306 | unsigned long node_start_pfn, |
| 6155 | unsigned long node_end_pfn, | 6307 | unsigned long node_end_pfn, |
| 6156 | unsigned long *zones_size, | 6308 | unsigned long *zones_size, |
| @@ -6323,7 +6475,7 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat) | |||
| 6323 | static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid, | 6475 | static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid, |
| 6324 | unsigned long remaining_pages) | 6476 | unsigned long remaining_pages) |
| 6325 | { | 6477 | { |
| 6326 | zone->managed_pages = remaining_pages; | 6478 | atomic_long_set(&zone->managed_pages, remaining_pages); |
| 6327 | zone_set_nid(zone, nid); | 6479 | zone_set_nid(zone, nid); |
| 6328 | zone->name = zone_names[idx]; | 6480 | zone->name = zone_names[idx]; |
| 6329 | zone->zone_pgdat = NODE_DATA(nid); | 6481 | zone->zone_pgdat = NODE_DATA(nid); |
| @@ -6476,12 +6628,6 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat) { } | |||
| 6476 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | 6628 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
| 6477 | static inline void pgdat_set_deferred_range(pg_data_t *pgdat) | 6629 | static inline void pgdat_set_deferred_range(pg_data_t *pgdat) |
| 6478 | { | 6630 | { |
| 6479 | /* | ||
| 6480 | * We start only with one section of pages, more pages are added as | ||
| 6481 | * needed until the rest of deferred pages are initialized. | ||
| 6482 | */ | ||
| 6483 | pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION, | ||
| 6484 | pgdat->node_spanned_pages); | ||
| 6485 | pgdat->first_deferred_pfn = ULONG_MAX; | 6631 | pgdat->first_deferred_pfn = ULONG_MAX; |
| 6486 | } | 6632 | } |
| 6487 | #else | 6633 | #else |
| @@ -7075,18 +7221,16 @@ early_param("movablecore", cmdline_parse_movablecore); | |||
| 7075 | 7221 | ||
| 7076 | void adjust_managed_page_count(struct page *page, long count) | 7222 | void adjust_managed_page_count(struct page *page, long count) |
| 7077 | { | 7223 | { |
| 7078 | spin_lock(&managed_page_count_lock); | 7224 | atomic_long_add(count, &page_zone(page)->managed_pages); |
| 7079 | page_zone(page)->managed_pages += count; | 7225 | totalram_pages_add(count); |
| 7080 | totalram_pages += count; | ||
| 7081 | #ifdef CONFIG_HIGHMEM | 7226 | #ifdef CONFIG_HIGHMEM |
| 7082 | if (PageHighMem(page)) | 7227 | if (PageHighMem(page)) |
| 7083 | totalhigh_pages += count; | 7228 | totalhigh_pages_add(count); |
| 7084 | #endif | 7229 | #endif |
| 7085 | spin_unlock(&managed_page_count_lock); | ||
| 7086 | } | 7230 | } |
| 7087 | EXPORT_SYMBOL(adjust_managed_page_count); | 7231 | EXPORT_SYMBOL(adjust_managed_page_count); |
| 7088 | 7232 | ||
| 7089 | unsigned long free_reserved_area(void *start, void *end, int poison, char *s) | 7233 | unsigned long free_reserved_area(void *start, void *end, int poison, const char *s) |
| 7090 | { | 7234 | { |
| 7091 | void *pos; | 7235 | void *pos; |
| 7092 | unsigned long pages = 0; | 7236 | unsigned long pages = 0; |
| @@ -7123,9 +7267,9 @@ EXPORT_SYMBOL(free_reserved_area); | |||
| 7123 | void free_highmem_page(struct page *page) | 7267 | void free_highmem_page(struct page *page) |
| 7124 | { | 7268 | { |
| 7125 | __free_reserved_page(page); | 7269 | __free_reserved_page(page); |
| 7126 | totalram_pages++; | 7270 | totalram_pages_inc(); |
| 7127 | page_zone(page)->managed_pages++; | 7271 | atomic_long_inc(&page_zone(page)->managed_pages); |
| 7128 | totalhigh_pages++; | 7272 | totalhigh_pages_inc(); |
| 7129 | } | 7273 | } |
| 7130 | #endif | 7274 | #endif |
| 7131 | 7275 | ||
| @@ -7174,10 +7318,10 @@ void __init mem_init_print_info(const char *str) | |||
| 7174 | physpages << (PAGE_SHIFT - 10), | 7318 | physpages << (PAGE_SHIFT - 10), |
| 7175 | codesize >> 10, datasize >> 10, rosize >> 10, | 7319 | codesize >> 10, datasize >> 10, rosize >> 10, |
| 7176 | (init_data_size + init_code_size) >> 10, bss_size >> 10, | 7320 | (init_data_size + init_code_size) >> 10, bss_size >> 10, |
| 7177 | (physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT - 10), | 7321 | (physpages - totalram_pages() - totalcma_pages) << (PAGE_SHIFT - 10), |
| 7178 | totalcma_pages << (PAGE_SHIFT - 10), | 7322 | totalcma_pages << (PAGE_SHIFT - 10), |
| 7179 | #ifdef CONFIG_HIGHMEM | 7323 | #ifdef CONFIG_HIGHMEM |
| 7180 | totalhigh_pages << (PAGE_SHIFT - 10), | 7324 | totalhigh_pages() << (PAGE_SHIFT - 10), |
| 7181 | #endif | 7325 | #endif |
| 7182 | str ? ", " : "", str ? str : ""); | 7326 | str ? ", " : "", str ? str : ""); |
| 7183 | } | 7327 | } |
| @@ -7257,6 +7401,7 @@ static void calculate_totalreserve_pages(void) | |||
| 7257 | for (i = 0; i < MAX_NR_ZONES; i++) { | 7401 | for (i = 0; i < MAX_NR_ZONES; i++) { |
| 7258 | struct zone *zone = pgdat->node_zones + i; | 7402 | struct zone *zone = pgdat->node_zones + i; |
| 7259 | long max = 0; | 7403 | long max = 0; |
| 7404 | unsigned long managed_pages = zone_managed_pages(zone); | ||
| 7260 | 7405 | ||
| 7261 | /* Find valid and maximum lowmem_reserve in the zone */ | 7406 | /* Find valid and maximum lowmem_reserve in the zone */ |
| 7262 | for (j = i; j < MAX_NR_ZONES; j++) { | 7407 | for (j = i; j < MAX_NR_ZONES; j++) { |
| @@ -7267,8 +7412,8 @@ static void calculate_totalreserve_pages(void) | |||
| 7267 | /* we treat the high watermark as reserved pages. */ | 7412 | /* we treat the high watermark as reserved pages. */ |
| 7268 | max += high_wmark_pages(zone); | 7413 | max += high_wmark_pages(zone); |
| 7269 | 7414 | ||
| 7270 | if (max > zone->managed_pages) | 7415 | if (max > managed_pages) |
| 7271 | max = zone->managed_pages; | 7416 | max = managed_pages; |
| 7272 | 7417 | ||
| 7273 | pgdat->totalreserve_pages += max; | 7418 | pgdat->totalreserve_pages += max; |
| 7274 | 7419 | ||
| @@ -7292,7 +7437,7 @@ static void setup_per_zone_lowmem_reserve(void) | |||
| 7292 | for_each_online_pgdat(pgdat) { | 7437 | for_each_online_pgdat(pgdat) { |
| 7293 | for (j = 0; j < MAX_NR_ZONES; j++) { | 7438 | for (j = 0; j < MAX_NR_ZONES; j++) { |
| 7294 | struct zone *zone = pgdat->node_zones + j; | 7439 | struct zone *zone = pgdat->node_zones + j; |
| 7295 | unsigned long managed_pages = zone->managed_pages; | 7440 | unsigned long managed_pages = zone_managed_pages(zone); |
| 7296 | 7441 | ||
| 7297 | zone->lowmem_reserve[j] = 0; | 7442 | zone->lowmem_reserve[j] = 0; |
| 7298 | 7443 | ||
| @@ -7310,7 +7455,7 @@ static void setup_per_zone_lowmem_reserve(void) | |||
| 7310 | lower_zone->lowmem_reserve[j] = | 7455 | lower_zone->lowmem_reserve[j] = |
| 7311 | managed_pages / sysctl_lowmem_reserve_ratio[idx]; | 7456 | managed_pages / sysctl_lowmem_reserve_ratio[idx]; |
| 7312 | } | 7457 | } |
| 7313 | managed_pages += lower_zone->managed_pages; | 7458 | managed_pages += zone_managed_pages(lower_zone); |
| 7314 | } | 7459 | } |
| 7315 | } | 7460 | } |
| 7316 | } | 7461 | } |
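The reserve formula itself is untouched; only the source of the zone sizes changes. As a worked example of the unchanged arithmetic: the pages a lower zone holds back against fallback from zone j are the combined managed pages of the zones above it (up to and including j) divided by that lower zone's sysctl_lowmem_reserve_ratio entry, so 1,048,576 higher-zone pages (4 GiB with 4 KiB pages) and a ratio of 256 give a reserve of 4,096 pages, i.e. 16 MiB kept out of reach of fallback allocations.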
| @@ -7329,14 +7474,14 @@ static void __setup_per_zone_wmarks(void) | |||
| 7329 | /* Calculate total number of !ZONE_HIGHMEM pages */ | 7474 | /* Calculate total number of !ZONE_HIGHMEM pages */ |
| 7330 | for_each_zone(zone) { | 7475 | for_each_zone(zone) { |
| 7331 | if (!is_highmem(zone)) | 7476 | if (!is_highmem(zone)) |
| 7332 | lowmem_pages += zone->managed_pages; | 7477 | lowmem_pages += zone_managed_pages(zone); |
| 7333 | } | 7478 | } |
| 7334 | 7479 | ||
| 7335 | for_each_zone(zone) { | 7480 | for_each_zone(zone) { |
| 7336 | u64 tmp; | 7481 | u64 tmp; |
| 7337 | 7482 | ||
| 7338 | spin_lock_irqsave(&zone->lock, flags); | 7483 | spin_lock_irqsave(&zone->lock, flags); |
| 7339 | tmp = (u64)pages_min * zone->managed_pages; | 7484 | tmp = (u64)pages_min * zone_managed_pages(zone); |
| 7340 | do_div(tmp, lowmem_pages); | 7485 | do_div(tmp, lowmem_pages); |
| 7341 | if (is_highmem(zone)) { | 7486 | if (is_highmem(zone)) { |
| 7342 | /* | 7487 | /* |
| @@ -7350,15 +7495,15 @@ static void __setup_per_zone_wmarks(void) | |||
| 7350 | */ | 7495 | */ |
| 7351 | unsigned long min_pages; | 7496 | unsigned long min_pages; |
| 7352 | 7497 | ||
| 7353 | min_pages = zone->managed_pages / 1024; | 7498 | min_pages = zone_managed_pages(zone) / 1024; |
| 7354 | min_pages = clamp(min_pages, SWAP_CLUSTER_MAX, 128UL); | 7499 | min_pages = clamp(min_pages, SWAP_CLUSTER_MAX, 128UL); |
| 7355 | zone->watermark[WMARK_MIN] = min_pages; | 7500 | zone->_watermark[WMARK_MIN] = min_pages; |
| 7356 | } else { | 7501 | } else { |
| 7357 | /* | 7502 | /* |
| 7358 | * If it's a lowmem zone, reserve a number of pages | 7503 | * If it's a lowmem zone, reserve a number of pages |
| 7359 | * proportionate to the zone's size. | 7504 | * proportionate to the zone's size. |
| 7360 | */ | 7505 | */ |
| 7361 | zone->watermark[WMARK_MIN] = tmp; | 7506 | zone->_watermark[WMARK_MIN] = tmp; |
| 7362 | } | 7507 | } |
| 7363 | 7508 | ||
| 7364 | /* | 7509 | /* |
| @@ -7367,11 +7512,12 @@ static void __setup_per_zone_wmarks(void) | |||
| 7367 | * ensure a minimum size on small systems. | 7512 | * ensure a minimum size on small systems. |
| 7368 | */ | 7513 | */ |
| 7369 | tmp = max_t(u64, tmp >> 2, | 7514 | tmp = max_t(u64, tmp >> 2, |
| 7370 | mult_frac(zone->managed_pages, | 7515 | mult_frac(zone_managed_pages(zone), |
| 7371 | watermark_scale_factor, 10000)); | 7516 | watermark_scale_factor, 10000)); |
| 7372 | 7517 | ||
| 7373 | zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp; | 7518 | zone->_watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp; |
| 7374 | zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2; | 7519 | zone->_watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2; |
| 7520 | zone->watermark_boost = 0; | ||
| 7375 | 7521 | ||
| 7376 | spin_unlock_irqrestore(&zone->lock, flags); | 7522 | spin_unlock_irqrestore(&zone->lock, flags); |
| 7377 | } | 7523 | } |
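The watermark array is renamed to _watermark[] and paired with a new per-zone watermark_boost, which this function resets to zero whenever the watermarks are recomputed. The implication is that the lookup macros used elsewhere (min_wmark_pages() and friends, defined in include/linux/mmzone.h rather than in this hunk) now fold the boost in; roughly:

	#define min_wmark_pages(z)  ((z)->_watermark[WMARK_MIN]  + (z)->watermark_boost)
	#define low_wmark_pages(z)  ((z)->_watermark[WMARK_LOW]  + (z)->watermark_boost)
	#define high_wmark_pages(z) ((z)->_watermark[WMARK_HIGH] + (z)->watermark_boost)

The spacing between the base watermarks is unchanged: with watermark_scale_factor at its default of 10, WMARK_LOW sits above WMARK_MIN by max(min/4, 0.1% of the zone's managed pages), and WMARK_HIGH by twice that amount.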
| @@ -7472,6 +7618,18 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write, | |||
| 7472 | return 0; | 7618 | return 0; |
| 7473 | } | 7619 | } |
| 7474 | 7620 | ||
| 7621 | int watermark_boost_factor_sysctl_handler(struct ctl_table *table, int write, | ||
| 7622 | void __user *buffer, size_t *length, loff_t *ppos) | ||
| 7623 | { | ||
| 7624 | int rc; | ||
| 7625 | |||
| 7626 | rc = proc_dointvec_minmax(table, write, buffer, length, ppos); | ||
| 7627 | if (rc) | ||
| 7628 | return rc; | ||
| 7629 | |||
| 7630 | return 0; | ||
| 7631 | } | ||
| 7632 | |||
| 7475 | int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write, | 7633 | int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write, |
| 7476 | void __user *buffer, size_t *length, loff_t *ppos) | 7634 | void __user *buffer, size_t *length, loff_t *ppos) |
| 7477 | { | 7635 | { |
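The new handler only runs the write through proc_dointvec_minmax(); unlike the min_free_kbytes and watermark_scale_factor handlers it has nothing to recompute, because the factor is consulted at boost time rather than baked into the stored watermarks. The knob is surfaced through sysctl (outside this file) as vm.watermark_boost_factor: 0 disables boosting, and the default of 15000 lets a zone's temporary boost grow to at most 150% of its high watermark, since the consumer scales by factor/10000. A rough sketch of that consumer; the real boost_watermark() sits earlier in this file, outside the hunks shown, and may differ in detail:

	static void boost_watermark_sketch(struct zone *zone)
	{
		unsigned long max_boost;

		if (!watermark_boost_factor)
			return;

		/* 15000/10000 -> boost capped at 150% of the high watermark */
		max_boost = mult_frac(zone->_watermark[WMARK_HIGH],
				      watermark_boost_factor, 10000);

		/* grow the boost one pageblock at a time, up to the cap */
		zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages,
					    max_boost);
	}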
| @@ -7497,8 +7655,8 @@ static void setup_min_unmapped_ratio(void) | |||
| 7497 | pgdat->min_unmapped_pages = 0; | 7655 | pgdat->min_unmapped_pages = 0; |
| 7498 | 7656 | ||
| 7499 | for_each_zone(zone) | 7657 | for_each_zone(zone) |
| 7500 | zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages * | 7658 | zone->zone_pgdat->min_unmapped_pages += (zone_managed_pages(zone) * |
| 7501 | sysctl_min_unmapped_ratio) / 100; | 7659 | sysctl_min_unmapped_ratio) / 100; |
| 7502 | } | 7660 | } |
| 7503 | 7661 | ||
| 7504 | 7662 | ||
| @@ -7525,8 +7683,8 @@ static void setup_min_slab_ratio(void) | |||
| 7525 | pgdat->min_slab_pages = 0; | 7683 | pgdat->min_slab_pages = 0; |
| 7526 | 7684 | ||
| 7527 | for_each_zone(zone) | 7685 | for_each_zone(zone) |
| 7528 | zone->zone_pgdat->min_slab_pages += (zone->managed_pages * | 7686 | zone->zone_pgdat->min_slab_pages += (zone_managed_pages(zone) * |
| 7529 | sysctl_min_slab_ratio) / 100; | 7687 | sysctl_min_slab_ratio) / 100; |
| 7530 | } | 7688 | } |
| 7531 | 7689 | ||
| 7532 | int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write, | 7690 | int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write, |
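Both node thresholds remain straight percentages of the node's managed pages, now summed through the accessor. For instance, a node whose zones total 2,097,152 managed pages (8 GiB with 4 KiB pages) ends up with min_unmapped_pages = 20,971 when sysctl_min_unmapped_ratio is 1, and min_slab_pages = 104,857 when sysctl_min_slab_ratio is 5.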
| @@ -7766,8 +7924,7 @@ void *__init alloc_large_system_hash(const char *tablename, | |||
| 7766 | * race condition. So you can't expect this function should be exact. | 7924 | * race condition. So you can't expect this function should be exact. |
| 7767 | */ | 7925 | */ |
| 7768 | bool has_unmovable_pages(struct zone *zone, struct page *page, int count, | 7926 | bool has_unmovable_pages(struct zone *zone, struct page *page, int count, |
| 7769 | int migratetype, | 7927 | int migratetype, int flags) |
| 7770 | bool skip_hwpoisoned_pages) | ||
| 7771 | { | 7928 | { |
| 7772 | unsigned long pfn, iter, found; | 7929 | unsigned long pfn, iter, found; |
| 7773 | 7930 | ||
| @@ -7841,7 +7998,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, | |||
| 7841 | * The HWPoisoned page may be not in buddy system, and | 7998 | * The HWPoisoned page may be not in buddy system, and |
| 7842 | * page_count() is not 0. | 7999 | * page_count() is not 0. |
| 7843 | */ | 8000 | */ |
| 7844 | if (skip_hwpoisoned_pages && PageHWPoison(page)) | 8001 | if ((flags & SKIP_HWPOISON) && PageHWPoison(page)) |
| 7845 | continue; | 8002 | continue; |
| 7846 | 8003 | ||
| 7847 | if (__PageMovable(page)) | 8004 | if (__PageMovable(page)) |
| @@ -7868,6 +8025,8 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, | |||
| 7868 | return false; | 8025 | return false; |
| 7869 | unmovable: | 8026 | unmovable: |
| 7870 | WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); | 8027 | WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); |
| 8028 | if (flags & REPORT_FAILURE) | ||
| 8029 | dump_page(pfn_to_page(pfn+iter), "unmovable page"); | ||
| 7871 | return true; | 8030 | return true; |
| 7872 | } | 8031 | } |
| 7873 | 8032 | ||
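has_unmovable_pages() trades its single skip_hwpoisoned_pages boolean for a flags bitmask, so HWPoison handling and failure reporting can be requested independently. The flag definitions live in include/linux/page-isolation.h in this series rather than in this file; the usage above assumes values along the lines of:

	/* Assumed values; see include/linux/page-isolation.h for this series. */
	#define SKIP_HWPOISON	0x1	/* treat HWPoisoned pages as isolatable   */
	#define REPORT_FAILURE	0x2	/* dump_page() whatever blocked isolation */

Only callers that pass REPORT_FAILURE get the dump_page() diagnostics added at the unmovable: label; everyone else fails silently.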
| @@ -7994,8 +8153,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, | |||
| 7994 | */ | 8153 | */ |
| 7995 | 8154 | ||
| 7996 | ret = start_isolate_page_range(pfn_max_align_down(start), | 8155 | ret = start_isolate_page_range(pfn_max_align_down(start), |
| 7997 | pfn_max_align_up(end), migratetype, | 8156 | pfn_max_align_up(end), migratetype, 0); |
| 7998 | false); | ||
| 7999 | if (ret) | 8157 | if (ret) |
| 8000 | return ret; | 8158 | return ret; |
| 8001 | 8159 | ||
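alloc_contig_range() now passes an empty flag mask, so contiguous-range allocations neither skip HWPoisoned pages nor emit the new unmovable-page dumps; memory offlining is presumably the caller that asks for both. The updated prototype this call assumes (the body lives in mm/page_isolation.c, not in this diff) would be roughly:

	int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
				     unsigned migratetype, int flags);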
