author    Mel Gorman <mgorman@techsingularity.net>    2015-11-06 19:28:40 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>    2015-11-06 20:50:42 -0500
commit    97a16fc82a7c5b0cfce95c05dfb9561e306ca1b1 (patch)
tree      23a0c1706ab43d50efb611b2d0d572539aaaf51d /mm
parent    0aaa29a56e4fb0fc9e24edb649e2733a672ca099 (diff)
mm, page_alloc: only enforce watermarks for order-0 allocations
The primary purpose of watermarks is to ensure that reclaim can always make forward progress in PF_MEMALLOC context (kswapd and direct reclaim). These assume that order-0 allocations are all that is necessary for forward progress.

High-order watermarks serve a different purpose. Kswapd had no high-order awareness before they were introduced (https://lkml.kernel.org/r/413AA7B2.4000907@yahoo.com.au). This was particularly important when there were high-order atomic requests. The watermarks both gave kswapd awareness and made a reserve for those atomic requests.

There are two important side-effects of this. The most important is that a non-atomic high-order request can fail even though free pages are available and the order-0 watermarks are ok. The second is that high-order watermark checks are expensive, as the free-list counts up to the requested order must be examined.

With the introduction of MIGRATE_HIGHATOMIC it is no longer necessary to have high-order watermarks. Kswapd and compaction still need high-order awareness, which is handled by checking that at least one suitable high-order page is free.

With the patch applied, there was little difference in the allocation failure rates, as the atomic reserves are small relative to the number of allocation attempts. The expected impact is that there will never be an allocation failure report that shows suitable pages on the free lists.

The one potential side-effect of this is that in a vanilla kernel, the watermark checks may have kept a free page for an atomic allocation. Now, we are 100% relying on the HighAtomic reserves and on an early allocation to have created them. If the first high-order atomic allocation occurs after the system is already heavily fragmented then it'll fail.

[akpm@linux-foundation.org: simplify __zone_watermark_ok(), per Vlastimil]
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Vitaly Wool <vitalywool@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
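In outline, the reworked check is: pass the order-0 watermark first; then, for a high-order request, look for at least one free page of sufficient order in a usable migratetype. The standalone C sketch below models that idea only. The types and names (zone_model, free_area_model, watermark_ok, suitable) are simplified, hypothetical stand-ins, not the kernel's, and the highatomic-reserve and CMA accounting done by the real __zone_watermark_ok() is omitted.

#include <stdbool.h>
#include <stdio.h>

#define MAX_ORDER 11	/* default kernel value; here just a model parameter */

/* Hypothetical, simplified stand-ins for the kernel's zone and free_area */
struct free_area_model {
	unsigned long nr_free;	/* free blocks of this order, all migratetypes */
	bool suitable;		/* stand-in for "a usable free_list is non-empty" */
};

struct zone_model {
	long free_pages;	/* analogue of the zone's free page count */
	long watermark;		/* order-0 mark (lowmem_reserve folded in for brevity) */
	struct free_area_model free_area[MAX_ORDER];
};

/* Simplified analogue of the reworked watermark check */
static bool watermark_ok(const struct zone_model *z, unsigned int order)
{
	long free_pages = z->free_pages - ((1L << order) - 1);
	unsigned int o;

	/* The order-0 watermark must hold for any request */
	if (free_pages <= z->watermark)
		return false;

	/* An order-0 request needs nothing further */
	if (!order)
		return true;

	/* High-order: one suitable free page of order >= 'order' is enough */
	for (o = order; o < MAX_ORDER; o++)
		if (z->free_area[o].nr_free && z->free_area[o].suitable)
			return true;

	return false;
}

int main(void)
{
	struct zone_model z = { .free_pages = 1024, .watermark = 128 };

	z.free_area[3].nr_free = 1;	/* a single free order-3 block */
	z.free_area[3].suitable = true;

	printf("order 0 -> %d\n", watermark_ok(&z, 0));	/* 1: watermark met */
	printf("order 2 -> %d\n", watermark_ok(&z, 2));	/* 1: order-3 block suffices */
	printf("order 4 -> %d\n", watermark_ok(&z, 4));	/* 0: nothing large enough */
	return 0;
}

Unlike the old loop, which walked every order below the request, subtracted lower-order free pages and halved the mark at each step, this shape only consults per-order nr_free counters and free-list emptiness, which is what makes the check cheap.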
Diffstat (limited to 'mm')
-rw-r--r--  mm/page_alloc.c  53
1 file changed, 39 insertions, 14 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 55e9c56dfe54..b8d560afe266 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2322,8 +2322,10 @@ static inline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 #endif /* CONFIG_FAIL_PAGE_ALLOC */
 
 /*
- * Return true if free pages are above 'mark'. This takes into account the order
- * of the allocation.
+ * Return true if free base pages are above 'mark'. For high-order checks it
+ * will return true of the order-0 watermark is reached and there is at least
+ * one free page of a suitable size. Checking now avoids taking the zone lock
+ * to check in the allocation paths if no pages are free.
  */
 static bool __zone_watermark_ok(struct zone *z, unsigned int order,
 			unsigned long mark, int classzone_idx, int alloc_flags,
@@ -2331,7 +2333,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
 {
 	long min = mark;
 	int o;
-	long free_cma = 0;
+	const int alloc_harder = (alloc_flags & ALLOC_HARDER);
 
 	/* free_pages may go negative - that's OK */
 	free_pages -= (1 << order) - 1;
@@ -2344,7 +2346,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
 	 * the high-atomic reserves. This will over-estimate the size of the
 	 * atomic reserve but it avoids a search.
 	 */
-	if (likely(!(alloc_flags & ALLOC_HARDER)))
+	if (likely(!alloc_harder))
 		free_pages -= z->nr_reserved_highatomic;
 	else
 		min -= min / 4;
@@ -2352,22 +2354,45 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
 #ifdef CONFIG_CMA
 	/* If allocation can't use CMA areas don't use free CMA pages */
 	if (!(alloc_flags & ALLOC_CMA))
-		free_cma = zone_page_state(z, NR_FREE_CMA_PAGES);
+		free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
 #endif
 
-	if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx])
+	/*
+	 * Check watermarks for an order-0 allocation request. If these
+	 * are not met, then a high-order request also cannot go ahead
+	 * even if a suitable page happened to be free.
+	 */
+	if (free_pages <= min + z->lowmem_reserve[classzone_idx])
 		return false;
-	for (o = 0; o < order; o++) {
-		/* At the next order, this order's pages become unavailable */
-		free_pages -= z->free_area[o].nr_free << o;
 
-		/* Require fewer higher order pages to be free */
-		min >>= 1;
+	/* If this is an order-0 request then the watermark is fine */
+	if (!order)
+		return true;
+
+	/* For a high-order request, check at least one suitable page is free */
+	for (o = order; o < MAX_ORDER; o++) {
+		struct free_area *area = &z->free_area[o];
+		int mt;
+
+		if (!area->nr_free)
+			continue;
+
+		if (alloc_harder)
+			return true;
 
-		if (free_pages <= min)
-			return false;
+		for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
+			if (!list_empty(&area->free_list[mt]))
+				return true;
+		}
+
+#ifdef CONFIG_CMA
+		if ((alloc_flags & ALLOC_CMA) &&
+		    !list_empty(&area->free_list[MIGRATE_CMA])) {
+			return true;
+		}
+#endif
 	}
-	return true;
+	return false;
 }
 
 bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,