aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorMel Gorman <mgorman@suse.de>2012-11-29 16:54:30 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-11-30 11:51:18 -0500
commit782fd30406ecb9d9b082816abe0c6008fc72a7b0 (patch)
tree50e6769cda69a088ecec4d477c740426329b6d2f /mm
parenta50915394f1fc02c2861d3b7ce7014788aa5066e (diff)
mm: avoid waking kswapd for THP allocations when compaction is deferred or contended
With "mm: vmscan: scale number of pages reclaimed by reclaim/compaction based on failures" reverted, Zdenek Kabelac reported the following Hmm, so it's just took longer to hit the problem and observe kswapd0 spinning on my CPU again - it's not as endless like before - but still it easily eats minutes - it helps to turn off Firefox or TB (memory hungry apps) so kswapd0 stops soon - and restart those apps again. (And I still have like >1GB of cached memory) kswapd0 R running task 0 30 2 0x00000000 Call Trace: preempt_schedule+0x42/0x60 _raw_spin_unlock+0x55/0x60 put_super+0x31/0x40 drop_super+0x22/0x30 prune_super+0x149/0x1b0 shrink_slab+0xba/0x510 The sysrq+m indicates the system has no swap so it'll never reclaim anonymous pages as part of reclaim/compaction. That is one part of the problem but not the root cause as file-backed pages could also be reclaimed. The likely underlying problem is that kswapd is woken up or kept awake for each THP allocation request in the page allocator slow path. If compaction fails for the requesting process then compaction will be deferred for a time and direct reclaim is avoided. However, if there is a storm of THP requests that are simply rejected, it will still be the case that kswapd is awake for a prolonged period of time as pgdat->kswapd_max_order is updated each time. This is noticed by the main kswapd() loop and it will not call kswapd_try_to_sleep(). Instead it will loop, shrinking a small number of pages and calling shrink_slab() on each iteration. This patch defers when kswapd gets woken up for THP allocations. For !THP allocations, kswapd is always woken up. For THP allocations, kswapd is woken up iff the process is willing to enter into direct reclaim/compaction. 
[akpm@linux-foundation.org: fix typo in comment] Signed-off-by: Mel Gorman <mgorman@suse.de> Cc: Zdenek Kabelac <zkabelac@redhat.com> Cc: Seth Jennings <sjenning@linux.vnet.ibm.com> Cc: Jiri Slaby <jirislaby@gmail.com> Cc: Rik van Riel <riel@redhat.com> Cc: Robert Jennings <rcj@linux.vnet.ibm.com> Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu> Cc: Glauber Costa <glommer@gmail.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/page_alloc.c37
1 files changed, 27 insertions, 10 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8193809f3de0..a8f2c87792c3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2378,6 +2378,15 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
2378 return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_NO_WATERMARKS); 2378 return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_NO_WATERMARKS);
2379} 2379}
2380 2380
2381/* Returns true if the allocation is likely for THP */
2382static bool is_thp_alloc(gfp_t gfp_mask, unsigned int order)
2383{
2384 if (order == pageblock_order &&
2385 (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE)
2386 return true;
2387 return false;
2388}
2389
2381static inline struct page * 2390static inline struct page *
2382__alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, 2391__alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
2383 struct zonelist *zonelist, enum zone_type high_zoneidx, 2392 struct zonelist *zonelist, enum zone_type high_zoneidx,
@@ -2416,7 +2425,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
2416 goto nopage; 2425 goto nopage;
2417 2426
2418restart: 2427restart:
2419 wake_all_kswapd(order, zonelist, high_zoneidx, 2428 /* The decision whether to wake kswapd for THP is made later */
2429 if (!is_thp_alloc(gfp_mask, order))
2430 wake_all_kswapd(order, zonelist, high_zoneidx,
2420 zone_idx(preferred_zone)); 2431 zone_idx(preferred_zone));
2421 2432
2422 /* 2433 /*
@@ -2487,15 +2498,21 @@ rebalance:
2487 goto got_pg; 2498 goto got_pg;
2488 sync_migration = true; 2499 sync_migration = true;
2489 2500
2490 /* 2501 if (is_thp_alloc(gfp_mask, order)) {
2491 * If compaction is deferred for high-order allocations, it is because 2502 /*
2492 * sync compaction recently failed. In this is the case and the caller 2503 * If compaction is deferred for high-order allocations, it is
2493 * requested a movable allocation that does not heavily disrupt the 2504 * because sync compaction recently failed. If this is the case
2494 * system then fail the allocation instead of entering direct reclaim. 2505 * and the caller requested a movable allocation that does not
2495 */ 2506 * heavily disrupt the system then fail the allocation instead
2496 if ((deferred_compaction || contended_compaction) && 2507 * of entering direct reclaim.
2497 (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE) 2508 */
2498 goto nopage; 2509 if (deferred_compaction || contended_compaction)
2510 goto nopage;
2511
2512 /* If process is willing to reclaim/compact then wake kswapd */
2513 wake_all_kswapd(order, zonelist, high_zoneidx,
2514 zone_idx(preferred_zone));
2515 }
2499 2516
2500 /* Try direct reclaim and then allocating */ 2517 /* Try direct reclaim and then allocating */
2501 page = __alloc_pages_direct_reclaim(gfp_mask, order, 2518 page = __alloc_pages_direct_reclaim(gfp_mask, order,