author		Johannes Weiner <hannes@cmpxchg.org>	2015-01-26 15:58:32 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-01-26 16:37:18 -0500
commit		9879de7373fcfb466ec198293b6ccc1ad7a42dd8 (patch)
tree		047533253898e317ce7e41a1078433e1e19d625f	/mm/page_alloc.c
parent		26bc420b59a38e4e6685a73345a0def461136dce (diff)
mm: page_alloc: embed OOM killing naturally into allocation slowpath
The OOM killing invocation does a lot of duplicative checks against the
task's allocation context.  Rework it to take advantage of the existing
checks in the allocator slowpath.

The OOM killer is invoked when the allocator is unable to reclaim any
pages but the allocation has to keep looping.  Instead of having a check
for __GFP_NORETRY hidden in oom_gfp_allowed(), just move the OOM
invocation to the true branch of should_alloc_retry().  The __GFP_FS
check from oom_gfp_allowed() can then be moved into the OOM avoidance
branch in __alloc_pages_may_oom(), along with the PF_DUMPCORE test.

__alloc_pages_may_oom() can then signal to the caller whether the OOM
killer was invoked, instead of requiring it to duplicate the order and
high_zoneidx checks to guess this when deciding whether to continue.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
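[Editor's note] For orientation, here is a minimal, self-contained userspace sketch (not kernel code) of the control flow this patch arrives at.  The helpers reclaim(), should_retry(), may_oom() and alloc_slowpath() are simplified stand-ins for direct reclaim, should_alloc_retry(), __alloc_pages_may_oom() and __alloc_pages_slowpath(); the sketch only shows that the OOM killer is now attempted inside the retry branch and that *did_some_progress tells the caller whether to keep looping.

/*
 * Standalone sketch of the reworked slowpath, not kernel code.  All
 * functions are simplified stand-ins; only the shape of the retry/OOM
 * decision mirrors the patched __alloc_pages_slowpath().
 */
#include <stdbool.h>
#include <stdio.h>

static bool oom_killer_disabled;	/* stand-in for the kernel flag */

/*
 * Stand-in for __alloc_pages_may_oom(): returns a "page" (a bool here)
 * and sets *did_some_progress when the OOM killer was actually invoked,
 * so the caller knows whether retrying makes sense.
 */
static bool may_oom(unsigned long *did_some_progress)
{
	*did_some_progress = 0;
	if (oom_killer_disabled)
		return false;		/* caller gives up (nopage) */
	/*
	 * The real function bails out here for coredumps, costly orders,
	 * lowmem requests and !__GFP_FS without touching *did_some_progress.
	 */
	printf("invoking OOM killer\n");
	*did_some_progress = 1;		/* a kill may have freed memory */
	return false;			/* no page yet; caller retries */
}

/* Stand-in for direct reclaim: pretend nothing could be freed. */
static unsigned long reclaim(void)
{
	return 0;
}

/* Stand-in for should_alloc_retry(): loop a bounded number of times. */
static bool should_retry(unsigned long pages_reclaimed, int attempt)
{
	return attempt < 3 && pages_reclaimed == 0;
}

static bool alloc_slowpath(void)
{
	unsigned long pages_reclaimed = 0;
	int attempt = 0;

	for (;;) {
		unsigned long did_some_progress = reclaim();
		bool page = false;

		pages_reclaimed += did_some_progress;
		if (!should_retry(pages_reclaimed, attempt++))
			return false;	/* give up: nopage */

		/*
		 * Reclaim freed nothing but the allocation must keep
		 * going: this is now the only place OOM killing is tried.
		 */
		if (!did_some_progress) {
			page = may_oom(&did_some_progress);
			if (page)
				return true;
			if (!did_some_progress)
				return false;	/* nopage */
		}
		/* otherwise wait briefly (wait_iff_congested) and retry */
	}
}

int main(void)
{
	printf("allocation %s\n", alloc_slowpath() ? "succeeded" : "failed");
	return 0;
}

Built with any C compiler, the sketch prints one OOM attempt per fruitless retry and then reports that the allocation failed, matching the new behaviour where OOM killing happens only when the retry check wants another pass but reclaim made no progress.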
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	| 82
1 file changed, 35 insertions(+), 47 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7633c503a116..8e20f9c2fa5a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2332,12 +2332,21 @@ static inline struct page *
 __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, struct zone *preferred_zone,
-	int classzone_idx, int migratetype)
+	int classzone_idx, int migratetype, unsigned long *did_some_progress)
 {
 	struct page *page;
 
-	/* Acquire the per-zone oom lock for each zone */
+	*did_some_progress = 0;
+
+	if (oom_killer_disabled)
+		return NULL;
+
+	/*
+	 * Acquire the per-zone oom lock for each zone.  If that
+	 * fails, somebody else is making progress for us.
+	 */
 	if (!oom_zonelist_trylock(zonelist, gfp_mask)) {
+		*did_some_progress = 1;
 		schedule_timeout_uninterruptible(1);
 		return NULL;
 	}
@@ -2363,12 +2372,18 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 		goto out;
 
 	if (!(gfp_mask & __GFP_NOFAIL)) {
+		/* Coredumps can quickly deplete all memory reserves */
+		if (current->flags & PF_DUMPCORE)
+			goto out;
 		/* The OOM killer will not help higher order allocs */
 		if (order > PAGE_ALLOC_COSTLY_ORDER)
 			goto out;
 		/* The OOM killer does not needlessly kill tasks for lowmem */
 		if (high_zoneidx < ZONE_NORMAL)
 			goto out;
+		/* The OOM killer does not compensate for light reclaim */
+		if (!(gfp_mask & __GFP_FS))
+			goto out;
 		/*
 		 * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
 		 * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
@@ -2381,7 +2396,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	}
 	/* Exhausted what can be done so it's blamo time */
 	out_of_memory(zonelist, gfp_mask, order, nodemask, false);
-
+	*did_some_progress = 1;
 out:
 	oom_zonelist_unlock(zonelist, gfp_mask);
 	return page;
@@ -2658,7 +2673,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	    (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
 		goto nopage;
 
-restart:
+retry:
 	if (!(gfp_mask & __GFP_NO_KSWAPD))
 		wake_all_kswapds(order, zonelist, high_zoneidx,
 				 preferred_zone, nodemask);
@@ -2681,7 +2696,6 @@ restart:
 		classzone_idx = zonelist_zone_idx(preferred_zoneref);
 	}
 
-rebalance:
 	/* This is the last chance, in general, before the goto nopage. */
 	page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
 			high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -2788,54 +2802,28 @@ rebalance:
 	if (page)
 		goto got_pg;
 
-	/*
-	 * If we failed to make any progress reclaiming, then we are
-	 * running out of options and have to consider going OOM
-	 */
-	if (!did_some_progress) {
-		if (oom_gfp_allowed(gfp_mask)) {
-			if (oom_killer_disabled)
-				goto nopage;
-			/* Coredumps can quickly deplete all memory reserves */
-			if ((current->flags & PF_DUMPCORE) &&
-			    !(gfp_mask & __GFP_NOFAIL))
-				goto nopage;
-			page = __alloc_pages_may_oom(gfp_mask, order,
-					zonelist, high_zoneidx,
-					nodemask, preferred_zone,
-					classzone_idx, migratetype);
-			if (page)
-				goto got_pg;
-
-			if (!(gfp_mask & __GFP_NOFAIL)) {
-				/*
-				 * The oom killer is not called for high-order
-				 * allocations that may fail, so if no progress
-				 * is being made, there are no other options and
-				 * retrying is unlikely to help.
-				 */
-				if (order > PAGE_ALLOC_COSTLY_ORDER)
-					goto nopage;
-				/*
-				 * The oom killer is not called for lowmem
-				 * allocations to prevent needlessly killing
-				 * innocent tasks.
-				 */
-				if (high_zoneidx < ZONE_NORMAL)
-					goto nopage;
-			}
-
-			goto restart;
-		}
-	}
-
 	/* Check if we should retry the allocation */
 	pages_reclaimed += did_some_progress;
 	if (should_alloc_retry(gfp_mask, order, did_some_progress,
 						pages_reclaimed)) {
+		/*
+		 * If we fail to make progress by freeing individual
+		 * pages, but the allocation wants us to keep going,
+		 * start OOM killing tasks.
+		 */
+		if (!did_some_progress) {
+			page = __alloc_pages_may_oom(gfp_mask, order, zonelist,
+						high_zoneidx, nodemask,
+						preferred_zone, classzone_idx,
+						migratetype, &did_some_progress);
+			if (page)
+				goto got_pg;
+			if (!did_some_progress)
+				goto nopage;
+		}
 		/* Wait for some write requests to complete then retry */
 		wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
-		goto rebalance;
+		goto retry;
 	} else {
 		/*
 		 * High-order allocations do not necessarily loop after