author		Johannes Weiner <hannes@cmpxchg.org>	2015-01-26 15:58:32 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-01-26 16:37:18 -0500
commit		9879de7373fcfb466ec198293b6ccc1ad7a42dd8 (patch)
tree		047533253898e317ce7e41a1078433e1e19d625f	/mm/page_alloc.c
parent		26bc420b59a38e4e6685a73345a0def461136dce (diff)
mm: page_alloc: embed OOM killing naturally into allocation slowpath
The OOM killing invocation does a lot of duplicative checks against the
task's allocation context.  Rework it to take advantage of the existing
checks in the allocator slowpath.

The OOM killer is invoked when the allocator is unable to reclaim any
pages but the allocation has to keep looping.  Instead of having a check
for __GFP_NORETRY hidden in oom_gfp_allowed(), just move the OOM
invocation to the true branch of should_alloc_retry().  The __GFP_FS
check from oom_gfp_allowed() can then be moved into the OOM avoidance
branch in __alloc_pages_may_oom(), along with the PF_DUMPCORE test.

__alloc_pages_may_oom() can then signal to the caller whether the OOM
killer was invoked, instead of requiring it to duplicate the order and
high_zoneidx checks to guess this when deciding whether to continue.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
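[Editor's note] For orientation, here is a minimal, self-contained userspace sketch (not kernel code) of the control flow this patch arrives at.  The helpers reclaim(), should_retry(), may_oom() and alloc_slowpath() are simplified stand-ins for direct reclaim, should_alloc_retry(), __alloc_pages_may_oom() and __alloc_pages_slowpath(); the sketch only shows that the OOM killer is now attempted inside the retry branch and that *did_some_progress tells the caller whether to keep looping.

/*
 * Standalone sketch of the reworked slowpath, not kernel code.  All
 * functions are simplified stand-ins; only the shape of the retry/OOM
 * decision mirrors the patched __alloc_pages_slowpath().
 */
#include <stdbool.h>
#include <stdio.h>

static bool oom_killer_disabled;	/* stand-in for the kernel flag */

/*
 * Stand-in for __alloc_pages_may_oom(): returns a "page" (a bool here)
 * and sets *did_some_progress when the OOM killer was actually invoked,
 * so the caller knows whether retrying makes sense.
 */
static bool may_oom(unsigned long *did_some_progress)
{
	*did_some_progress = 0;
	if (oom_killer_disabled)
		return false;		/* caller gives up (nopage) */
	/*
	 * The real function bails out here for coredumps, costly orders,
	 * lowmem requests and !__GFP_FS without touching *did_some_progress.
	 */
	printf("invoking OOM killer\n");
	*did_some_progress = 1;		/* a kill may have freed memory */
	return false;			/* no page yet; caller retries */
}

/* Stand-in for direct reclaim: pretend nothing could be freed. */
static unsigned long reclaim(void)
{
	return 0;
}

/* Stand-in for should_alloc_retry(): loop a bounded number of times. */
static bool should_retry(unsigned long pages_reclaimed, int attempt)
{
	return attempt < 3 && pages_reclaimed == 0;
}

static bool alloc_slowpath(void)
{
	unsigned long pages_reclaimed = 0;
	int attempt = 0;

	for (;;) {
		unsigned long did_some_progress = reclaim();
		bool page = false;

		pages_reclaimed += did_some_progress;
		if (!should_retry(pages_reclaimed, attempt++))
			return false;	/* give up: nopage */

		/*
		 * Reclaim freed nothing but the allocation must keep
		 * going: this is now the only place OOM killing is tried.
		 */
		if (!did_some_progress) {
			page = may_oom(&did_some_progress);
			if (page)
				return true;
			if (!did_some_progress)
				return false;	/* nopage */
		}
		/* otherwise wait briefly (wait_iff_congested) and retry */
	}
}

int main(void)
{
	printf("allocation %s\n", alloc_slowpath() ? "succeeded" : "failed");
	return 0;
}

Built with any C compiler, the sketch prints one OOM attempt per fruitless retry and then reports that the allocation failed, matching the new behaviour where OOM killing happens only when the retry check wants another pass but reclaim made no progress.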
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	| 82
1 file changed, 35 insertions(+), 47 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7633c503a116..8e20f9c2fa5a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2332,12 +2332,21 @@ static inline struct page *
 __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, struct zone *preferred_zone,
-	int classzone_idx, int migratetype)
+	int classzone_idx, int migratetype, unsigned long *did_some_progress)
 {
 	struct page *page;
 
-	/* Acquire the per-zone oom lock for each zone */
+	*did_some_progress = 0;
+
+	if (oom_killer_disabled)
+		return NULL;
+
+	/*
+	 * Acquire the per-zone oom lock for each zone.  If that
+	 * fails, somebody else is making progress for us.
+	 */
 	if (!oom_zonelist_trylock(zonelist, gfp_mask)) {
+		*did_some_progress = 1;
 		schedule_timeout_uninterruptible(1);
 		return NULL;
 	}
@@ -2363,12 +2372,18 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 		goto out;
 
 	if (!(gfp_mask & __GFP_NOFAIL)) {
+		/* Coredumps can quickly deplete all memory reserves */
+		if (current->flags & PF_DUMPCORE)
+			goto out;
 		/* The OOM killer will not help higher order allocs */
 		if (order > PAGE_ALLOC_COSTLY_ORDER)
 			goto out;
 		/* The OOM killer does not needlessly kill tasks for lowmem */
 		if (high_zoneidx < ZONE_NORMAL)
 			goto out;
+		/* The OOM killer does not compensate for light reclaim */
+		if (!(gfp_mask & __GFP_FS))
+			goto out;
 		/*
 		 * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
 		 * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
@@ -2381,7 +2396,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 	}
 	/* Exhausted what can be done so it's blamo time */
 	out_of_memory(zonelist, gfp_mask, order, nodemask, false);
-
+	*did_some_progress = 1;
 out:
 	oom_zonelist_unlock(zonelist, gfp_mask);
 	return page;
@@ -2658,7 +2673,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	    (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
 		goto nopage;
 
-restart:
+retry:
 	if (!(gfp_mask & __GFP_NO_KSWAPD))
 		wake_all_kswapds(order, zonelist, high_zoneidx,
 				 preferred_zone, nodemask);
@@ -2681,7 +2696,6 @@ restart:
 		classzone_idx = zonelist_zone_idx(preferred_zoneref);
 	}
 
-rebalance:
 	/* This is the last chance, in general, before the goto nopage. */
 	page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
 			high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
@@ -2788,54 +2802,28 @@ rebalance:
 	if (page)
 		goto got_pg;
 
-	/*
-	 * If we failed to make any progress reclaiming, then we are
-	 * running out of options and have to consider going OOM
-	 */
-	if (!did_some_progress) {
-		if (oom_gfp_allowed(gfp_mask)) {
-			if (oom_killer_disabled)
-				goto nopage;
-			/* Coredumps can quickly deplete all memory reserves */
-			if ((current->flags & PF_DUMPCORE) &&
-			    !(gfp_mask & __GFP_NOFAIL))
-				goto nopage;
-			page = __alloc_pages_may_oom(gfp_mask, order,
-					zonelist, high_zoneidx,
-					nodemask, preferred_zone,
-					classzone_idx, migratetype);
-			if (page)
-				goto got_pg;
-
-			if (!(gfp_mask & __GFP_NOFAIL)) {
-				/*
-				 * The oom killer is not called for high-order
-				 * allocations that may fail, so if no progress
-				 * is being made, there are no other options and
-				 * retrying is unlikely to help.
-				 */
-				if (order > PAGE_ALLOC_COSTLY_ORDER)
-					goto nopage;
-				/*
-				 * The oom killer is not called for lowmem
-				 * allocations to prevent needlessly killing
-				 * innocent tasks.
-				 */
-				if (high_zoneidx < ZONE_NORMAL)
-					goto nopage;
-			}
-
-			goto restart;
-		}
-	}
-
 	/* Check if we should retry the allocation */
 	pages_reclaimed += did_some_progress;
 	if (should_alloc_retry(gfp_mask, order, did_some_progress,
 						pages_reclaimed)) {
+		/*
+		 * If we fail to make progress by freeing individual
+		 * pages, but the allocation wants us to keep going,
+		 * start OOM killing tasks.
+		 */
+		if (!did_some_progress) {
+			page = __alloc_pages_may_oom(gfp_mask, order, zonelist,
+						high_zoneidx, nodemask,
+						preferred_zone, classzone_idx,
+						migratetype, &did_some_progress);
+			if (page)
+				goto got_pg;
+			if (!did_some_progress)
+				goto nopage;
+		}
 		/* Wait for some write requests to complete then retry */
 		wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
-		goto rebalance;
+		goto retry;
 	} else {
 		/*
 		 * High-order allocations do not necessarily loop after