aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMel Gorman <mgorman@suse.de>2012-01-12 20:19:45 -0500
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2012-08-01 15:27:18 -0400
commitd50462a3a29fc5f53ef4a5d74eb693b4d4cb1512 (patch)
treead17a74f1b57d55b9572ef70b77e71bc9ba69dcf
parentf869774c37710ef2b773d167d184b9936988d07f (diff)
mm: vmscan: when reclaiming for compaction, ensure there are sufficient free pages available
commit fe4b1b244bdb96136855f2c694071cb09d140766 upstream. Stable note: Not tracked on Bugzilla. THP and compaction was found to aggressively reclaim pages and stall systems under different situations that was addressed piecemeal over time. This patch addresses a problem where the fix regressed THP allocation success rates. In commit e0887c19 ("vmscan: limit direct reclaim for higher order allocations"), Rik noted that reclaim was too aggressive when THP was enabled. In his initial patch he used the number of free pages to decide if reclaim should abort for compaction. My feedback was that reclaim and compaction should be using the same logic when deciding if reclaim should be aborted. Unfortunately, this had the effect of reducing THP success rates when the workload included something like streaming reads that continually allocated pages. The window during which compaction could run and return a THP was too small. This patch combines Rik's two patches together. compaction_suitable() is still used to decide if reclaim should be aborted to allow compaction to go ahead. However, it will also ensure that there is a reasonable buffer of free pages available. This improves upon the THP allocation success rates but bounds the number of pages that are freed for compaction. Signed-off-by: Mel Gorman <mgorman@suse.de> Reviewed-by: Rik van Riel <riel@redhat.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Minchan Kim <minchan.kim@gmail.com> Cc: Dave Jones <davej@redhat.com> Cc: Jan Kara <jack@suse.cz> Cc: Andy Isaacson <adi@hexapodia.org> Cc: Nai Xia <nai.xia@gmail.com> Cc: Johannes Weiner <jweiner@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Mel Gorman <mgorman@suse.de> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--mm/vmscan.c44
1 files changed, 39 insertions, 5 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 870cbcfc328..eadab094314 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2075,6 +2075,42 @@ restart:
2075 throttle_vm_writeout(sc->gfp_mask); 2075 throttle_vm_writeout(sc->gfp_mask);
2076} 2076}
2077 2077
2078/* Returns true if compaction should go ahead for a high-order request */
2079static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
2080{
2081 unsigned long balance_gap, watermark;
2082 bool watermark_ok;
2083
2084 /* Do not consider compaction for orders reclaim is meant to satisfy */
2085 if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
2086 return false;
2087
2088 /*
2089 * Compaction takes time to run and there are potentially other
2090 * callers using the pages just freed. Continue reclaiming until
2091 * there is a buffer of free pages available to give compaction
2092 * a reasonable chance of completing and allocating the page
2093 */
2094 balance_gap = min(low_wmark_pages(zone),
2095 (zone->present_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
2096 KSWAPD_ZONE_BALANCE_GAP_RATIO);
2097 watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
2098 watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
2099
2100 /*
2101 * If compaction is deferred, reclaim up to a point where
2102 * compaction will have a chance of success when re-enabled
2103 */
2104 if (compaction_deferred(zone))
2105 return watermark_ok;
2106
2107 /* If compaction is not ready to start, keep reclaiming */
2108 if (!compaction_suitable(zone, sc->order))
2109 return false;
2110
2111 return watermark_ok;
2112}
2113
2078/* 2114/*
2079 * This is the direct reclaim path, for page-allocating processes. We only 2115 * This is the direct reclaim path, for page-allocating processes. We only
2080 * try to reclaim pages from zones which will satisfy the caller's allocation 2116 * try to reclaim pages from zones which will satisfy the caller's allocation
@@ -2092,8 +2128,8 @@ restart:
2092 * scan then give up on it. 2128 * scan then give up on it.
2093 * 2129 *
2094 * This function returns true if a zone is being reclaimed for a costly 2130 * This function returns true if a zone is being reclaimed for a costly
2095 * high-order allocation and compaction is either ready to begin or deferred. 2131 * high-order allocation and compaction is ready to begin. This indicates to
2096 * This indicates to the caller that it should retry the allocation or fail. 2132 * the caller that it should retry the allocation or fail.
2097 */ 2133 */
2098static bool shrink_zones(int priority, struct zonelist *zonelist, 2134static bool shrink_zones(int priority, struct zonelist *zonelist,
2099 struct scan_control *sc) 2135 struct scan_control *sc)
@@ -2127,9 +2163,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
2127 * noticable problem, like transparent huge page 2163 * noticable problem, like transparent huge page
2128 * allocations. 2164 * allocations.
2129 */ 2165 */
2130 if (sc->order > PAGE_ALLOC_COSTLY_ORDER && 2166 if (compaction_ready(zone, sc)) {
2131 (compaction_suitable(zone, sc->order) ||
2132 compaction_deferred(zone))) {
2133 should_abort_reclaim = true; 2167 should_abort_reclaim = true;
2134 continue; 2168 continue;
2135 } 2169 }