author:    Mel Gorman <mgorman@suse.de>  2012-11-26 19:29:48 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org>  2012-11-26 20:41:24 -0500
commit:    50694c28f1e1dbea18272980d265742a5027fb63
tree:      a04762b6db3954f6550ed6b86deea04148e9829c  /mm/vmscan.c
parent:    82b212f40059bffd6808c07266a942d444d5558a
mm: vmscan: check for fatal signals iff the process was throttled
Commit 5515061d22f0 ("mm: throttle direct reclaimers if PF_MEMALLOC reserves are low and swap is backed by network storage") introduced a check for fatal signals after a process gets throttled for network storage. The intention was that if a process was throttled and got killed, it should not trigger the OOM killer. As pointed out by Minchan Kim and David Rientjes, this check is in the wrong place and too broad. If a system is in an OOM situation and a process is exiting, it can loop in __alloc_pages_slowpath(), calling direct reclaim repeatedly. As a fatal signal is pending, direct reclaim returns 1 as if it were making forward progress, and the process can effectively deadlock.

This patch moves the fatal_signal_pending() check after throttling into throttle_direct_reclaim(), where it belongs. If the process is killed while throttled, it returns immediately without performing direct reclaim, except that it will now have TIF_MEMDIE set and will use the PFMEMALLOC reserves.

Minchan pointed out that it may be better to do direct reclaim before returning, because there may be easily reclaimable pages whose release would avoid using the reserves. However, we do no such targeted reclaim and there is no guarantee that suitable pages are available. As this throttling is expected to happen when swap-over-NFS is in use, there is a possibility that the process will instead swap, which may allocate network buffers from the PFMEMALLOC reserves. Hence, in the swap-over-NFS case a process that is throttled and then killed can either use the reserves to exit, or potentially use them to swap a few pages and then exit. This patch takes the option of using the reserves if necessary to allow the process to exit quickly.

If this patch passes review it should be considered a -stable candidate for 3.6.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Luigi Semenzato <semenzato@google.com>
Cc: Dan Magenheimer <dan.magenheimer@oracle.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Sonny Rao <sonnyrao@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
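[Editor's note] To illustrate the failure mode being fixed, the following is a small standalone C model, not kernel code; all names in it are stubs invented for the example. It shows how the old unconditional fatal_signal_pending() check let the allocator's retry loop spin: reclaim reports progress without freeing anything, so the slow path keeps retrying.

/*
 * Standalone model of the pre-patch behaviour; NOT kernel code.
 * Build with: cc -o model model.c
 */
#include <stdbool.h>
#include <stdio.h>

/* Stub: the exiting process has a fatal signal pending. */
static bool fatal_signal_pending_stub(void) { return true; }

/*
 * Old behaviour: report progress (1) whenever a fatal signal is
 * pending, even though no page was actually reclaimed.
 */
static unsigned long try_to_free_pages_old(void)
{
	if (fatal_signal_pending_stub())
		return 1;
	return 0; /* real reclaim elided */
}

int main(void)
{
	int retries = 0;

	/*
	 * Model of the __alloc_pages_slowpath() retry loop.  The real
	 * loop has no such cap; "progress" from reclaim justifies
	 * another attempt, so the dying task can spin indefinitely.
	 */
	while (retries < 1000000 && try_to_free_pages_old())
		retries++;

	printf("retried %d times without reclaiming a single page\n", retries);
	return 0;
}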
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--  mm/vmscan.c | 37
1 file changed, 27 insertions(+), 10 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 48550c66f1f2..cbf84e152f04 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2207,9 +2207,12 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
  * Throttle direct reclaimers if backing storage is backed by the network
  * and the PFMEMALLOC reserve for the preferred node is getting dangerously
  * depleted. kswapd will continue to make progress and wake the processes
- * when the low watermark is reached
+ * when the low watermark is reached.
+ *
+ * Returns true if a fatal signal was delivered during throttling. If this
+ * happens, the page allocator should not consider triggering the OOM killer.
  */
-static void throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
+static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
 					nodemask_t *nodemask)
 {
 	struct zone *zone;
@@ -2224,13 +2227,20 @@ static void throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
 	 * processes to block on log_wait_commit().
 	 */
 	if (current->flags & PF_KTHREAD)
-		return;
+		goto out;
+
+	/*
+	 * If a fatal signal is pending, this process should not throttle.
+	 * It should return quickly so it can exit and free its memory
+	 */
+	if (fatal_signal_pending(current))
+		goto out;
 
 	/* Check if the pfmemalloc reserves are ok */
 	first_zones_zonelist(zonelist, high_zoneidx, NULL, &zone);
 	pgdat = zone->zone_pgdat;
 	if (pfmemalloc_watermark_ok(pgdat))
-		return;
+		goto out;
 
 	/* Account for the throttling */
 	count_vm_event(PGSCAN_DIRECT_THROTTLE);
@@ -2246,12 +2256,20 @@ static void throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
 	if (!(gfp_mask & __GFP_FS)) {
 		wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
 			pfmemalloc_watermark_ok(pgdat), HZ);
-		return;
+
+		goto check_pending;
 	}
 
 	/* Throttle until kswapd wakes the process */
 	wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
 		pfmemalloc_watermark_ok(pgdat));
+
+check_pending:
+	if (fatal_signal_pending(current))
+		return true;
+
+out:
+	return false;
 }
 
 unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
@@ -2273,13 +2291,12 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 		.gfp_mask = sc.gfp_mask,
 	};
 
-	throttle_direct_reclaim(gfp_mask, zonelist, nodemask);
-
 	/*
-	 * Do not enter reclaim if fatal signal is pending. 1 is returned so
-	 * that the page allocator does not consider triggering OOM
+	 * Do not enter reclaim if fatal signal was delivered while throttled.
+	 * 1 is returned so that the page allocator does not OOM kill at this
+	 * point.
 	 */
-	if (fatal_signal_pending(current))
+	if (throttle_direct_reclaim(gfp_mask, zonelist, nodemask))
 		return 1;
 
 	trace_mm_vmscan_direct_reclaim_begin(order,
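[Editor's note] For readers piecing together the hunks above, the following condensed standalone C model sketches the post-patch control flow of throttle_direct_reclaim(). It is not kernel code: the stub booleans stand in for the real checks (PF_KTHREAD, fatal_signal_pending(), pfmemalloc_watermark_ok(), __GFP_FS) and the waits are simulated, with the simulation delivering a fatal signal while the task is throttled.

/*
 * Condensed standalone model of throttle_direct_reclaim() after this
 * patch; NOT kernel code.  Stub booleans replace the real checks.
 */
#include <stdbool.h>
#include <stdio.h>

static bool is_kthread;    /* stands in for current->flags & PF_KTHREAD */
static bool fatal_signal;  /* stands in for fatal_signal_pending(current) */
static bool watermark_ok;  /* stands in for pfmemalloc_watermark_ok(pgdat) */
static bool gfp_fs = true; /* stands in for gfp_mask & __GFP_FS */

/*
 * Stand-in for sleeping on pgdat->pfmemalloc_wait; here it simulates
 * a fatal signal being delivered while the task is throttled.
 */
static void simulated_throttle_wait(void)
{
	fatal_signal = true;
}

static bool throttle_direct_reclaim_model(void)
{
	if (is_kthread)
		goto out;       /* kernel threads are never throttled */
	if (fatal_signal)
		goto out;       /* already dying: skip throttling, exit fast */
	if (watermark_ok)
		goto out;       /* reserves healthy: nothing to wait for */

	if (!gfp_fs) {
		simulated_throttle_wait(); /* bounded wait (HZ timeout) */
		goto check_pending;
	}
	simulated_throttle_wait();  /* killable wait until kswapd wakes us */

check_pending:
	if (fatal_signal)
		return true;    /* killed while throttled */
out:
	return false;           /* safe to enter direct reclaim */
}

int main(void)
{
	/*
	 * Caller mirrors try_to_free_pages(): a true result becomes
	 * "return 1", so the page allocator does not OOM kill here.
	 */
	if (throttle_direct_reclaim_model())
		printf("return 1: skip reclaim, no OOM kill\n");
	return 0;
}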