aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Rientjes <rientjes@google.com>2007-05-06 17:49:32 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-05-07 15:12:53 -0400
commitc596d9f320aaf30d28c1d793ff3a976dee1db8f5 (patch)
treea07a482831077ef0220467fadf77218272638ddb
parenta3a02be79114b854acc555e8ed686eb84f44ae2e (diff)
cpusets: allow TIF_MEMDIE threads to allocate anywhere
OOM killed tasks have access to memory reserves as specified by the TIF_MEMDIE flag in the hopes that it will quickly exit. If such a task has memory allocations constrained by cpusets, we may encounter a deadlock if a blocking task cannot exit because it cannot allocate the necessary memory. We allow tasks that have the TIF_MEMDIE flag to allocate memory anywhere, including outside its cpuset restriction, so that it can quickly die regardless of whether it is __GFP_HARDWALL. Cc: Andi Kleen <ak@suse.de> Cc: Paul Jackson <pj@sgi.com> Cc: Christoph Lameter <clameter@engr.sgi.com> Signed-off-by: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  kernel/cpuset.c | 22
1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f382b0f775e1..d240349cbf0f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2351,6 +2351,8 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
2351 2351  * z's node is in our tasks mems_allowed, yes. If it's not a
2352 2352  * __GFP_HARDWALL request and this zone's nodes is in the nearest
2353 2353  * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
     2354 + * If the task has been OOM killed and has access to memory reserves
     2355 + * as specified by the TIF_MEMDIE flag, yes.
2354 2356  * Otherwise, no.
2355 2357  *
2356 2358  * If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall()
@@ -2368,7 +2370,8 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
2368 2370  * calls get to this routine, we should just shut up and say 'yes'.
2369 2371  *
2370 2372  * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
2371      - * and do not allow allocations outside the current tasks cpuset.
     2373 + * and do not allow allocations outside the current tasks cpuset
     2374 + * unless the task has been OOM killed and is marked TIF_MEMDIE.
2372 2375  * GFP_KERNEL allocations are not so marked, so can escape to the
2373 2376  * nearest enclosing mem_exclusive ancestor cpuset.
2374 2377  *
@@ -2392,6 +2395,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
2392 2395  * affect that:
2393 2396  * in_interrupt - any node ok (current task context irrelevant)
2394 2397  * GFP_ATOMIC - any node ok
     2398 + * TIF_MEMDIE - any node ok
2395 2399  * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok
2396 2400  * GFP_USER - only nodes in current tasks mems allowed ok.
2397 2401  *
@@ -2413,6 +2417,12 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
2413 2417 might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
2414 2418 if (node_isset(node, current->mems_allowed))
2415 2419 return 1;
2420 /*
2421 * Allow tasks that have access to memory reserves because they have
2422 * been OOM killed to get memory anywhere.
2423 */
2424 if (unlikely(test_thread_flag(TIF_MEMDIE)))
2425 return 1;
2416 2426 if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */
2417 2427 return 0;
2418 2428
@@ -2438,7 +2448,9 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
2438 2448  *
2439 2449  * If we're in interrupt, yes, we can always allocate.
2440 2450  * If __GFP_THISNODE is set, yes, we can always allocate. If zone
2441      - * z's node is in our tasks mems_allowed, yes. Otherwise, no.
     2451 + * z's node is in our tasks mems_allowed, yes. If the task has been
     2452 + * OOM killed and has access to memory reserves as specified by the
     2453 + * TIF_MEMDIE flag, yes. Otherwise, no.
2442 2454  *
2443 2455  * The __GFP_THISNODE placement logic is really handled elsewhere,
2444 2456  * by forcibly using a zonelist starting at a specified node, and by
@@ -2462,6 +2474,12 @@ int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
2462 2474 node = zone_to_nid(z);
2463 2475 if (node_isset(node, current->mems_allowed))
2464 2476 return 1;
2477 /*
2478 * Allow tasks that have access to memory reserves because they have
2479 * been OOM killed to get memory anywhere.
2480 */
2481 if (unlikely(test_thread_flag(TIF_MEMDIE)))
2482 return 1;
2465 2483 return 0;
2466 2484 }
2467 2485