diff options
author | David Rientjes <rientjes@google.com> | 2007-05-06 17:49:32 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-07 15:12:53 -0400 |
commit | c596d9f320aaf30d28c1d793ff3a976dee1db8f5 (patch) | |
tree | a07a482831077ef0220467fadf77218272638ddb /kernel/cpuset.c | |
parent | a3a02be79114b854acc555e8ed686eb84f44ae2e (diff) |
cpusets: allow TIF_MEMDIE threads to allocate anywhere
OOM killed tasks have access to memory reserves as specified by the
TIF_MEMDIE flag in the hopes that they will quickly exit. If such a task has
memory allocations constrained by cpusets, we may encounter a deadlock if a
blocking task cannot exit because it cannot allocate the necessary memory.
We allow tasks that have the TIF_MEMDIE flag to allocate memory anywhere,
including outside their cpuset restrictions, so that they can quickly die
regardless of whether the allocation is __GFP_HARDWALL.
Cc: Andi Kleen <ak@suse.de>
Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@engr.sgi.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 22 |
1 files changed, 20 insertions, 2 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index f382b0f775e1..d240349cbf0f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -2351,6 +2351,8 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) | |||
2351 | * z's node is in our tasks mems_allowed, yes. If it's not a | 2351 | * z's node is in our tasks mems_allowed, yes. If it's not a |
2352 | * __GFP_HARDWALL request and this zone's nodes is in the nearest | 2352 | * __GFP_HARDWALL request and this zone's nodes is in the nearest |
2353 | * mem_exclusive cpuset ancestor to this tasks cpuset, yes. | 2353 | * mem_exclusive cpuset ancestor to this tasks cpuset, yes. |
2354 | * If the task has been OOM killed and has access to memory reserves | ||
2355 | * as specified by the TIF_MEMDIE flag, yes. | ||
2354 | * Otherwise, no. | 2356 | * Otherwise, no. |
2355 | * | 2357 | * |
2356 | * If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall() | 2358 | * If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall() |
@@ -2368,7 +2370,8 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) | |||
2368 | * calls get to this routine, we should just shut up and say 'yes'. | 2370 | * calls get to this routine, we should just shut up and say 'yes'. |
2369 | * | 2371 | * |
2370 | * GFP_USER allocations are marked with the __GFP_HARDWALL bit, | 2372 | * GFP_USER allocations are marked with the __GFP_HARDWALL bit, |
2371 | * and do not allow allocations outside the current tasks cpuset. | 2373 | * and do not allow allocations outside the current tasks cpuset |
2374 | * unless the task has been OOM killed and is marked TIF_MEMDIE. | ||
2372 | * GFP_KERNEL allocations are not so marked, so can escape to the | 2375 | * GFP_KERNEL allocations are not so marked, so can escape to the |
2373 | * nearest enclosing mem_exclusive ancestor cpuset. | 2376 | * nearest enclosing mem_exclusive ancestor cpuset. |
2374 | * | 2377 | * |
@@ -2392,6 +2395,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) | |||
2392 | * affect that: | 2395 | * affect that: |
2393 | * in_interrupt - any node ok (current task context irrelevant) | 2396 | * in_interrupt - any node ok (current task context irrelevant) |
2394 | * GFP_ATOMIC - any node ok | 2397 | * GFP_ATOMIC - any node ok |
2398 | * TIF_MEMDIE - any node ok | ||
2395 | * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok | 2399 | * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok |
2396 | * GFP_USER - only nodes in current tasks mems allowed ok. | 2400 | * GFP_USER - only nodes in current tasks mems allowed ok. |
2397 | * | 2401 | * |
@@ -2413,6 +2417,12 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask) | |||
2413 | might_sleep_if(!(gfp_mask & __GFP_HARDWALL)); | 2417 | might_sleep_if(!(gfp_mask & __GFP_HARDWALL)); |
2414 | if (node_isset(node, current->mems_allowed)) | 2418 | if (node_isset(node, current->mems_allowed)) |
2415 | return 1; | 2419 | return 1; |
2420 | /* | ||
2421 | * Allow tasks that have access to memory reserves because they have | ||
2422 | * been OOM killed to get memory anywhere. | ||
2423 | */ | ||
2424 | if (unlikely(test_thread_flag(TIF_MEMDIE))) | ||
2425 | return 1; | ||
2416 | if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */ | 2426 | if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */ |
2417 | return 0; | 2427 | return 0; |
2418 | 2428 | ||
@@ -2438,7 +2448,9 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask) | |||
2438 | * | 2448 | * |
2439 | * If we're in interrupt, yes, we can always allocate. | 2449 | * If we're in interrupt, yes, we can always allocate. |
2440 | * If __GFP_THISNODE is set, yes, we can always allocate. If zone | 2450 | * If __GFP_THISNODE is set, yes, we can always allocate. If zone |
2441 | * z's node is in our tasks mems_allowed, yes. Otherwise, no. | 2451 | * z's node is in our tasks mems_allowed, yes. If the task has been |
2452 | * OOM killed and has access to memory reserves as specified by the | ||
2453 | * TIF_MEMDIE flag, yes. Otherwise, no. | ||
2442 | * | 2454 | * |
2443 | * The __GFP_THISNODE placement logic is really handled elsewhere, | 2455 | * The __GFP_THISNODE placement logic is really handled elsewhere, |
2444 | * by forcibly using a zonelist starting at a specified node, and by | 2456 | * by forcibly using a zonelist starting at a specified node, and by |
@@ -2462,6 +2474,12 @@ int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask) | |||
2462 | node = zone_to_nid(z); | 2474 | node = zone_to_nid(z); |
2463 | if (node_isset(node, current->mems_allowed)) | 2475 | if (node_isset(node, current->mems_allowed)) |
2464 | return 1; | 2476 | return 1; |
2477 | /* | ||
2478 | * Allow tasks that have access to memory reserves because they have | ||
2479 | * been OOM killed to get memory anywhere. | ||
2480 | */ | ||
2481 | if (unlikely(test_thread_flag(TIF_MEMDIE))) | ||
2482 | return 1; | ||
2465 | return 0; | 2483 | return 0; |
2466 | } | 2484 | } |
2467 | 2485 | ||