Diffstat (limited to 'kernel')
 kernel/cpuset.c | 82 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 66 insertions(+), 16 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2c3b4431472b..232aed2b10f9 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2342,32 +2342,48 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
 }
 
 /**
- * cpuset_zone_allowed - Can we allocate memory on zone z's memory node?
+ * cpuset_zone_allowed_softwall - Can we allocate on zone z's memory node?
  * @z: is this zone on an allowed node?
- * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL)
+ * @gfp_mask: memory allocation flags
  *
- * If we're in interrupt, yes, we can always allocate.  If zone
+ * If we're in interrupt, yes, we can always allocate.  If
+ * __GFP_THISNODE is set, yes, we can always allocate.  If zone
  * z's node is in our tasks mems_allowed, yes.  If it's not a
  * __GFP_HARDWALL request and this zone's nodes is in the nearest
  * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
  * Otherwise, no.
  *
+ * If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall()
+ * reduces to cpuset_zone_allowed_hardwall().  Otherwise,
+ * cpuset_zone_allowed_softwall() might sleep, and might allow a zone
+ * from an enclosing cpuset.
+ *
+ * cpuset_zone_allowed_hardwall() only handles the simpler case of
+ * hardwall cpusets, and never sleeps.
+ *
+ * The __GFP_THISNODE placement logic is really handled elsewhere,
+ * by forcibly using a zonelist starting at a specified node, and by
+ * (in get_page_from_freelist()) refusing to consider the zones for
+ * any node on the zonelist except the first.  By the time any such
+ * calls get to this routine, we should just shut up and say 'yes'.
+ *
  * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
  * and do not allow allocations outside the current tasks cpuset.
  * GFP_KERNEL allocations are not so marked, so can escape to the
- * nearest mem_exclusive ancestor cpuset.
+ * nearest enclosing mem_exclusive ancestor cpuset.
  *
- * Scanning up parent cpusets requires callback_mutex.  The __alloc_pages()
- * routine only calls here with __GFP_HARDWALL bit _not_ set if
- * it's a GFP_KERNEL allocation, and all nodes in the current tasks
- * mems_allowed came up empty on the first pass over the zonelist.
- * So only GFP_KERNEL allocations, if all nodes in the cpuset are
- * short of memory, might require taking the callback_mutex mutex.
+ * Scanning up parent cpusets requires callback_mutex.  The
+ * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
+ * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
+ * current tasks mems_allowed came up empty on the first pass over
+ * the zonelist.  So only GFP_KERNEL allocations, if all nodes in the
+ * cpuset are short of memory, might require taking the callback_mutex
+ * mutex.
  *
  * The first call here from mm/page_alloc:get_page_from_freelist()
- * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so
- * no allocation on a node outside the cpuset is allowed (unless in
- * interrupt, of course).
+ * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
+ * so no allocation on a node outside the cpuset is allowed (unless
+ * in interrupt, of course).
  *
  * The second pass through get_page_from_freelist() doesn't even call
  * here for GFP_ATOMIC calls.  For those calls, the __alloc_pages()
@@ -2380,12 +2396,12 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  *	GFP_USER     - only nodes in current tasks mems allowed ok.
  *
  * Rule:
- *    Don't call cpuset_zone_allowed() if you can't sleep, unless you
+ *    Don't call cpuset_zone_allowed_softwall if you can't sleep, unless you
  *    pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
  *    the code that might scan up ancestor cpusets and sleep.
- **/
+ */
 
-int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
 {
 	int node;			/* node that zone z is on */
 	const struct cpuset *cs;	/* current cpuset ancestors */
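An illustrative aside on the "Rule:" above (this sketch is not part of the patch): with __GFP_HARDWALL set, the softwall check never scans up ancestor cpusets and so never sleeps; without it, the scan may take callback_mutex and block. A minimal caller sketch in kernel C, assuming the cpuset_zone_allowed_softwall() inline wrapper that the include/linux/cpuset.h half of this change provides (headers fall outside the 'kernel' diffstat shown here); my_zone_ok() is hypothetical:

#include <linux/cpuset.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/mmzone.h>

/* Hypothetical caller, sketch only: honors the sleep rule above. */
static int my_zone_ok(struct zone *z, gfp_t gfp_mask)
{
	if (!(gfp_mask & __GFP_HARDWALL))
		might_sleep();	/* softwall scan may block on callback_mutex */
	return cpuset_zone_allowed_softwall(z, gfp_mask);
}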
@@ -2415,6 +2431,40 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 	return allowed;
 }
 
+/*
+ * cpuset_zone_allowed_hardwall - Can we allocate on zone z's memory node?
+ * @z: is this zone on an allowed node?
+ * @gfp_mask: memory allocation flags
+ *
+ * If we're in interrupt, yes, we can always allocate.
+ * If __GFP_THISNODE is set, yes, we can always allocate.  If zone
+ * z's node is in our tasks mems_allowed, yes.  Otherwise, no.
+ *
+ * The __GFP_THISNODE placement logic is really handled elsewhere,
+ * by forcibly using a zonelist starting at a specified node, and by
+ * (in get_page_from_freelist()) refusing to consider the zones for
+ * any node on the zonelist except the first.  By the time any such
+ * calls get to this routine, we should just shut up and say 'yes'.
+ *
+ * Unlike the cpuset_zone_allowed_softwall() variant, above,
+ * this variant requires that the zone be in the current tasks
+ * mems_allowed or that we're in interrupt.  It does not scan up the
+ * cpuset hierarchy for the nearest enclosing mem_exclusive cpuset.
+ * It never sleeps.
+ */
+
+int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
+{
+	int node;			/* node that zone z is on */
+
+	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
+		return 1;
+	node = zone_to_nid(z);
+	if (node_isset(node, current->mems_allowed))
+		return 1;
+	return 0;
+}
+
 /**
  * cpuset_lock - lock out any changes to cpuset structures
  *
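For the hardwall variant, a similarly hedged sketch: a check made from code that cannot sleep must stay on the hardwall path, which only tests in_interrupt(), __GFP_THISNODE, and current->mems_allowed, and never takes callback_mutex. zone_ok_atomic() is hypothetical, and again assumes the cpuset_zone_allowed_hardwall() wrapper from the header side of this change:

#include <linux/cpuset.h>
#include <linux/gfp.h>
#include <linux/mmzone.h>

/* Hypothetical helper, sketch only: safe under spinlocks or in IRQ
 * context because the hardwall check never sleeps. */
static int zone_ok_atomic(struct zone *z)
{
	return cpuset_zone_allowed_hardwall(z, GFP_ATOMIC);
}

The design point of splitting the API this way: callers now choose sleep-safety explicitly by function name, rather than implicitly by whether __GFP_HARDWALL happened to be set in gfp_mask.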