Diffstat (limited to 'kernel')
 -rw-r--r--  kernel/cpuset.c  82
 1 file changed, 66 insertions(+), 16 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2c3b4431472b..232aed2b10f9 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2342,32 +2342,48 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
 }
 
 /**
- * cpuset_zone_allowed - Can we allocate memory on zone z's memory node?
+ * cpuset_zone_allowed_softwall - Can we allocate on zone z's memory node?
  * @z: is this zone on an allowed node?
- * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL)
+ * @gfp_mask: memory allocation flags
  *
- * If we're in interrupt, yes, we can always allocate. If zone
+ * If we're in interrupt, yes, we can always allocate. If
+ * __GFP_THISNODE is set, yes, we can always allocate. If zone
  * z's node is in our tasks mems_allowed, yes. If it's not a
  * __GFP_HARDWALL request and this zone's nodes is in the nearest
  * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
  * Otherwise, no.
  *
+ * If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall()
+ * reduces to cpuset_zone_allowed_hardwall(). Otherwise,
+ * cpuset_zone_allowed_softwall() might sleep, and might allow a zone
+ * from an enclosing cpuset.
+ *
+ * cpuset_zone_allowed_hardwall() only handles the simpler case of
+ * hardwall cpusets, and never sleeps.
+ *
+ * The __GFP_THISNODE placement logic is really handled elsewhere,
+ * by forcibly using a zonelist starting at a specified node, and by
+ * (in get_page_from_freelist()) refusing to consider the zones for
+ * any node on the zonelist except the first. By the time any such
+ * calls get to this routine, we should just shut up and say 'yes'.
+ *
  * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
  * and do not allow allocations outside the current tasks cpuset.
  * GFP_KERNEL allocations are not so marked, so can escape to the
- * nearest mem_exclusive ancestor cpuset.
+ * nearest enclosing mem_exclusive ancestor cpuset.
  *
- * Scanning up parent cpusets requires callback_mutex. The __alloc_pages()
- * routine only calls here with __GFP_HARDWALL bit _not_ set if
- * it's a GFP_KERNEL allocation, and all nodes in the current tasks
- * mems_allowed came up empty on the first pass over the zonelist.
- * So only GFP_KERNEL allocations, if all nodes in the cpuset are
- * short of memory, might require taking the callback_mutex mutex.
+ * Scanning up parent cpusets requires callback_mutex. The
+ * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
+ * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
+ * current tasks mems_allowed came up empty on the first pass over
+ * the zonelist. So only GFP_KERNEL allocations, if all nodes in the
+ * cpuset are short of memory, might require taking the callback_mutex
+ * mutex.
 *
  * The first call here from mm/page_alloc:get_page_from_freelist()
- * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so
- * no allocation on a node outside the cpuset is allowed (unless in
- * interrupt, of course).
+ * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
+ * so no allocation on a node outside the cpuset is allowed (unless
+ * in interrupt, of course).
  *
  * The second pass through get_page_from_freelist() doesn't even call
  * here for GFP_ATOMIC calls. For those calls, the __alloc_pages()
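[Note] The softwall ladder spelled out in the comment above is short enough to model directly. Below is a minimal userspace sketch of that ladder, not the kernel code: the MODEL_* flags, struct model_cpuset, and the single-word node mask are invented stand-ins for the __GFP_* bits, struct cpuset, and nodemask_t, and the real routine additionally takes callback_mutex (and so may sleep) while scanning up ancestors.

#include <stdbool.h>

#define MODEL_GFP_HARDWALL 0x1	/* stand-in for __GFP_HARDWALL */
#define MODEL_GFP_THISNODE 0x2	/* stand-in for __GFP_THISNODE */

struct model_cpuset {
	unsigned long mems_allowed;	/* bit n set => node n allowed */
	bool mem_exclusive;
	struct model_cpuset *parent;	/* NULL at the root */
};

/* Walk up to the nearest mem_exclusive ancestor; the root counts. */
static const struct model_cpuset *
model_nearest_exclusive(const struct model_cpuset *cs)
{
	while (cs->parent != NULL && !cs->mem_exclusive)
		cs = cs->parent;
	return cs;
}

/* The ladder from the comment, in order: interrupt, __GFP_THISNODE,
 * the task's own mems_allowed, then (softwall only) the nearest
 * mem_exclusive ancestor's mems_allowed. */
static bool model_zone_allowed_softwall(int node, unsigned int gfp_mask,
					bool in_irq,
					const struct model_cpuset *cur)
{
	if (in_irq)
		return true;			/* interrupt: always yes */
	if (gfp_mask & MODEL_GFP_THISNODE)
		return true;			/* placement handled elsewhere */
	if (cur->mems_allowed & (1UL << node))
		return true;			/* node in our mems_allowed */
	if (gfp_mask & MODEL_GFP_HARDWALL)
		return false;			/* hardwall request: stop here */
	return model_nearest_exclusive(cur)->mems_allowed & (1UL << node);
}

Passing MODEL_GFP_HARDWALL short-circuits before the ancestor walk, which is exactly why the hardwall-marked path never needs callback_mutex and never sleeps.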
@@ -2380,12 +2396,12 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * GFP_USER - only nodes in current tasks mems allowed ok.
  *
  * Rule:
- *    Don't call cpuset_zone_allowed() if you can't sleep, unless you
+ *    Don't call cpuset_zone_allowed_softwall if you can't sleep, unless you
  *    pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
  *    the code that might scan up ancestor cpusets and sleep.
- **/
+ */
 
-int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
 {
 	int node;			/* node that zone z is on */
 	const struct cpuset *cs;	/* current cpuset ancestors */
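[Note] To make the GFP_USER/GFP_KERNEL distinction above concrete: the only difference relevant here is the __GFP_HARDWALL bit. A toy driver for the sketch after the first hunk (the model_* names are the hypothetical helpers defined there, and the two-cpuset topology is invented for illustration):

#include <stdio.h>

/* Node 1 is inside the task's cpuset; node 2 is outside it but inside
 * the nearest mem_exclusive ancestor. */
int main(void)
{
	struct model_cpuset root = {
		.mems_allowed = 0x6,	/* nodes 1 and 2 */
		.mem_exclusive = true,
		.parent = NULL,
	};
	struct model_cpuset task_cs = {
		.mems_allowed = 0x2,	/* node 1 only */
		.mem_exclusive = false,
		.parent = &root,
	};

	/* GFP_KERNEL-like: hardwall bit clear, may escape upward. */
	printf("kernel-like, node 2: %d\n",
	       model_zone_allowed_softwall(2, 0, false, &task_cs));	/* 1 */

	/* GFP_USER-like: hardwall bit set, confined to the cpuset. */
	printf("user-like,   node 2: %d\n",
	       model_zone_allowed_softwall(2, MODEL_GFP_HARDWALL,
					   false, &task_cs));		/* 0 */
	return 0;
}

The GFP_USER-like request stops at the task's own cpuset, while the GFP_KERNEL-like one escapes to the enclosing mem_exclusive ancestor, matching the table and Rule above.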
@@ -2415,6 +2431,40 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 	return allowed;
 }
 
+/*
+ * cpuset_zone_allowed_hardwall - Can we allocate on zone z's memory node?
+ * @z: is this zone on an allowed node?
+ * @gfp_mask: memory allocation flags
+ *
+ * If we're in interrupt, yes, we can always allocate.
+ * If __GFP_THISNODE is set, yes, we can always allocate. If zone
+ * z's node is in our tasks mems_allowed, yes. Otherwise, no.
+ *
+ * The __GFP_THISNODE placement logic is really handled elsewhere,
+ * by forcibly using a zonelist starting at a specified node, and by
+ * (in get_page_from_freelist()) refusing to consider the zones for
+ * any node on the zonelist except the first. By the time any such
+ * calls get to this routine, we should just shut up and say 'yes'.
+ *
+ * Unlike the cpuset_zone_allowed_softwall() variant, above,
+ * this variant requires that the zone be in the current tasks
+ * mems_allowed or that we're in interrupt. It does not scan up the
+ * cpuset hierarchy for the nearest enclosing mem_exclusive cpuset.
+ * It never sleeps.
+ */
+
+int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
+{
+	int node;			/* node that zone z is on */
+
+	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
+		return 1;
+	node = zone_to_nid(z);
+	if (node_isset(node, current->mems_allowed))
+		return 1;
+	return 0;
+}
+
 /**
  * cpuset_lock - lock out any changes to cpuset structures
  *
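[Note] In the same model, the hardwall variant added by this hunk is just the first three rungs of the ladder, as its comment says; with no ancestor scan there is no callback_mutex and nothing that can sleep, so it is usable where the softwall variant is not. A sketch under the same assumptions (MODEL_GFP_THISNODE is again a stand-in for __GFP_THISNODE):

#include <stdbool.h>

#define MODEL_GFP_THISNODE 0x2	/* stand-in for __GFP_THISNODE */

/* Mirrors the shape of __cpuset_zone_allowed_hardwall(): interrupt,
 * __GFP_THISNODE, or the node being in the task's own mems_allowed.
 * No ancestor walk, no mutex, so it never sleeps. */
static bool model_zone_allowed_hardwall(int node, unsigned int gfp_mask,
					bool in_irq,
					unsigned long mems_allowed)
{
	if (in_irq || (gfp_mask & MODEL_GFP_THISNODE))
		return true;
	return (mems_allowed & (1UL << node)) != 0;
}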