-rw-r--r--  include/linux/cpuset.h | 22
-rw-r--r--  kernel/cpuset.c        | 82
-rw-r--r--  mm/hugetlb.c           |  2
-rw-r--r--  mm/oom_kill.c          |  2
-rw-r--r--  mm/page_alloc.c        |  2
-rw-r--r--  mm/slab.c              |  2
-rw-r--r--  mm/vmscan.c            |  8
7 files changed, 92 insertions(+), 28 deletions(-)
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 8821e1f75b44..826b15e914e2 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -30,10 +30,19 @@ void cpuset_update_task_memory_state(void);
 		nodes_subset((nodes), current->mems_allowed)
 int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl);
 
-extern int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask);
-static int inline cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+extern int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask);
+extern int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask);
+
+static int inline cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
+{
+	return number_of_cpusets <= 1 ||
+		__cpuset_zone_allowed_softwall(z, gfp_mask);
+}
+
+static int inline cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
 {
-	return number_of_cpusets <= 1 || __cpuset_zone_allowed(z, gfp_mask);
+	return number_of_cpusets <= 1 ||
+		__cpuset_zone_allowed_hardwall(z, gfp_mask);
 }
 
 extern int cpuset_excl_nodes_overlap(const struct task_struct *p);
@@ -94,7 +103,12 @@ static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
 	return 1;
 }
 
-static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+static inline int cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
+{
+	return 1;
+}
+
+static inline int cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
 {
 	return 1;
 }
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2c3b4431472b..232aed2b10f9 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2342,32 +2342,48 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
 }
 
 /**
- * cpuset_zone_allowed - Can we allocate memory on zone z's memory node?
+ * cpuset_zone_allowed_softwall - Can we allocate on zone z's memory node?
  * @z: is this zone on an allowed node?
- * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL)
+ * @gfp_mask: memory allocation flags
  *
- * If we're in interrupt, yes, we can always allocate.  If zone
+ * If we're in interrupt, yes, we can always allocate.  If
+ * __GFP_THISNODE is set, yes, we can always allocate.  If zone
  * z's node is in our tasks mems_allowed, yes.  If it's not a
  * __GFP_HARDWALL request and this zone's nodes is in the nearest
  * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
  * Otherwise, no.
  *
+ * If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall()
+ * reduces to cpuset_zone_allowed_hardwall().  Otherwise,
+ * cpuset_zone_allowed_softwall() might sleep, and might allow a zone
+ * from an enclosing cpuset.
+ *
+ * cpuset_zone_allowed_hardwall() only handles the simpler case of
+ * hardwall cpusets, and never sleeps.
+ *
+ * The __GFP_THISNODE placement logic is really handled elsewhere,
+ * by forcibly using a zonelist starting at a specified node, and by
+ * (in get_page_from_freelist()) refusing to consider the zones for
+ * any node on the zonelist except the first.  By the time any such
+ * calls get to this routine, we should just shut up and say 'yes'.
+ *
  * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
  * and do not allow allocations outside the current tasks cpuset.
  * GFP_KERNEL allocations are not so marked, so can escape to the
- * nearest mem_exclusive ancestor cpuset.
+ * nearest enclosing mem_exclusive ancestor cpuset.
  *
- * Scanning up parent cpusets requires callback_mutex.  The __alloc_pages()
- * routine only calls here with __GFP_HARDWALL bit _not_ set if
- * it's a GFP_KERNEL allocation, and all nodes in the current tasks
- * mems_allowed came up empty on the first pass over the zonelist.
- * So only GFP_KERNEL allocations, if all nodes in the cpuset are
- * short of memory, might require taking the callback_mutex mutex.
+ * Scanning up parent cpusets requires callback_mutex.  The
+ * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
+ * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
+ * current tasks mems_allowed came up empty on the first pass over
+ * the zonelist.  So only GFP_KERNEL allocations, if all nodes in the
+ * cpuset are short of memory, might require taking the callback_mutex
+ * mutex.
  *
  * The first call here from mm/page_alloc:get_page_from_freelist()
- * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so
- * no allocation on a node outside the cpuset is allowed (unless in
- * interrupt, of course).
+ * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
+ * so no allocation on a node outside the cpuset is allowed (unless
+ * in interrupt, of course).
  *
  * The second pass through get_page_from_freelist() doesn't even call
  * here for GFP_ATOMIC calls.  For those calls, the __alloc_pages()
@@ -2380,12 +2396,12 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * GFP_USER - only nodes in current tasks mems allowed ok.
  *
  * Rule:
- *    Don't call cpuset_zone_allowed() if you can't sleep, unless you
+ *    Don't call cpuset_zone_allowed_softwall if you can't sleep, unless you
  *    pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
  *    the code that might scan up ancestor cpusets and sleep.
- **/
+ */
 
-int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
 {
 	int node;			/* node that zone z is on */
 	const struct cpuset *cs;	/* current cpuset ancestors */
@@ -2415,6 +2431,40 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 	return allowed;
 }
 
+/*
+ * cpuset_zone_allowed_hardwall - Can we allocate on zone z's memory node?
+ * @z: is this zone on an allowed node?
+ * @gfp_mask: memory allocation flags
+ *
+ * If we're in interrupt, yes, we can always allocate.
+ * If __GFP_THISNODE is set, yes, we can always allocate.  If zone
+ * z's node is in our tasks mems_allowed, yes.  Otherwise, no.
+ *
+ * The __GFP_THISNODE placement logic is really handled elsewhere,
+ * by forcibly using a zonelist starting at a specified node, and by
+ * (in get_page_from_freelist()) refusing to consider the zones for
+ * any node on the zonelist except the first.  By the time any such
+ * calls get to this routine, we should just shut up and say 'yes'.
+ *
+ * Unlike the cpuset_zone_allowed_softwall() variant, above,
+ * this variant requires that the zone be in the current tasks
+ * mems_allowed or that we're in interrupt.  It does not scan up the
+ * cpuset hierarchy for the nearest enclosing mem_exclusive cpuset.
+ * It never sleeps.
+ */
+
+int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
+{
+	int node;			/* node that zone z is on */
+
+	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
+		return 1;
+	node = zone_to_nid(z);
+	if (node_isset(node, current->mems_allowed))
+		return 1;
+	return 0;
+}
+
 /**
  * cpuset_lock - lock out any changes to cpuset structures
  *
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0ccc7f230252..089092d152ab 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -73,7 +73,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
 
 	for (z = zonelist->zones; *z; z++) {
 		nid = zone_to_nid(*z);
-		if (cpuset_zone_allowed(*z, GFP_HIGHUSER) &&
+		if (cpuset_zone_allowed_softwall(*z, GFP_HIGHUSER) &&
 		    !list_empty(&hugepage_freelists[nid]))
 			break;
 	}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 223d9ccb7d64..64cf3c214634 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -177,7 +177,7 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask)
 	nodemask_t nodes = node_online_map;
 
 	for (z = zonelist->zones; *z; z++)
-		if (cpuset_zone_allowed(*z, gfp_mask))
+		if (cpuset_zone_allowed_softwall(*z, gfp_mask))
 			node_clear(zone_to_nid(*z), nodes);
 		else
 			return CONSTRAINT_CPUSET;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e6b17b2989e0..8c1a116875bc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1162,7 +1162,7 @@ zonelist_scan:
 			zone->zone_pgdat != zonelist->zones[0]->zone_pgdat))
 				break;
 		if ((alloc_flags & ALLOC_CPUSET) &&
-			!cpuset_zone_allowed(zone, gfp_mask))
+			!cpuset_zone_allowed_softwall(zone, gfp_mask))
 				goto try_next_zone;
 
 		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
diff --git a/mm/slab.c b/mm/slab.c
index 9d3550086c93..b856786a3a30 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3262,7 +3262,7 @@ retry:
 	for (z = zonelist->zones; *z && !obj; z++) {
 		nid = zone_to_nid(*z);
 
-		if (cpuset_zone_allowed(*z, flags | __GFP_HARDWALL) &&
+		if (cpuset_zone_allowed_hardwall(*z, flags) &&
 			cache->nodelists[nid] &&
 			cache->nodelists[nid]->free_objects)
 				obj = ____cache_alloc_node(cache,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 093f5fe6dd77..e9813b06c7a3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -984,7 +984,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
 		if (!populated_zone(zone))
 			continue;
 
-		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+		if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 			continue;
 
 		note_zone_scanning_priority(zone, priority);
@@ -1034,7 +1034,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
 	for (i = 0; zones[i] != NULL; i++) {
 		struct zone *zone = zones[i];
 
-		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+		if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 			continue;
 
 		lru_pages += zone->nr_active + zone->nr_inactive;
@@ -1089,7 +1089,7 @@ out:
 	for (i = 0; zones[i] != 0; i++) {
 		struct zone *zone = zones[i];
 
-		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+		if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 			continue;
 
 		zone->prev_priority = priority;
@@ -1354,7 +1354,7 @@ void wakeup_kswapd(struct zone *zone, int order)
 		return;
 	if (pgdat->kswapd_max_order < order)
 		pgdat->kswapd_max_order = order;
-	if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+	if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 		return;
 	if (!waitqueue_active(&pgdat->kswapd_wait))
 		return;
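
For illustration only (not part of the patch), here is a minimal caller sketch of the reworked API; the helper pick_first_allowed_zone() and its can_sleep parameter are hypothetical. It shows the rule the patch documents: the hardwall variant never sleeps and only checks current->mems_allowed, so it suits atomic paths, while the softwall variant may scan up to the nearest mem_exclusive ancestor cpuset and may sleep unless __GFP_HARDWALL is passed.

#include <linux/cpuset.h>
#include <linux/mmzone.h>

/* Hypothetical helper: return the first zone on the zonelist that the
 * current task's cpuset permits, or NULL if none is allowed. */
static struct zone *pick_first_allowed_zone(struct zonelist *zonelist,
					    gfp_t gfp_mask, int can_sleep)
{
	struct zone **z;

	for (z = zonelist->zones; *z; z++) {
		if (can_sleep) {
			/* May consult ancestor cpusets and may sleep
			 * (unless __GFP_HARDWALL is set in gfp_mask). */
			if (cpuset_zone_allowed_softwall(*z, gfp_mask))
				return *z;
		} else {
			/* Only current->mems_allowed (or interrupt /
			 * __GFP_THISNODE); never sleeps. */
			if (cpuset_zone_allowed_hardwall(*z, gfp_mask))
				return *z;
		}
	}
	return NULL;
}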