-rw-r--r--  include/linux/cpuset.h | 22
-rw-r--r--  kernel/cpuset.c        | 82
-rw-r--r--  mm/hugetlb.c           |  2
-rw-r--r--  mm/oom_kill.c          |  2
-rw-r--r--  mm/page_alloc.c        |  2
-rw-r--r--  mm/slab.c              |  2
-rw-r--r--  mm/vmscan.c            |  8
7 files changed, 92 insertions, 28 deletions
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 8821e1f75b44..826b15e914e2 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -30,10 +30,19 @@ void cpuset_update_task_memory_state(void);
 		nodes_subset((nodes), current->mems_allowed)
 int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl);
 
-extern int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask);
-static int inline cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+extern int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask);
+extern int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask);
+
+static int inline cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
+{
+	return number_of_cpusets <= 1 ||
+		__cpuset_zone_allowed_softwall(z, gfp_mask);
+}
+
+static int inline cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
 {
-	return number_of_cpusets <= 1 || __cpuset_zone_allowed(z, gfp_mask);
+	return number_of_cpusets <= 1 ||
+		__cpuset_zone_allowed_hardwall(z, gfp_mask);
 }
 
 extern int cpuset_excl_nodes_overlap(const struct task_struct *p);
@@ -94,7 +103,12 @@ static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
 	return 1;
 }
 
-static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+static inline int cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
+{
+	return 1;
+}
+
+static inline int cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
 {
 	return 1;
 }
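For orientation, a minimal call-site sketch of the two wrappers above (not part of the patch; the helper my_scan_zonelist() and its can_sleep flag are invented for illustration, while cpuset_zone_allowed_softwall()/_hardwall() and the zonelist->zones layout come from this patch and the mm/ hunks below):

static struct zone *my_scan_zonelist(struct zonelist *zonelist,
				     gfp_t gfp_mask, int can_sleep)
{
	struct zone **z;

	for (z = zonelist->zones; *z; z++) {
		if (can_sleep) {
			/* May sleep, and may fall back to the nearest
			 * enclosing mem_exclusive cpuset unless
			 * __GFP_HARDWALL is set in gfp_mask. */
			if (!cpuset_zone_allowed_softwall(*z, gfp_mask))
				continue;
		} else {
			/* Never sleeps: only the interrupt/__GFP_THISNODE
			 * escapes and current->mems_allowed are checked. */
			if (!cpuset_zone_allowed_hardwall(*z, gfp_mask))
				continue;
		}
		return *z;	/* first zone the cpuset rules allow */
	}
	return NULL;
}

This mirrors the rule spelled out in kernel/cpuset.c below: callers that cannot sleep must either use the hardwall variant or pass __GFP_HARDWALL to the softwall one.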
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2c3b4431472b..232aed2b10f9 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2342,32 +2342,48 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
 }
 
 /**
- * cpuset_zone_allowed - Can we allocate memory on zone z's memory node?
+ * cpuset_zone_allowed_softwall - Can we allocate on zone z's memory node?
  * @z: is this zone on an allowed node?
- * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL)
+ * @gfp_mask: memory allocation flags
  *
- * If we're in interrupt, yes, we can always allocate. If zone
+ * If we're in interrupt, yes, we can always allocate. If
+ * __GFP_THISNODE is set, yes, we can always allocate. If zone
  * z's node is in our tasks mems_allowed, yes. If it's not a
  * __GFP_HARDWALL request and this zone's nodes is in the nearest
  * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
  * Otherwise, no.
  *
+ * If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall()
+ * reduces to cpuset_zone_allowed_hardwall(). Otherwise,
+ * cpuset_zone_allowed_softwall() might sleep, and might allow a zone
+ * from an enclosing cpuset.
+ *
+ * cpuset_zone_allowed_hardwall() only handles the simpler case of
+ * hardwall cpusets, and never sleeps.
+ *
+ * The __GFP_THISNODE placement logic is really handled elsewhere,
+ * by forcibly using a zonelist starting at a specified node, and by
+ * (in get_page_from_freelist()) refusing to consider the zones for
+ * any node on the zonelist except the first. By the time any such
+ * calls get to this routine, we should just shut up and say 'yes'.
+ *
  * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
  * and do not allow allocations outside the current tasks cpuset.
  * GFP_KERNEL allocations are not so marked, so can escape to the
- * nearest mem_exclusive ancestor cpuset.
+ * nearest enclosing mem_exclusive ancestor cpuset.
  *
- * Scanning up parent cpusets requires callback_mutex. The __alloc_pages()
- * routine only calls here with __GFP_HARDWALL bit _not_ set if
- * it's a GFP_KERNEL allocation, and all nodes in the current tasks
- * mems_allowed came up empty on the first pass over the zonelist.
- * So only GFP_KERNEL allocations, if all nodes in the cpuset are
- * short of memory, might require taking the callback_mutex mutex.
+ * Scanning up parent cpusets requires callback_mutex. The
+ * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
+ * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
+ * current tasks mems_allowed came up empty on the first pass over
+ * the zonelist. So only GFP_KERNEL allocations, if all nodes in the
+ * cpuset are short of memory, might require taking the callback_mutex
+ * mutex.
  *
  * The first call here from mm/page_alloc:get_page_from_freelist()
- * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so
- * no allocation on a node outside the cpuset is allowed (unless in
- * interrupt, of course).
+ * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
+ * so no allocation on a node outside the cpuset is allowed (unless
+ * in interrupt, of course).
  *
  * The second pass through get_page_from_freelist() doesn't even call
  * here for GFP_ATOMIC calls. For those calls, the __alloc_pages()
@@ -2380,12 +2396,12 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * GFP_USER - only nodes in current tasks mems allowed ok.
  *
  * Rule:
- * Don't call cpuset_zone_allowed() if you can't sleep, unless you
+ * Don't call cpuset_zone_allowed_softwall if you can't sleep, unless you
  * pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
  * the code that might scan up ancestor cpusets and sleep.
- **/
+ */
 
-int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
 {
 	int node;			/* node that zone z is on */
 	const struct cpuset *cs;	/* current cpuset ancestors */
@@ -2415,6 +2431,40 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 	return allowed;
 }
 
+/*
+ * cpuset_zone_allowed_hardwall - Can we allocate on zone z's memory node?
+ * @z: is this zone on an allowed node?
+ * @gfp_mask: memory allocation flags
+ *
+ * If we're in interrupt, yes, we can always allocate.
+ * If __GFP_THISNODE is set, yes, we can always allocate. If zone
+ * z's node is in our tasks mems_allowed, yes. Otherwise, no.
+ *
+ * The __GFP_THISNODE placement logic is really handled elsewhere,
+ * by forcibly using a zonelist starting at a specified node, and by
+ * (in get_page_from_freelist()) refusing to consider the zones for
+ * any node on the zonelist except the first. By the time any such
+ * calls get to this routine, we should just shut up and say 'yes'.
+ *
+ * Unlike the cpuset_zone_allowed_softwall() variant, above,
+ * this variant requires that the zone be in the current tasks
+ * mems_allowed or that we're in interrupt. It does not scan up the
+ * cpuset hierarchy for the nearest enclosing mem_exclusive cpuset.
+ * It never sleeps.
+ */
+
+int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
+{
+	int node;			/* node that zone z is on */
+
+	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
+		return 1;
+	node = zone_to_nid(z);
+	if (node_isset(node, current->mems_allowed))
+		return 1;
+	return 0;
+}
+
 /**
  * cpuset_lock - lock out any changes to cpuset structures
  *
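The body of __cpuset_zone_allowed_softwall() itself falls outside the hunks above (only its declaration, two locals and the final return appear as context), so here is a rough sketch of the documented behaviour, reconstructed from the comment block; it is not the verbatim kernel function, and details such as locking granularity or extra escape hatches may differ:

/* Sketch only -- not the kernel's __cpuset_zone_allowed_softwall() body. */
int softwall_sketch(struct zone *z, gfp_t gfp_mask)
{
	int node;			/* node that zone z is on */
	const struct cpuset *cs;	/* nearest mem_exclusive ancestor */
	int allowed;			/* is allocation in zone z allowed? */

	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
		return 1;		/* always allowed */
	node = zone_to_nid(z);
	if (node_isset(node, current->mems_allowed))
		return 1;		/* node is in this task's cpuset */
	if (gfp_mask & __GFP_HARDWALL)
		return 0;		/* hardwall request: stop here */

	/*
	 * GFP_KERNEL-style request and every node in the cpuset came up
	 * short: scan up to the nearest mem_exclusive ancestor. This is
	 * the part that takes callback_mutex and may sleep.
	 */
	mutex_lock(&callback_mutex);
	task_lock(current);
	cs = nearest_exclusive_ancestor(current->cpuset);
	task_unlock(current);
	allowed = node_isset(node, cs->mems_allowed);
	mutex_unlock(&callback_mutex);
	return allowed;
}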
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0ccc7f230252..089092d152ab 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -73,7 +73,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
 
 	for (z = zonelist->zones; *z; z++) {
 		nid = zone_to_nid(*z);
-		if (cpuset_zone_allowed(*z, GFP_HIGHUSER) &&
+		if (cpuset_zone_allowed_softwall(*z, GFP_HIGHUSER) &&
 		    !list_empty(&hugepage_freelists[nid]))
 			break;
 	}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 223d9ccb7d64..64cf3c214634 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -177,7 +177,7 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask)
 	nodemask_t nodes = node_online_map;
 
 	for (z = zonelist->zones; *z; z++)
-		if (cpuset_zone_allowed(*z, gfp_mask))
+		if (cpuset_zone_allowed_softwall(*z, gfp_mask))
 			node_clear(zone_to_nid(*z), nodes);
 		else
 			return CONSTRAINT_CPUSET;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e6b17b2989e0..8c1a116875bc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1162,7 +1162,7 @@ zonelist_scan:
 			zone->zone_pgdat != zonelist->zones[0]->zone_pgdat))
 				break;
 		if ((alloc_flags & ALLOC_CPUSET) &&
-			!cpuset_zone_allowed(zone, gfp_mask))
+			!cpuset_zone_allowed_softwall(zone, gfp_mask))
 				goto try_next_zone;
 
 		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3262,7 +3262,7 @@ retry:
 	for (z = zonelist->zones; *z && !obj; z++) {
 		nid = zone_to_nid(*z);
 
-		if (cpuset_zone_allowed(*z, flags | __GFP_HARDWALL) &&
+		if (cpuset_zone_allowed_hardwall(*z, flags) &&
 			cache->nodelists[nid] &&
 			cache->nodelists[nid]->free_objects)
 				obj = ____cache_alloc_node(cache,
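The mm/slab.c hunk above changes more than the function name: the old code asked for hardwall semantics by OR-ing __GFP_HARDWALL into flags, while the new code calls the dedicated non-sleeping variant instead. Per the comment added in kernel/cpuset.c ("If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall() reduces to cpuset_zone_allowed_hardwall()"), the two spellings are equivalent here; the sketch below (hypothetical helpers, for illustration only) just restates that equivalence:

/* Illustration only: both helpers return the same answer for this
 * call site; the second spelling makes the never-sleeps guarantee
 * explicit instead of relying on the OR-ed in flag. */
static int slab_node_ok_old_style(struct zone *z, gfp_t flags)
{
	return cpuset_zone_allowed_softwall(z, flags | __GFP_HARDWALL);
}

static int slab_node_ok_new_style(struct zone *z, gfp_t flags)
{
	return cpuset_zone_allowed_hardwall(z, flags);
}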
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 093f5fe6dd77..e9813b06c7a3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -984,7 +984,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
 		if (!populated_zone(zone))
 			continue;
 
-		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+		if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 			continue;
 
 		note_zone_scanning_priority(zone, priority);
@@ -1034,7 +1034,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
 	for (i = 0; zones[i] != NULL; i++) {
 		struct zone *zone = zones[i];
 
-		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+		if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 			continue;
 
 		lru_pages += zone->nr_active + zone->nr_inactive;
@@ -1089,7 +1089,7 @@ out:
 	for (i = 0; zones[i] != 0; i++) {
 		struct zone *zone = zones[i];
 
-		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+		if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 			continue;
 
 		zone->prev_priority = priority;
@@ -1354,7 +1354,7 @@ void wakeup_kswapd(struct zone *zone, int order)
 		return;
 	if (pgdat->kswapd_max_order < order)
 		pgdat->kswapd_max_order = order;
-	if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
+	if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 		return;
 	if (!waitqueue_active(&pgdat->kswapd_wait))
 		return;