 kernel/cpuset.c | 107
 1 file changed, 55 insertions(+), 52 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1f107c74087b..f21ba868f0d1 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -248,34 +248,34 @@ static struct cpuset top_cpuset = {
 		if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
 
 /*
- * There are two global mutexes guarding cpuset structures - cpuset_mutex
- * and callback_mutex. The latter may nest inside the former. We also
- * require taking task_lock() when dereferencing a task's cpuset pointer.
- * See "The task_lock() exception", at the end of this comment.
+ * There are two global locks guarding cpuset structures - cpuset_mutex and
+ * callback_lock. We also require taking task_lock() when dereferencing a
+ * task's cpuset pointer. See "The task_lock() exception", at the end of this
+ * comment.
  *
- * A task must hold both mutexes to modify cpusets. If a task holds
+ * A task must hold both locks to modify cpusets. If a task holds
  * cpuset_mutex, then it blocks others wanting that mutex, ensuring that it
- * is the only task able to also acquire callback_mutex and be able to
+ * is the only task able to also acquire callback_lock and be able to
  * modify cpusets. It can perform various checks on the cpuset structure
  * first, knowing nothing will change. It can also allocate memory while
  * just holding cpuset_mutex. While it is performing these checks, various
- * callback routines can briefly acquire callback_mutex to query cpusets.
- * Once it is ready to make the changes, it takes callback_mutex, blocking
+ * callback routines can briefly acquire callback_lock to query cpusets.
+ * Once it is ready to make the changes, it takes callback_lock, blocking
  * everyone else.
  *
  * Calls to the kernel memory allocator can not be made while holding
- * callback_mutex, as that would risk double tripping on callback_mutex
+ * callback_lock, as that would risk double tripping on callback_lock
  * from one of the callbacks into the cpuset code from within
  * __alloc_pages().
  *
- * If a task is only holding callback_mutex, then it has read-only
+ * If a task is only holding callback_lock, then it has read-only
  * access to cpusets.
  *
  * Now, the task_struct fields mems_allowed and mempolicy may be changed
  * by other task, we use alloc_lock in the task_struct fields to protect
  * them.
  *
- * The cpuset_common_file_read() handlers only hold callback_mutex across
+ * The cpuset_common_file_read() handlers only hold callback_lock across
  * small pieces of code, such as when reading out possibly multi-word
  * cpumasks and nodemasks.
  *
@@ -284,7 +284,7 @@ static struct cpuset top_cpuset = {
  */
 
 static DEFINE_MUTEX(cpuset_mutex);
-static DEFINE_MUTEX(callback_mutex);
+static DEFINE_SPINLOCK(callback_lock);
 
 /*
  * CPU / memory hotplug is handled asynchronously.
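The locking comment above is the heart of this change: callback_lock is now a spinlock that writers take briefly, with interrupts disabled, after doing all sleeping work under cpuset_mutex, while read-only queries take callback_lock alone. As an illustration only (not part of the patch), here is a minimal sketch of that discipline using the same primitives; the example_* names and the single integer standing in for the cpuset masks are hypothetical:

	#include <linux/mutex.h>
	#include <linux/spinlock.h>

	static DEFINE_MUTEX(example_update_mutex);	/* plays the role of cpuset_mutex */
	static DEFINE_SPINLOCK(example_cb_lock);	/* plays the role of callback_lock */

	static int example_settings;			/* stands in for the cpuset masks */

	/* Writer: checks and allocations under the mutex, short publish under the spinlock. */
	static void example_update(int new_value)
	{
		mutex_lock(&example_update_mutex);
		/* validation and memory allocation may sleep here; only the mutex is held */
		spin_lock_irq(&example_cb_lock);
		example_settings = new_value;		/* publish while excluding readers */
		spin_unlock_irq(&example_cb_lock);
		mutex_unlock(&example_update_mutex);
	}

	/* Reader: read-only access needs only the spinlock and must not sleep or allocate. */
	static int example_query(void)
	{
		int val;

		spin_lock_irq(&example_cb_lock);
		val = example_settings;
		spin_unlock_irq(&example_cb_lock);
		return val;
	}

Because the spinlock sections disable interrupts and cannot sleep, the comment's rule that the memory allocator must never be called while holding callback_lock becomes a hard requirement rather than only a deadlock-avoidance convention.
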
@@ -329,7 +329,7 @@ static struct file_system_type cpuset_fs_type = {
  * One way or another, we guarantee to return some non-empty subset
  * of cpu_online_mask.
  *
- * Call with callback_mutex held.
+ * Call with callback_lock or cpuset_mutex held.
  */
 static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
 {
@@ -347,7 +347,7 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
  * One way or another, we guarantee to return some non-empty subset
  * of node_states[N_MEMORY].
  *
- * Call with callback_mutex held.
+ * Call with callback_lock or cpuset_mutex held.
  */
 static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
 {
@@ -359,7 +359,7 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
 /*
  * update task's spread flag if cpuset's page/slab spread flag is set
  *
- * Called with callback_mutex/cpuset_mutex held
+ * Call with callback_lock or cpuset_mutex held.
  */
 static void cpuset_update_task_spread_flag(struct cpuset *cs,
 					struct task_struct *tsk)
@@ -876,9 +876,9 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
 			continue;
 		rcu_read_unlock();
 
-		mutex_lock(&callback_mutex);
+		spin_lock_irq(&callback_lock);
 		cpumask_copy(cp->effective_cpus, new_cpus);
-		mutex_unlock(&callback_mutex);
+		spin_unlock_irq(&callback_lock);
 
 		WARN_ON(!cgroup_on_dfl(cp->css.cgroup) &&
 			!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
@@ -943,9 +943,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 	if (retval < 0)
 		return retval;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	/* use trialcs->cpus_allowed as a temp variable */
 	update_cpumasks_hier(cs, trialcs->cpus_allowed);
@@ -1132,9 +1132,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
 			continue;
 		rcu_read_unlock();
 
-		mutex_lock(&callback_mutex);
+		spin_lock_irq(&callback_lock);
 		cp->effective_mems = *new_mems;
-		mutex_unlock(&callback_mutex);
+		spin_unlock_irq(&callback_lock);
 
 		WARN_ON(!cgroup_on_dfl(cp->css.cgroup) &&
 			!nodes_equal(cp->mems_allowed, cp->effective_mems));
@@ -1155,7 +1155,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
  * mempolicies and if the cpuset is marked 'memory_migrate',
  * migrate the tasks pages to the new memory.
  *
- * Call with cpuset_mutex held. May take callback_mutex during call.
+ * Call with cpuset_mutex held. May take callback_lock during call.
  * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
  * lock each such tasks mm->mmap_sem, scan its vma's and rebind
  * their mempolicies to the cpusets new mems_allowed.
@@ -1202,9 +1202,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
 	if (retval < 0)
 		goto done;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cs->mems_allowed = trialcs->mems_allowed;
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	/* use trialcs->mems_allowed as a temp variable */
 	update_nodemasks_hier(cs, &cs->mems_allowed);
@@ -1295,9 +1295,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
 			|| (is_spread_page(cs) != is_spread_page(trialcs)));
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cs->flags = trialcs->flags;
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
 		rebuild_sched_domains_locked();
@@ -1713,7 +1713,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
 	count = seq_get_buf(sf, &buf);
 	s = buf;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 
 	switch (type) {
 	case FILE_CPULIST:
@@ -1740,7 +1740,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
 		seq_commit(sf, -1);
 	}
 out_unlock:
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 	return ret;
 }
 
@@ -1957,12 +1957,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 
 	cpuset_inc();
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	if (cgroup_on_dfl(cs->css.cgroup)) {
 		cpumask_copy(cs->effective_cpus, parent->effective_cpus);
 		cs->effective_mems = parent->effective_mems;
 	}
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
 		goto out_unlock;
@@ -1989,10 +1989,10 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 	}
 	rcu_read_unlock();
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cs->mems_allowed = parent->mems_allowed;
 	cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 out_unlock:
 	mutex_unlock(&cpuset_mutex);
 	return 0;
@@ -2031,7 +2031,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
 static void cpuset_bind(struct cgroup_subsys_state *root_css)
 {
 	mutex_lock(&cpuset_mutex);
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 
 	if (cgroup_on_dfl(root_css->cgroup)) {
 		cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
@@ -2042,7 +2042,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
 		top_cpuset.mems_allowed = top_cpuset.effective_mems;
 	}
 
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 	mutex_unlock(&cpuset_mutex);
 }
 
@@ -2127,12 +2127,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
 {
 	bool is_empty;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->cpus_allowed, new_cpus);
 	cpumask_copy(cs->effective_cpus, new_cpus);
 	cs->mems_allowed = *new_mems;
 	cs->effective_mems = *new_mems;
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	/*
 	 * Don't call update_tasks_cpumask() if the cpuset becomes empty,
@@ -2169,10 +2169,10 @@ hotplug_update_tasks(struct cpuset *cs,
 	if (nodes_empty(*new_mems))
 		*new_mems = parent_cs(cs)->effective_mems;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->effective_cpus, new_cpus);
 	cs->effective_mems = *new_mems;
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	if (cpus_updated)
 		update_tasks_cpumask(cs);
@@ -2258,21 +2258,21 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 
 	/* synchronize cpus_allowed to cpu_active_mask */
 	if (cpus_updated) {
-		mutex_lock(&callback_mutex);
+		spin_lock_irq(&callback_lock);
 		if (!on_dfl)
 			cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
 		cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
-		mutex_unlock(&callback_mutex);
+		spin_unlock_irq(&callback_lock);
 		/* we don't mess with cpumasks of tasks in top_cpuset */
 	}
 
 	/* synchronize mems_allowed to N_MEMORY */
 	if (mems_updated) {
-		mutex_lock(&callback_mutex);
+		spin_lock_irq(&callback_lock);
 		if (!on_dfl)
 			top_cpuset.mems_allowed = new_mems;
 		top_cpuset.effective_mems = new_mems;
-		mutex_unlock(&callback_mutex);
+		spin_unlock_irq(&callback_lock);
 		update_tasks_nodemask(&top_cpuset);
 	}
 
@@ -2365,11 +2365,13 @@ void __init cpuset_init_smp(void)
 
 void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 {
-	mutex_lock(&callback_mutex);
+	unsigned long flags;
+
+	spin_lock_irqsave(&callback_lock, flags);
 	rcu_read_lock();
 	guarantee_online_cpus(task_cs(tsk), pmask);
 	rcu_read_unlock();
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irqrestore(&callback_lock, flags);
 }
 
 void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
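Note the two flavors of acquisition in this patch: paths shown earlier that run with interrupts enabled (the cgroup file handlers, cpuset_css_online(), the hotplug worker) use spin_lock_irq()/spin_unlock_irq(), while cpuset_cpus_allowed() above switches to the irqsave/irqrestore pair, which the patch evidently treats as callable from contexts whose interrupt state is not known. A small sketch of the save/restore pattern, for illustration only; the example_* names are hypothetical:

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(example_cb_lock);	/* stands in for callback_lock */
	static int example_settings;

	/*
	 * Safe regardless of the caller's IRQ state: remember it in 'flags',
	 * disable interrupts while the lock is held, then restore exactly what
	 * the caller had instead of unconditionally re-enabling.
	 */
	static int example_query_any_context(void)
	{
		unsigned long flags;
		int val;

		spin_lock_irqsave(&example_cb_lock, flags);
		val = example_settings;
		spin_unlock_irqrestore(&example_cb_lock, flags);
		return val;
	}

spin_unlock_irq() would turn interrupts back on unconditionally, which is only correct when the caller is guaranteed to have had them enabled; where that guarantee is absent, irqsave/irqrestore is the safe choice.
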
@@ -2415,12 +2417,13 @@ void cpuset_init_current_mems_allowed(void)
 nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
 {
 	nodemask_t mask;
+	unsigned long flags;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irqsave(&callback_lock, flags);
 	rcu_read_lock();
 	guarantee_online_mems(task_cs(tsk), &mask);
 	rcu_read_unlock();
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irqrestore(&callback_lock, flags);
 
 	return mask;
 }
@@ -2439,7 +2442,7 @@ int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
 /*
  * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
  * mem_hardwall ancestor to the specified cpuset. Call holding
- * callback_mutex. If no ancestor is mem_exclusive or mem_hardwall
+ * callback_lock. If no ancestor is mem_exclusive or mem_hardwall
  * (an unusual configuration), then returns the root cpuset.
  */
 static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
@@ -2481,13 +2484,12 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
  * GFP_KERNEL allocations are not so marked, so can escape to the
  * nearest enclosing hardwalled ancestor cpuset.
  *
- * Scanning up parent cpusets requires callback_mutex. The
+ * Scanning up parent cpusets requires callback_lock. The
  * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
  * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
  * current tasks mems_allowed came up empty on the first pass over
  * the zonelist. So only GFP_KERNEL allocations, if all nodes in the
- * cpuset are short of memory, might require taking the callback_mutex
- * mutex.
+ * cpuset are short of memory, might require taking the callback_lock.
  *
  * The first call here from mm/page_alloc:get_page_from_freelist()
  * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
@@ -2514,6 +2516,7 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
 {
 	struct cpuset *cs;		/* current cpuset ancestors */
 	int allowed;			/* is allocation in zone z allowed? */
+	unsigned long flags;
 
 	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
 		return 1;
@@ -2533,14 +2536,14 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
 		return 1;
 
 	/* Not hardwall and node outside mems_allowed: scan up cpusets */
-	mutex_lock(&callback_mutex);
+	spin_lock_irqsave(&callback_lock, flags);
 
 	rcu_read_lock();
 	cs = nearest_hardwall_ancestor(task_cs(current));
 	allowed = node_isset(node, cs->mems_allowed);
 	rcu_read_unlock();
 
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irqrestore(&callback_lock, flags);
 	return allowed;
 }
 
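The comment blocks above describe the decision order this allocator hook follows: interrupt-time allocations are always allowed, a node already in the task's mems_allowed is allowed, __GFP_HARDWALL requests may not escape the current cpuset, and only the remaining GFP_KERNEL case needs to walk up to the nearest hardwalled ancestor under callback_lock. A rough paraphrase of that documented flow, with hypothetical example_* helpers (and trivial stub bodies) standing in for the real predicates:

	#include <linux/spinlock.h>
	#include <linux/types.h>

	static DEFINE_SPINLOCK(example_cb_lock);	/* stands in for callback_lock */

	/* Hypothetical stand-ins for the node_isset() checks against the relevant cpusets. */
	static bool example_node_in_current_mems(int node)	{ return node == 0; }
	static bool example_node_in_hardwall_ancestor(int node)	{ return true; }

	static bool example_node_allowed(int node, bool hardwall_request, bool in_irq_context)
	{
		unsigned long flags;
		bool allowed;

		if (in_irq_context)
			return true;	/* interrupt-time allocations are never restricted */
		if (example_node_in_current_mems(node))
			return true;	/* fast path: node already permitted */
		if (hardwall_request)
			return false;	/* __GFP_HARDWALL: no escape past the current cpuset */

		/* GFP_KERNEL may escape to the nearest mem_exclusive/mem_hardwall ancestor. */
		spin_lock_irqsave(&example_cb_lock, flags);
		allowed = example_node_in_hardwall_ancestor(node);
		spin_unlock_irqrestore(&example_cb_lock, flags);
		return allowed;
	}

The irqsave/irqrestore pair mirrors the converted code in the hunk above and keeps the slow-path check safe for callers whose interrupt state is not known.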