author	Vladimir Davydov <vdavydov@parallels.com>	2014-10-20 07:50:29 -0400
committer	Tejun Heo <tj@kernel.org>	2014-10-27 11:15:26 -0400
commit	8447a0fee974433f7e0035fd30e1edecf00e014f (patch)
tree	b670f05f0983a7705c58a97c4122f4963e4f840a /kernel
parent	cac7f2429872d3733dc3f9915857b1691da2eb2f (diff)
cpuset: convert callback_mutex to a spinlock
The callback_mutex is only used to synchronize reads/updates of cpusets' flags and cpu/node masks. These operations should always proceed fast so there's no reason why we can't use a spinlock instead of the mutex.

Converting the callback_mutex into a spinlock will let us call cpuset_zone_allowed_softwall from atomic context. This, in turn, makes it possible to simplify the code by merging the hardwall and softwall checks into the same function, which is the business of the next patch.

Suggested-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Christoph Lameter <cl@linux.com>
Acked-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
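For context, the conversion pattern applied by this patch looks roughly like the sketch below (placeholder names such as example_lock and example_update; this is not the cpuset code itself): a mutex that only ever guards short, non-sleeping critical sections is replaced by an irq-safe spinlock, so the protected data can also be accessed from atomic context. The reader-side variant used by the patch is sketched after the diff.

/* Illustrative sketch only, not part of the commit. */
#include <linux/mutex.h>
#include <linux/spinlock.h>

/* Before: a sleeping lock; cannot be taken from atomic context. */
static DEFINE_MUTEX(example_mutex);

static void example_update_mutex(int *state, int val)
{
	mutex_lock(&example_mutex);
	*state = val;			/* short, non-sleeping update */
	mutex_unlock(&example_mutex);
}

/* After: a spinlock; the writer here runs in process context with irqs enabled. */
static DEFINE_SPINLOCK(example_lock);

static void example_update_spinlock(int *state, int val)
{
	spin_lock_irq(&example_lock);
	*state = val;			/* same short update, now usable alongside atomic readers */
	spin_unlock_irq(&example_lock);
}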
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/cpuset.c	107
1 file changed, 55 insertions(+), 52 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1f107c74087b..f21ba868f0d1 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -248,34 +248,34 @@ static struct cpuset top_cpuset = {
 		if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
 
 /*
- * There are two global mutexes guarding cpuset structures - cpuset_mutex
- * and callback_mutex. The latter may nest inside the former. We also
- * require taking task_lock() when dereferencing a task's cpuset pointer.
- * See "The task_lock() exception", at the end of this comment.
+ * There are two global locks guarding cpuset structures - cpuset_mutex and
+ * callback_lock. We also require taking task_lock() when dereferencing a
+ * task's cpuset pointer. See "The task_lock() exception", at the end of this
+ * comment.
  *
- * A task must hold both mutexes to modify cpusets. If a task holds
+ * A task must hold both locks to modify cpusets. If a task holds
  * cpuset_mutex, then it blocks others wanting that mutex, ensuring that it
- * is the only task able to also acquire callback_mutex and be able to
+ * is the only task able to also acquire callback_lock and be able to
  * modify cpusets. It can perform various checks on the cpuset structure
  * first, knowing nothing will change. It can also allocate memory while
  * just holding cpuset_mutex. While it is performing these checks, various
- * callback routines can briefly acquire callback_mutex to query cpusets.
- * Once it is ready to make the changes, it takes callback_mutex, blocking
+ * callback routines can briefly acquire callback_lock to query cpusets.
+ * Once it is ready to make the changes, it takes callback_lock, blocking
  * everyone else.
  *
  * Calls to the kernel memory allocator can not be made while holding
- * callback_mutex, as that would risk double tripping on callback_mutex
+ * callback_lock, as that would risk double tripping on callback_lock
  * from one of the callbacks into the cpuset code from within
  * __alloc_pages().
  *
- * If a task is only holding callback_mutex, then it has read-only
+ * If a task is only holding callback_lock, then it has read-only
  * access to cpusets.
  *
  * Now, the task_struct fields mems_allowed and mempolicy may be changed
  * by other task, we use alloc_lock in the task_struct fields to protect
  * them.
  *
- * The cpuset_common_file_read() handlers only hold callback_mutex across
+ * The cpuset_common_file_read() handlers only hold callback_lock across
  * small pieces of code, such as when reading out possibly multi-word
  * cpumasks and nodemasks.
  *
@@ -284,7 +284,7 @@ static struct cpuset top_cpuset = {
  */
 
 static DEFINE_MUTEX(cpuset_mutex);
-static DEFINE_MUTEX(callback_mutex);
+static DEFINE_SPINLOCK(callback_lock);
 
 /*
  * CPU / memory hotplug is handled asynchronously.
@@ -329,7 +329,7 @@ static struct file_system_type cpuset_fs_type = {
  * One way or another, we guarantee to return some non-empty subset
  * of cpu_online_mask.
  *
- * Call with callback_mutex held.
+ * Call with callback_lock or cpuset_mutex held.
  */
 static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
 {
@@ -347,7 +347,7 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
  * One way or another, we guarantee to return some non-empty subset
  * of node_states[N_MEMORY].
  *
- * Call with callback_mutex held.
+ * Call with callback_lock or cpuset_mutex held.
  */
 static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
 {
@@ -359,7 +359,7 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
 /*
  * update task's spread flag if cpuset's page/slab spread flag is set
  *
- * Called with callback_mutex/cpuset_mutex held
+ * Call with callback_lock or cpuset_mutex held.
  */
 static void cpuset_update_task_spread_flag(struct cpuset *cs,
 					struct task_struct *tsk)
@@ -876,9 +876,9 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
 			continue;
 		rcu_read_unlock();
 
-		mutex_lock(&callback_mutex);
+		spin_lock_irq(&callback_lock);
 		cpumask_copy(cp->effective_cpus, new_cpus);
-		mutex_unlock(&callback_mutex);
+		spin_unlock_irq(&callback_lock);
 
 		WARN_ON(!cgroup_on_dfl(cp->css.cgroup) &&
 			!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
@@ -943,9 +943,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 	if (retval < 0)
 		return retval;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	/* use trialcs->cpus_allowed as a temp variable */
 	update_cpumasks_hier(cs, trialcs->cpus_allowed);
@@ -1132,9 +1132,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
 			continue;
 		rcu_read_unlock();
 
-		mutex_lock(&callback_mutex);
+		spin_lock_irq(&callback_lock);
 		cp->effective_mems = *new_mems;
-		mutex_unlock(&callback_mutex);
+		spin_unlock_irq(&callback_lock);
 
 		WARN_ON(!cgroup_on_dfl(cp->css.cgroup) &&
 			!nodes_equal(cp->mems_allowed, cp->effective_mems));
@@ -1155,7 +1155,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
  * mempolicies and if the cpuset is marked 'memory_migrate',
  * migrate the tasks pages to the new memory.
  *
- * Call with cpuset_mutex held. May take callback_mutex during call.
+ * Call with cpuset_mutex held. May take callback_lock during call.
  * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
  * lock each such tasks mm->mmap_sem, scan its vma's and rebind
  * their mempolicies to the cpusets new mems_allowed.
@@ -1202,9 +1202,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
 	if (retval < 0)
 		goto done;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cs->mems_allowed = trialcs->mems_allowed;
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	/* use trialcs->mems_allowed as a temp variable */
 	update_nodemasks_hier(cs, &cs->mems_allowed);
@@ -1295,9 +1295,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
 			|| (is_spread_page(cs) != is_spread_page(trialcs)));
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cs->flags = trialcs->flags;
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
 		rebuild_sched_domains_locked();
@@ -1713,7 +1713,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
 	count = seq_get_buf(sf, &buf);
 	s = buf;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 
 	switch (type) {
 	case FILE_CPULIST:
@@ -1740,7 +1740,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
 		seq_commit(sf, -1);
 	}
 out_unlock:
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 	return ret;
 }
 
@@ -1957,12 +1957,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 
 	cpuset_inc();
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	if (cgroup_on_dfl(cs->css.cgroup)) {
 		cpumask_copy(cs->effective_cpus, parent->effective_cpus);
 		cs->effective_mems = parent->effective_mems;
 	}
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
 		goto out_unlock;
@@ -1989,10 +1989,10 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 	}
 	rcu_read_unlock();
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cs->mems_allowed = parent->mems_allowed;
 	cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 out_unlock:
 	mutex_unlock(&cpuset_mutex);
 	return 0;
@@ -2031,7 +2031,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
 static void cpuset_bind(struct cgroup_subsys_state *root_css)
 {
 	mutex_lock(&cpuset_mutex);
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 
 	if (cgroup_on_dfl(root_css->cgroup)) {
 		cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
@@ -2042,7 +2042,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
 		top_cpuset.mems_allowed = top_cpuset.effective_mems;
 	}
 
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 	mutex_unlock(&cpuset_mutex);
 }
 
@@ -2127,12 +2127,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
 {
 	bool is_empty;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->cpus_allowed, new_cpus);
 	cpumask_copy(cs->effective_cpus, new_cpus);
 	cs->mems_allowed = *new_mems;
 	cs->effective_mems = *new_mems;
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	/*
 	 * Don't call update_tasks_cpumask() if the cpuset becomes empty,
@@ -2169,10 +2169,10 @@ hotplug_update_tasks(struct cpuset *cs,
 	if (nodes_empty(*new_mems))
 		*new_mems = parent_cs(cs)->effective_mems;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->effective_cpus, new_cpus);
 	cs->effective_mems = *new_mems;
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	if (cpus_updated)
 		update_tasks_cpumask(cs);
@@ -2258,21 +2258,21 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 
 	/* synchronize cpus_allowed to cpu_active_mask */
 	if (cpus_updated) {
-		mutex_lock(&callback_mutex);
+		spin_lock_irq(&callback_lock);
 		if (!on_dfl)
 			cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
 		cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
-		mutex_unlock(&callback_mutex);
+		spin_unlock_irq(&callback_lock);
 		/* we don't mess with cpumasks of tasks in top_cpuset */
 	}
 
 	/* synchronize mems_allowed to N_MEMORY */
 	if (mems_updated) {
-		mutex_lock(&callback_mutex);
+		spin_lock_irq(&callback_lock);
 		if (!on_dfl)
 			top_cpuset.mems_allowed = new_mems;
 		top_cpuset.effective_mems = new_mems;
-		mutex_unlock(&callback_mutex);
+		spin_unlock_irq(&callback_lock);
 		update_tasks_nodemask(&top_cpuset);
 	}
 
@@ -2365,11 +2365,13 @@ void __init cpuset_init_smp(void)
 
 void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 {
-	mutex_lock(&callback_mutex);
+	unsigned long flags;
+
+	spin_lock_irqsave(&callback_lock, flags);
 	rcu_read_lock();
 	guarantee_online_cpus(task_cs(tsk), pmask);
 	rcu_read_unlock();
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irqrestore(&callback_lock, flags);
 }
 
 void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
@@ -2415,12 +2417,13 @@ void cpuset_init_current_mems_allowed(void)
 nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
 {
 	nodemask_t mask;
+	unsigned long flags;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irqsave(&callback_lock, flags);
 	rcu_read_lock();
 	guarantee_online_mems(task_cs(tsk), &mask);
 	rcu_read_unlock();
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irqrestore(&callback_lock, flags);
 
 	return mask;
 }
@@ -2439,7 +2442,7 @@ int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
 /*
  * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
  * mem_hardwall ancestor to the specified cpuset. Call holding
- * callback_mutex. If no ancestor is mem_exclusive or mem_hardwall
+ * callback_lock. If no ancestor is mem_exclusive or mem_hardwall
  * (an unusual configuration), then returns the root cpuset.
  */
 static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
@@ -2481,13 +2484,12 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
  * GFP_KERNEL allocations are not so marked, so can escape to the
  * nearest enclosing hardwalled ancestor cpuset.
  *
- * Scanning up parent cpusets requires callback_mutex. The
+ * Scanning up parent cpusets requires callback_lock. The
  * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
  * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
  * current tasks mems_allowed came up empty on the first pass over
  * the zonelist. So only GFP_KERNEL allocations, if all nodes in the
- * cpuset are short of memory, might require taking the callback_mutex
- * mutex.
+ * cpuset are short of memory, might require taking the callback_lock.
  *
  * The first call here from mm/page_alloc:get_page_from_freelist()
  * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
@@ -2514,6 +2516,7 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
 {
 	struct cpuset *cs;		/* current cpuset ancestors */
 	int allowed;			/* is allocation in zone z allowed? */
+	unsigned long flags;
 
 	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
 		return 1;
@@ -2533,14 +2536,14 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
 		return 1;
 
 	/* Not hardwall and node outside mems_allowed: scan up cpusets */
-	mutex_lock(&callback_mutex);
+	spin_lock_irqsave(&callback_lock, flags);
 
 	rcu_read_lock();
 	cs = nearest_hardwall_ancestor(task_cs(current));
 	allowed = node_isset(node, cs->mems_allowed);
 	rcu_read_unlock();
 
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irqrestore(&callback_lock, flags);
 	return allowed;
 }
 
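Note on the two locking variants used above: update paths that already hold cpuset_mutex run in process context with interrupts enabled, so they take the new lock with spin_lock_irq()/spin_unlock_irq(), while read-side helpers such as cpuset_cpus_allowed(), cpuset_mems_allowed() and __cpuset_node_allowed_softwall() may be called from contexts where the interrupt state is not known, so they use the irqsave variant. A minimal sketch of that reader pattern (placeholder names, not the cpuset code):

/* Illustrative sketch only, not part of the commit. */
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);
static int example_state;

/* Reader that may be called with interrupts already disabled. */
static int example_read(void)
{
	unsigned long flags;
	int val;

	spin_lock_irqsave(&example_lock, flags);
	val = example_state;		/* read the protected state */
	spin_unlock_irqrestore(&example_lock, flags);

	return val;
}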
2546 2549