commit    2756d373a3f45a3a9ebf4ac389f9e0e02bd35a93
tree      e248c5adccb3045f96b3cfe0a1ffeb37bb81e4cb
parent    4e8790f77f051d4cc745a57b48a73052521e8dfc
parent    eeecbd1971517103e06f11750dd1a9a1dc37e4e6
author    Linus Torvalds <torvalds@linux-foundation.org>  2014-12-11 21:57:19 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-12-11 21:57:19 -0500
Merge branch 'for-3.19' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup update from Tejun Heo:
"cpuset got simplified a bit. cgroup core got a fix on unified
hierarchy and grew some effective css related interfaces which will be
used for blkio support for writeback IO traffic which is currently
being worked on"
* 'for-3.19' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
cgroup: implement cgroup_get_e_css()
cgroup: add cgroup_subsys->css_e_css_changed()
cgroup: add cgroup_subsys->css_released()
cgroup: fix the async css offline wait logic in cgroup_subtree_control_write()
cgroup: restructure child_subsys_mask handling in cgroup_subtree_control_write()
cgroup: separate out cgroup_calc_child_subsys_mask() from cgroup_refresh_child_subsys_mask()
cpuset: lock vs unlock typo
cpuset: simplify cpuset_node_allowed API
cpuset: convert callback_mutex to a spinlock
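The cpuset entries above share one theme: callback_mutex becomes callback_lock, a spinlock nested inside cpuset_mutex, so the read-side helpers (and the simplified cpuset_node_allowed()) no longer sleep. A minimal sketch of the resulting locking pattern follows; it uses the kernel's real locking primitives, but the example_* helpers and the pared-down struct are hypothetical illustrations of the pattern in the diff, not code from the patch:

```c
#include <linux/cpumask.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>

/* Pared-down stand-in for the real struct cpuset in kernel/cpuset.c. */
struct example_cpuset {
	cpumask_var_t effective_cpus;
};

static DEFINE_MUTEX(cpuset_mutex);	/* outer lock: serializes writers, may sleep    */
static DEFINE_SPINLOCK(callback_lock);	/* inner lock: guards the masks readers look at */

/* Writer side (process context, IRQs known to be enabled): take the
 * sleeping mutex first, then the spinlock only around the actual update. */
static void example_update_cpus(struct example_cpuset *cs,
				const struct cpumask *new_cpus)
{
	mutex_lock(&cpuset_mutex);
	spin_lock_irq(&callback_lock);
	cpumask_copy(cs->effective_cpus, new_cpus);
	spin_unlock_irq(&callback_lock);
	mutex_unlock(&cpuset_mutex);
}

/* Reader side: may be reached from contexts where the IRQ state is unknown,
 * so save and restore it rather than unconditionally re-enabling IRQs. */
static void example_read_cpus(struct example_cpuset *cs, struct cpumask *pmask)
{
	unsigned long flags;

	spin_lock_irqsave(&callback_lock, flags);
	cpumask_copy(pmask, cs->effective_cpus);
	spin_unlock_irqrestore(&callback_lock, flags);
}
```

This mirrors the split visible in the diff below: the update paths in kernel/cpuset.c take spin_lock_irq(), while cpuset_cpus_allowed(), cpuset_mems_allowed() and __cpuset_node_allowed() use spin_lock_irqsave().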
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--   kernel/cpuset.c   162
1 file changed, 57 insertions(+), 105 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 723cfc9d0ad7..64b257f6bca2 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -248,34 +248,34 @@ static struct cpuset top_cpuset = {
 		if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
 
 /*
- * There are two global mutexes guarding cpuset structures - cpuset_mutex
- * and callback_mutex. The latter may nest inside the former. We also
- * require taking task_lock() when dereferencing a task's cpuset pointer.
- * See "The task_lock() exception", at the end of this comment.
+ * There are two global locks guarding cpuset structures - cpuset_mutex and
+ * callback_lock. We also require taking task_lock() when dereferencing a
+ * task's cpuset pointer. See "The task_lock() exception", at the end of this
+ * comment.
  *
- * A task must hold both mutexes to modify cpusets. If a task holds
+ * A task must hold both locks to modify cpusets. If a task holds
  * cpuset_mutex, then it blocks others wanting that mutex, ensuring that it
- * is the only task able to also acquire callback_mutex and be able to
+ * is the only task able to also acquire callback_lock and be able to
  * modify cpusets. It can perform various checks on the cpuset structure
  * first, knowing nothing will change. It can also allocate memory while
  * just holding cpuset_mutex. While it is performing these checks, various
- * callback routines can briefly acquire callback_mutex to query cpusets.
- * Once it is ready to make the changes, it takes callback_mutex, blocking
+ * callback routines can briefly acquire callback_lock to query cpusets.
+ * Once it is ready to make the changes, it takes callback_lock, blocking
  * everyone else.
  *
  * Calls to the kernel memory allocator can not be made while holding
- * callback_mutex, as that would risk double tripping on callback_mutex
+ * callback_lock, as that would risk double tripping on callback_lock
  * from one of the callbacks into the cpuset code from within
  * __alloc_pages().
  *
- * If a task is only holding callback_mutex, then it has read-only
+ * If a task is only holding callback_lock, then it has read-only
  * access to cpusets.
  *
  * Now, the task_struct fields mems_allowed and mempolicy may be changed
  * by other task, we use alloc_lock in the task_struct fields to protect
  * them.
  *
- * The cpuset_common_file_read() handlers only hold callback_mutex across
+ * The cpuset_common_file_read() handlers only hold callback_lock across
  * small pieces of code, such as when reading out possibly multi-word
  * cpumasks and nodemasks.
  *
@@ -284,7 +284,7 @@ static struct cpuset top_cpuset = {
  */
 
 static DEFINE_MUTEX(cpuset_mutex);
-static DEFINE_MUTEX(callback_mutex);
+static DEFINE_SPINLOCK(callback_lock);
 
 /*
  * CPU / memory hotplug is handled asynchronously.
@@ -329,7 +329,7 @@ static struct file_system_type cpuset_fs_type = {
  * One way or another, we guarantee to return some non-empty subset
  * of cpu_online_mask.
  *
- * Call with callback_mutex held.
+ * Call with callback_lock or cpuset_mutex held.
  */
 static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
 {
@@ -347,7 +347,7 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
  * One way or another, we guarantee to return some non-empty subset
  * of node_states[N_MEMORY].
  *
- * Call with callback_mutex held.
+ * Call with callback_lock or cpuset_mutex held.
  */
 static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
 {
@@ -359,7 +359,7 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
 /*
  * update task's spread flag if cpuset's page/slab spread flag is set
  *
- * Called with callback_mutex/cpuset_mutex held
+ * Call with callback_lock or cpuset_mutex held.
  */
 static void cpuset_update_task_spread_flag(struct cpuset *cs,
 					struct task_struct *tsk)
@@ -886,9 +886,9 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
 			continue;
 		rcu_read_unlock();
 
-		mutex_lock(&callback_mutex);
+		spin_lock_irq(&callback_lock);
 		cpumask_copy(cp->effective_cpus, new_cpus);
-		mutex_unlock(&callback_mutex);
+		spin_unlock_irq(&callback_lock);
 
 		WARN_ON(!cgroup_on_dfl(cp->css.cgroup) &&
 			!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
@@ -953,9 +953,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 	if (retval < 0)
 		return retval;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	/* use trialcs->cpus_allowed as a temp variable */
 	update_cpumasks_hier(cs, trialcs->cpus_allowed);
@@ -1142,9 +1142,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
 			continue;
 		rcu_read_unlock();
 
-		mutex_lock(&callback_mutex);
+		spin_lock_irq(&callback_lock);
 		cp->effective_mems = *new_mems;
-		mutex_unlock(&callback_mutex);
+		spin_unlock_irq(&callback_lock);
 
 		WARN_ON(!cgroup_on_dfl(cp->css.cgroup) &&
 			!nodes_equal(cp->mems_allowed, cp->effective_mems));
@@ -1165,7 +1165,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
  * mempolicies and if the cpuset is marked 'memory_migrate',
  * migrate the tasks pages to the new memory.
  *
- * Call with cpuset_mutex held. May take callback_mutex during call.
+ * Call with cpuset_mutex held. May take callback_lock during call.
  * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
  * lock each such tasks mm->mmap_sem, scan its vma's and rebind
  * their mempolicies to the cpusets new mems_allowed.
@@ -1212,9 +1212,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
 	if (retval < 0)
 		goto done;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cs->mems_allowed = trialcs->mems_allowed;
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	/* use trialcs->mems_allowed as a temp variable */
 	update_nodemasks_hier(cs, &cs->mems_allowed);
@@ -1305,9 +1305,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
 	spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
 			|| (is_spread_page(cs) != is_spread_page(trialcs)));
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cs->flags = trialcs->flags;
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
 		rebuild_sched_domains_locked();
@@ -1714,7 +1714,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
 	count = seq_get_buf(sf, &buf);
 	s = buf;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 
 	switch (type) {
 	case FILE_CPULIST:
@@ -1741,7 +1741,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
 		seq_commit(sf, -1);
 	}
 out_unlock:
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 	return ret;
 }
 
@@ -1958,12 +1958,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 
 	cpuset_inc();
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	if (cgroup_on_dfl(cs->css.cgroup)) {
 		cpumask_copy(cs->effective_cpus, parent->effective_cpus);
 		cs->effective_mems = parent->effective_mems;
 	}
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
 		goto out_unlock;
@@ -1990,10 +1990,10 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 	}
 	rcu_read_unlock();
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cs->mems_allowed = parent->mems_allowed;
 	cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 out_unlock:
 	mutex_unlock(&cpuset_mutex);
 	return 0;
@@ -2032,7 +2032,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
 static void cpuset_bind(struct cgroup_subsys_state *root_css)
 {
 	mutex_lock(&cpuset_mutex);
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 
 	if (cgroup_on_dfl(root_css->cgroup)) {
 		cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
@@ -2043,7 +2043,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
 		top_cpuset.mems_allowed = top_cpuset.effective_mems;
 	}
 
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 	mutex_unlock(&cpuset_mutex);
 }
 
@@ -2128,12 +2128,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
 {
 	bool is_empty;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->cpus_allowed, new_cpus);
 	cpumask_copy(cs->effective_cpus, new_cpus);
 	cs->mems_allowed = *new_mems;
 	cs->effective_mems = *new_mems;
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	/*
 	 * Don't call update_tasks_cpumask() if the cpuset becomes empty,
@@ -2170,10 +2170,10 @@ hotplug_update_tasks(struct cpuset *cs,
 	if (nodes_empty(*new_mems))
 		*new_mems = parent_cs(cs)->effective_mems;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irq(&callback_lock);
 	cpumask_copy(cs->effective_cpus, new_cpus);
 	cs->effective_mems = *new_mems;
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irq(&callback_lock);
 
 	if (cpus_updated)
 		update_tasks_cpumask(cs);
@@ -2259,21 +2259,21 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 
 	/* synchronize cpus_allowed to cpu_active_mask */
 	if (cpus_updated) {
-		mutex_lock(&callback_mutex);
+		spin_lock_irq(&callback_lock);
 		if (!on_dfl)
 			cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
 		cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
-		mutex_unlock(&callback_mutex);
+		spin_unlock_irq(&callback_lock);
 		/* we don't mess with cpumasks of tasks in top_cpuset */
 	}
 
 	/* synchronize mems_allowed to N_MEMORY */
 	if (mems_updated) {
-		mutex_lock(&callback_mutex);
+		spin_lock_irq(&callback_lock);
 		if (!on_dfl)
 			top_cpuset.mems_allowed = new_mems;
 		top_cpuset.effective_mems = new_mems;
-		mutex_unlock(&callback_mutex);
+		spin_unlock_irq(&callback_lock);
 		update_tasks_nodemask(&top_cpuset);
 	}
 
@@ -2366,11 +2366,13 @@ void __init cpuset_init_smp(void)
 
 void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 {
-	mutex_lock(&callback_mutex);
+	unsigned long flags;
+
+	spin_lock_irqsave(&callback_lock, flags);
 	rcu_read_lock();
 	guarantee_online_cpus(task_cs(tsk), pmask);
 	rcu_read_unlock();
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irqrestore(&callback_lock, flags);
 }
 
 void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
@@ -2416,12 +2418,13 @@ void cpuset_init_current_mems_allowed(void)
 nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
 {
 	nodemask_t mask;
+	unsigned long flags;
 
-	mutex_lock(&callback_mutex);
+	spin_lock_irqsave(&callback_lock, flags);
 	rcu_read_lock();
 	guarantee_online_mems(task_cs(tsk), &mask);
 	rcu_read_unlock();
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irqrestore(&callback_lock, flags);
 
 	return mask;
 }
@@ -2440,7 +2443,7 @@ int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
 /*
  * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
  * mem_hardwall ancestor to the specified cpuset. Call holding
- * callback_mutex. If no ancestor is mem_exclusive or mem_hardwall
+ * callback_lock. If no ancestor is mem_exclusive or mem_hardwall
  * (an unusual configuration), then returns the root cpuset.
  */
 static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
@@ -2451,7 +2454,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
 }
 
 /**
- * cpuset_node_allowed_softwall - Can we allocate on a memory node?
+ * cpuset_node_allowed - Can we allocate on a memory node?
  * @node: is this an allowed node?
  * @gfp_mask: memory allocation flags
  *
@@ -2463,13 +2466,6 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
  * flag, yes.
  * Otherwise, no.
  *
- * If __GFP_HARDWALL is set, cpuset_node_allowed_softwall() reduces to
- * cpuset_node_allowed_hardwall(). Otherwise, cpuset_node_allowed_softwall()
- * might sleep, and might allow a node from an enclosing cpuset.
- *
- * cpuset_node_allowed_hardwall() only handles the simpler case of hardwall
- * cpusets, and never sleeps.
- *
  * The __GFP_THISNODE placement logic is really handled elsewhere,
  * by forcibly using a zonelist starting at a specified node, and by
  * (in get_page_from_freelist()) refusing to consider the zones for
@@ -2482,13 +2478,12 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
  * GFP_KERNEL allocations are not so marked, so can escape to the
  * nearest enclosing hardwalled ancestor cpuset.
  *
- * Scanning up parent cpusets requires callback_mutex. The
+ * Scanning up parent cpusets requires callback_lock. The
  * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
  * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
  * current tasks mems_allowed came up empty on the first pass over
  * the zonelist. So only GFP_KERNEL allocations, if all nodes in the
- * cpuset are short of memory, might require taking the callback_mutex
- * mutex.
+ * cpuset are short of memory, might require taking the callback_lock.
  *
  * The first call here from mm/page_alloc:get_page_from_freelist()
  * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
@@ -2505,20 +2500,15 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
  *	TIF_MEMDIE   - any node ok
  *	GFP_KERNEL   - any node in enclosing hardwalled cpuset ok
  *	GFP_USER     - only nodes in current tasks mems allowed ok.
- *
- * Rule:
- *    Don't call cpuset_node_allowed_softwall if you can't sleep, unless you
- *    pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
- *    the code that might scan up ancestor cpusets and sleep.
  */
-int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
+int __cpuset_node_allowed(int node, gfp_t gfp_mask)
 {
 	struct cpuset *cs;		/* current cpuset ancestors */
 	int allowed;			/* is allocation in zone z allowed? */
+	unsigned long flags;
 
 	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
 		return 1;
-	might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
 	if (node_isset(node, current->mems_allowed))
 		return 1;
 	/*
@@ -2534,55 +2524,17 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
 		return 1;
 
 	/* Not hardwall and node outside mems_allowed: scan up cpusets */
-	mutex_lock(&callback_mutex);
+	spin_lock_irqsave(&callback_lock, flags);
 
 	rcu_read_lock();
 	cs = nearest_hardwall_ancestor(task_cs(current));
 	allowed = node_isset(node, cs->mems_allowed);
 	rcu_read_unlock();
 
-	mutex_unlock(&callback_mutex);
+	spin_unlock_irqrestore(&callback_lock, flags);
 	return allowed;
 }
 
-/*
- * cpuset_node_allowed_hardwall - Can we allocate on a memory node?
- * @node: is this an allowed node?
- * @gfp_mask: memory allocation flags
- *
- * If we're in interrupt, yes, we can always allocate. If __GFP_THISNODE is
- * set, yes, we can always allocate. If node is in our task's mems_allowed,
- * yes. If the task has been OOM killed and has access to memory reserves as
- * specified by the TIF_MEMDIE flag, yes.
- * Otherwise, no.
- *
- * The __GFP_THISNODE placement logic is really handled elsewhere,
- * by forcibly using a zonelist starting at a specified node, and by
- * (in get_page_from_freelist()) refusing to consider the zones for
- * any node on the zonelist except the first. By the time any such
- * calls get to this routine, we should just shut up and say 'yes'.
- *
- * Unlike the cpuset_node_allowed_softwall() variant, above,
- * this variant requires that the node be in the current task's
- * mems_allowed or that we're in interrupt. It does not scan up the
- * cpuset hierarchy for the nearest enclosing mem_exclusive cpuset.
- * It never sleeps.
- */
-int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
-{
-	if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
-		return 1;
-	if (node_isset(node, current->mems_allowed))
-		return 1;
-	/*
-	 * Allow tasks that have access to memory reserves because they have
-	 * been OOM killed to get memory anywhere.
-	 */
-	if (unlikely(test_thread_flag(TIF_MEMDIE)))
-		return 1;
-	return 0;
-}
-
 /**
  * cpuset_mem_spread_node() - On which node to begin search for a file page
  * cpuset_slab_spread_node() - On which node to begin search for a slab page
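
For callers, the second half of the diff collapses the old softwall/hardwall pair into the single __cpuset_node_allowed(int node, gfp_t gfp_mask) shown above, which never sleeps; whether the check may escape to a hardwalled ancestor is decided solely by __GFP_HARDWALL in gfp_mask. A caller-side sketch follows; the example_* function is hypothetical, and it assumes the new declaration is exposed through <linux/cpuset.h> like the old ones, which is outside this diff:

```c
#include <linux/cpuset.h>
#include <linux/gfp.h>

/* Hypothetical allocator-side check.  Before this series a caller had to
 * choose between cpuset_node_allowed_softwall() (might sleep) and
 * cpuset_node_allowed_hardwall() (atomic-safe); now a single call serves
 * both cases and is safe in atomic context. */
static int example_node_allowed(int node, gfp_t gfp_mask)
{
	/* GFP_USER-style allocations pass __GFP_HARDWALL and are confined
	 * to the current cpuset; GFP_KERNEL-style allocations do not, and
	 * may fall back to the nearest hardwalled ancestor. */
	return __cpuset_node_allowed(node, gfp_mask);
}
```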