diff options
-rw-r--r-- | kernel/cpuset.c | 40 |
1 files changed, 30 insertions, 10 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index fc949e4a625c..6fe28d6f282b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/namei.h> | 39 | #include <linux/namei.h> |
40 | #include <linux/pagemap.h> | 40 | #include <linux/pagemap.h> |
41 | #include <linux/proc_fs.h> | 41 | #include <linux/proc_fs.h> |
42 | #include <linux/rcupdate.h> | ||
42 | #include <linux/sched.h> | 43 | #include <linux/sched.h> |
43 | #include <linux/seq_file.h> | 44 | #include <linux/seq_file.h> |
44 | #include <linux/slab.h> | 45 | #include <linux/slab.h> |
@@ -248,6 +249,11 @@ static struct super_block *cpuset_sb; | |||
248 | * a task's cpuset pointer we use task_lock(), which acts on a spinlock | 249 | * a task's cpuset pointer we use task_lock(), which acts on a spinlock |
249 | * (task->alloc_lock) already in the task_struct routinely used for | 250 | * (task->alloc_lock) already in the task_struct routinely used for |
250 | * such matters. | 251 | * such matters. |
252 | * | ||
253 | * P.S. One more locking exception. RCU is used to guard the | ||
254 | * update of a task's cpuset pointer by attach_task() and the | ||
255 | * access of task->cpuset->mems_generation via that pointer in | ||
256 | * the routine cpuset_update_task_memory_state(). | ||
251 | */ | 257 | */ |
252 | 258 | ||
253 | static DECLARE_MUTEX(manage_sem); | 259 | static DECLARE_MUTEX(manage_sem); |
@@ -610,12 +616,24 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) | |||
610 | * cpuset pointer. This routine also might acquire callback_sem and | 616 | * cpuset pointer. This routine also might acquire callback_sem and |
611 | * current->mm->mmap_sem during call. | 617 | * current->mm->mmap_sem during call. |
612 | * | 618 | * |
613 | * The task_lock() is required to dereference current->cpuset safely. | 619 | * Reading current->cpuset->mems_generation doesn't need task_lock |
614 | * Without it, we could pick up the pointer value of current->cpuset | 620 | * to guard the current->cpuset dereference, because it is guarded |
615 | * in one instruction, and then attach_task could give us a different | 621 | * from concurrent freeing of current->cpuset by attach_task(), |
616 | * cpuset, and then the cpuset we had could be removed and freed, | 622 | * using RCU. |
617 | * and then on our next instruction, we could dereference a no longer | 623 | * |
618 | * valid cpuset pointer to get its mems_generation field. | 624 | * The rcu_dereference() is technically probably not needed, |
625 | * as I don't actually mind if I see a new cpuset pointer but | ||
626 | * an old value of mems_generation. However this really only | ||
627 | * matters on alpha systems using cpusets heavily. If I dropped | ||
628 | * that rcu_dereference(), it would save them a memory barrier. | ||
629 | * For all other arch's, rcu_dereference is a no-op anyway, and for | ||
630 | * alpha systems not using cpusets, another planned optimization, | ||
631 | * avoiding the rcu critical section for tasks in the root cpuset | ||
632 | * which is statically allocated, so can't vanish, will make this | ||
633 | * irrelevant. Better to use RCU as intended, than to engage in | ||
634 | * some cute trick to save a memory barrier that is impossible to | ||
635 | * test, for alpha systems using cpusets heavily, which might not | ||
636 | * even exist. | ||
619 | * | 637 | * |
620 | * This routine is needed to update the per-task mems_allowed data, | 638 | * This routine is needed to update the per-task mems_allowed data, |
621 | * within the tasks context, when it is trying to allocate memory | 639 | * within the tasks context, when it is trying to allocate memory |
@@ -627,11 +645,12 @@ void cpuset_update_task_memory_state() | |||
627 | { | 645 | { |
628 | int my_cpusets_mem_gen; | 646 | int my_cpusets_mem_gen; |
629 | struct task_struct *tsk = current; | 647 | struct task_struct *tsk = current; |
630 | struct cpuset *cs = tsk->cpuset; | 648 | struct cpuset *cs; |
631 | 649 | ||
632 | task_lock(tsk); | 650 | rcu_read_lock(); |
651 | cs = rcu_dereference(tsk->cpuset); | ||
633 | my_cpusets_mem_gen = cs->mems_generation; | 652 | my_cpusets_mem_gen = cs->mems_generation; |
634 | task_unlock(tsk); | 653 | rcu_read_unlock(); |
635 | 654 | ||
636 | if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { | 655 | if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { |
637 | down(&callback_sem); | 656 | down(&callback_sem); |
@@ -1131,7 +1150,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) | |||
1131 | return -ESRCH; | 1150 | return -ESRCH; |
1132 | } | 1151 | } |
1133 | atomic_inc(&cs->count); | 1152 | atomic_inc(&cs->count); |
1134 | tsk->cpuset = cs; | 1153 | rcu_assign_pointer(tsk->cpuset, cs); |
1135 | task_unlock(tsk); | 1154 | task_unlock(tsk); |
1136 | 1155 | ||
1137 | guarantee_online_cpus(cs, &cpus); | 1156 | guarantee_online_cpus(cs, &cpus); |
@@ -1151,6 +1170,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) | |||
1151 | if (is_memory_migrate(cs)) | 1170 | if (is_memory_migrate(cs)) |
1152 | do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL); | 1171 | do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL); |
1153 | put_task_struct(tsk); | 1172 | put_task_struct(tsk); |
1173 | synchronize_rcu(); | ||
1154 | if (atomic_dec_and_test(&oldcs->count)) | 1174 | if (atomic_dec_and_test(&oldcs->count)) |
1155 | check_for_release(oldcs, ppathbuf); | 1175 | check_for_release(oldcs, ppathbuf); |
1156 | return 0; | 1176 | return 0; |