aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/cpuset.c 40
1 files changed, 30 insertions, 10 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index fc949e4a625c..6fe28d6f282b 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -39,6 +39,7 @@
39#include <linux/namei.h> 39#include <linux/namei.h>
40#include <linux/pagemap.h> 40#include <linux/pagemap.h>
41#include <linux/proc_fs.h> 41#include <linux/proc_fs.h>
42#include <linux/rcupdate.h>
42#include <linux/sched.h> 43#include <linux/sched.h>
43#include <linux/seq_file.h> 44#include <linux/seq_file.h>
44#include <linux/slab.h> 45#include <linux/slab.h>
@@ -248,6 +249,11 @@ static struct super_block *cpuset_sb;
248 * a tasks cpuset pointer we use task_lock(), which acts on a spinlock 249 * a tasks cpuset pointer we use task_lock(), which acts on a spinlock
249 * (task->alloc_lock) already in the task_struct routinely used for 250 * (task->alloc_lock) already in the task_struct routinely used for
250 * such matters. 251 * such matters.
252 *
253 * P.S. One more locking exception. RCU is used to guard the
254 * update of a task's cpuset pointer by attach_task() and the
255 * access of task->cpuset->mems_generation via that pointer in
256 * the routine cpuset_update_task_memory_state().
251 */ 257 */
252 258
253static DECLARE_MUTEX(manage_sem); 259static DECLARE_MUTEX(manage_sem);
@@ -610,12 +616,24 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
610 * cpuset pointer. This routine also might acquire callback_sem and 616 * cpuset pointer. This routine also might acquire callback_sem and
611 * current->mm->mmap_sem during call. 617 * current->mm->mmap_sem during call.
612 * 618 *
613 * The task_lock() is required to dereference current->cpuset safely. 619 * Reading current->cpuset->mems_generation doesn't need task_lock
614 * Without it, we could pick up the pointer value of current->cpuset 620 * to guard the current->cpuset dereference, because it is guarded
615 * in one instruction, and then attach_task could give us a different 621 * from concurrent freeing of current->cpuset by attach_task(),
616 * cpuset, and then the cpuset we had could be removed and freed, 622 * using RCU.
617 * and then on our next instruction, we could dereference a no longer 623 *
618 * valid cpuset pointer to get its mems_generation field. 624 * The rcu_dereference() is technically probably not needed,
625 * as I don't actually mind if I see a new cpuset pointer but
626 * an old value of mems_generation. However this really only
627 * matters on alpha systems using cpusets heavily. If I dropped
628 * that rcu_dereference(), it would save them a memory barrier.
629 * For all other arch's, rcu_dereference is a no-op anyway, and for
630 * alpha systems not using cpusets, another planned optimization,
631 * avoiding the rcu critical section for tasks in the root cpuset
632 * which is statically allocated, so can't vanish, will make this
633 * irrelevant. Better to use RCU as intended, than to engage in
634 * some cute trick to save a memory barrier that is impossible to
635 * test, for alpha systems using cpusets heavily, which might not
636 * even exist.
619 * 637 *
620 * This routine is needed to update the per-task mems_allowed data, 638 * This routine is needed to update the per-task mems_allowed data,
621 * within the tasks context, when it is trying to allocate memory 639 * within the tasks context, when it is trying to allocate memory
@@ -627,11 +645,12 @@ void cpuset_update_task_memory_state()
627{ 645{
628 int my_cpusets_mem_gen; 646 int my_cpusets_mem_gen;
629 struct task_struct *tsk = current; 647 struct task_struct *tsk = current;
630 struct cpuset *cs = tsk->cpuset; 648 struct cpuset *cs;
631 649
632 task_lock(tsk); 650 rcu_read_lock();
651 cs = rcu_dereference(tsk->cpuset);
633 my_cpusets_mem_gen = cs->mems_generation; 652 my_cpusets_mem_gen = cs->mems_generation;
634 task_unlock(tsk); 653 rcu_read_unlock();
635 654
636 if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { 655 if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
637 down(&callback_sem); 656 down(&callback_sem);
@@ -1131,7 +1150,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
1131 return -ESRCH; 1150 return -ESRCH;
1132 } 1151 }
1133 atomic_inc(&cs->count); 1152 atomic_inc(&cs->count);
1134 tsk->cpuset = cs; 1153 rcu_assign_pointer(tsk->cpuset, cs);
1135 task_unlock(tsk); 1154 task_unlock(tsk);
1136 1155
1137 guarantee_online_cpus(cs, &cpus); 1156 guarantee_online_cpus(cs, &cpus);
@@ -1151,6 +1170,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
1151 if (is_memory_migrate(cs)) 1170 if (is_memory_migrate(cs))
1152 do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL); 1171 do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL);
1153 put_task_struct(tsk); 1172 put_task_struct(tsk);
1173 synchronize_rcu();
1154 if (atomic_dec_and_test(&oldcs->count)) 1174 if (atomic_dec_and_test(&oldcs->count))
1155 check_for_release(oldcs, ppathbuf); 1175 check_for_release(oldcs, ppathbuf);
1156 return 0; 1176 return 0;