aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cpuset.c
diff options
context:
space:
mode:
authorPaul Jackson <pj@sgi.com>2006-01-08 04:02:02 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-01-08 23:13:45 -0500
commit6b9c2603ce07f70de9c7a8d335ecd028e8ff11f3 (patch)
tree38b009da71a1f93bbda8621c7d2721c18913260d /kernel/cpuset.c
parentc417f0242ebe578924a30d4e53d35b5059fed4e7 (diff)
[PATCH] cpuset: use rcu directly optimization
Optimize the cpuset impact on page allocation, the most performance critical cpuset hook in the kernel. On each page allocation, the cpuset hook needs to check for a possible change in the current task's cpuset. It can now handle the common case of no change without taking any spinlock or semaphore, thanks to RCU. Convert a spinlock on the current task to an rcu_read_lock(), saving approximately a memory barrier and an atomic op, depending on architecture. This is done by adding rcu_assign_pointer() and synchronize_rcu() calls to the write side of the task->cpuset pointer, in cpuset.c:attach_task(), to delay freeing up a detached cpuset until after any critical sections referencing that pointer. Thanks to Andi Kleen, Nick Piggin and Eric Dumazet for ideas. Signed-off-by: Paul Jackson <pj@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--kernel/cpuset.c40
1 files changed, 30 insertions, 10 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index fc949e4a625c..6fe28d6f282b 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -39,6 +39,7 @@
39#include <linux/namei.h> 39#include <linux/namei.h>
40#include <linux/pagemap.h> 40#include <linux/pagemap.h>
41#include <linux/proc_fs.h> 41#include <linux/proc_fs.h>
42#include <linux/rcupdate.h>
42#include <linux/sched.h> 43#include <linux/sched.h>
43#include <linux/seq_file.h> 44#include <linux/seq_file.h>
44#include <linux/slab.h> 45#include <linux/slab.h>
@@ -248,6 +249,11 @@ static struct super_block *cpuset_sb;
248 * a tasks cpuset pointer we use task_lock(), which acts on a spinlock 249 * a tasks cpuset pointer we use task_lock(), which acts on a spinlock
249 * (task->alloc_lock) already in the task_struct routinely used for 250 * (task->alloc_lock) already in the task_struct routinely used for
250 * such matters. 251 * such matters.
252 *
253 * P.S. One more locking exception. RCU is used to guard the
254 * update of a tasks cpuset pointer by attach_task() and the
255 * access of task->cpuset->mems_generation via that pointer in
256 * the routine cpuset_update_task_memory_state().
251 */ 257 */
252 258
253static DECLARE_MUTEX(manage_sem); 259static DECLARE_MUTEX(manage_sem);
@@ -610,12 +616,24 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
610 * cpuset pointer. This routine also might acquire callback_sem and 616 * cpuset pointer. This routine also might acquire callback_sem and
611 * current->mm->mmap_sem during call. 617 * current->mm->mmap_sem during call.
612 * 618 *
613 * The task_lock() is required to dereference current->cpuset safely. 619 * Reading current->cpuset->mems_generation doesn't need task_lock
614 * Without it, we could pick up the pointer value of current->cpuset 620 * to guard the current->cpuset dereference, because it is guarded
615 * in one instruction, and then attach_task could give us a different 621 * from concurrent freeing of current->cpuset by attach_task(),
616 * cpuset, and then the cpuset we had could be removed and freed, 622 * using RCU.
617 * and then on our next instruction, we could dereference a no longer 623 *
618 * valid cpuset pointer to get its mems_generation field. 624 * The rcu_dereference() is technically probably not needed,
625 * as I don't actually mind if I see a new cpuset pointer but
626 * an old value of mems_generation. However this really only
627 * matters on alpha systems using cpusets heavily. If I dropped
628 * that rcu_dereference(), it would save them a memory barrier.
629 * For all other arch's, rcu_dereference is a no-op anyway, and for
630 * alpha systems not using cpusets, another planned optimization,
631 * avoiding the rcu critical section for tasks in the root cpuset
632 * which is statically allocated, so can't vanish, will make this
633 * irrelevant. Better to use RCU as intended, than to engage in
634 * some cute trick to save a memory barrier that is impossible to
635 * test, for alpha systems using cpusets heavily, which might not
636 * even exist.
619 * 637 *
620 * This routine is needed to update the per-task mems_allowed data, 638 * This routine is needed to update the per-task mems_allowed data,
621 * within the tasks context, when it is trying to allocate memory 639 * within the tasks context, when it is trying to allocate memory
@@ -627,11 +645,12 @@ void cpuset_update_task_memory_state()
627{ 645{
628 int my_cpusets_mem_gen; 646 int my_cpusets_mem_gen;
629 struct task_struct *tsk = current; 647 struct task_struct *tsk = current;
630 struct cpuset *cs = tsk->cpuset; 648 struct cpuset *cs;
631 649
632 task_lock(tsk); 650 rcu_read_lock();
651 cs = rcu_dereference(tsk->cpuset);
633 my_cpusets_mem_gen = cs->mems_generation; 652 my_cpusets_mem_gen = cs->mems_generation;
634 task_unlock(tsk); 653 rcu_read_unlock();
635 654
636 if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { 655 if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
637 down(&callback_sem); 656 down(&callback_sem);
@@ -1131,7 +1150,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
1131 return -ESRCH; 1150 return -ESRCH;
1132 } 1151 }
1133 atomic_inc(&cs->count); 1152 atomic_inc(&cs->count);
1134 tsk->cpuset = cs; 1153 rcu_assign_pointer(tsk->cpuset, cs);
1135 task_unlock(tsk); 1154 task_unlock(tsk);
1136 1155
1137 guarantee_online_cpus(cs, &cpus); 1156 guarantee_online_cpus(cs, &cpus);
@@ -1151,6 +1170,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
1151 if (is_memory_migrate(cs)) 1170 if (is_memory_migrate(cs))
1152 do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL); 1171 do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL);
1153 put_task_struct(tsk); 1172 put_task_struct(tsk);
1173 synchronize_rcu();
1154 if (atomic_dec_and_test(&oldcs->count)) 1174 if (atomic_dec_and_test(&oldcs->count))
1155 check_for_release(oldcs, ppathbuf); 1175 check_for_release(oldcs, ppathbuf);
1156 return 0; 1176 return 0;