diff options
author | Frederic Weisbecker <fweisbec@gmail.com> | 2012-02-07 21:37:27 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2012-02-21 12:46:47 -0500 |
commit | 3ce3230a0cff484e5130153f244d4fb8a56b3a8b (patch) | |
tree | 6ee374618c9edacefd2e9d0f912c651bd5f12cb7 /kernel/cgroup.c | |
parent | 9a4b430451bb6d8d6b7dcdfbee0e1330b7c475a6 (diff) |
cgroup: Walk task list under tasklist_lock in cgroup_enable_task_cg_list
Walking through the tasklist in cgroup_enable_task_cg_list() inside
an RCU read side critical section is not enough because:
- RCU is not (yet) safe against while_each_thread()
- If we use only RCU, a forking task that has passed cgroup_post_fork()
without seeing use_task_css_set_links == 1 is not guaranteed to have
its child immediately visible in the tasklist if we walk through it
remotely with RCU. In this case it will be missing in its css_set's
task list.
Thus we need to traverse the list (unfortunately) under the
tasklist_lock. It makes us safe against while_each_thread() and also
makes sure we see all forked tasks that have been added to the tasklist.
As a secondary effect, reading and writing use_task_css_set_links are
now well ordered against tasklist traversing and modification. The new
layout is:
CPU 0 CPU 1
use_task_css_set_links = 1 write_lock(tasklist_lock)
read_lock(tasklist_lock) add task to tasklist
do_each_thread() { write_unlock(tasklist_lock)
add thread to css set links if (use_task_css_set_links)
} while_each_thread() add thread to css set links
read_unlock(tasklist_lock)
If CPU 0 traverses the list after the task has been added to the tasklist
then it is correctly added to the css set links. OTOH if CPU 0 traverses
the tasklist before the new task had the opportunity to be added to the
tasklist because it was too early in the fork process, then CPU 1
catches up and adds the task to the css set links after it added the task
to the tasklist. The right value of use_task_css_set_links is guaranteed
to be visible from CPU 1 due to the LOCK/UNLOCK implicit barrier properties:
the read_unlock on CPU 0 makes the write to use_task_css_set_links visible,
and the write_lock on CPU 1 makes the read of use_task_css_set_links that
comes afterward return the correct value.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Mandeep Singh Baines <msb@chromium.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- | kernel/cgroup.c | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 6e4eb4312571..c6877fe9a831 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -2707,6 +2707,14 @@ static void cgroup_enable_task_cg_lists(void) | |||
2707 | struct task_struct *p, *g; | 2707 | struct task_struct *p, *g; |
2708 | write_lock(&css_set_lock); | 2708 | write_lock(&css_set_lock); |
2709 | use_task_css_set_links = 1; | 2709 | use_task_css_set_links = 1; |
2710 | /* | ||
2711 | * We need tasklist_lock because RCU is not safe against | ||
2712 | * while_each_thread(). Besides, a forking task that has passed | ||
2713 | * cgroup_post_fork() without seeing use_task_css_set_links = 1 | ||
2714 | * is not guaranteed to have its child immediately visible in the | ||
2715 | * tasklist if we walk through it with RCU. | ||
2716 | */ | ||
2717 | read_lock(&tasklist_lock); | ||
2710 | do_each_thread(g, p) { | 2718 | do_each_thread(g, p) { |
2711 | task_lock(p); | 2719 | task_lock(p); |
2712 | /* | 2720 | /* |
@@ -2718,6 +2726,7 @@ static void cgroup_enable_task_cg_lists(void) | |||
2718 | list_add(&p->cg_list, &p->cgroups->tasks); | 2726 | list_add(&p->cg_list, &p->cgroups->tasks); |
2719 | task_unlock(p); | 2727 | task_unlock(p); |
2720 | } while_each_thread(g, p); | 2728 | } while_each_thread(g, p); |
2729 | read_unlock(&tasklist_lock); | ||
2721 | write_unlock(&css_set_lock); | 2730 | write_unlock(&css_set_lock); |
2722 | } | 2731 | } |
2723 | 2732 | ||
@@ -4522,6 +4531,17 @@ void cgroup_fork_callbacks(struct task_struct *child) | |||
4522 | */ | 4531 | */ |
4523 | void cgroup_post_fork(struct task_struct *child) | 4532 | void cgroup_post_fork(struct task_struct *child) |
4524 | { | 4533 | { |
4534 | /* | ||
4535 | * use_task_css_set_links is set to 1 before we walk the tasklist | ||
4536 | * under the tasklist_lock and we read it here after we added the child | ||
4537 | * to the tasklist under the tasklist_lock as well. If the child wasn't | ||
4538 | * yet in the tasklist when we walked through it from | ||
4539 | * cgroup_enable_task_cg_lists(), then use_task_css_set_links value | ||
4540 | * should be visible now due to the paired locking and barriers implied | ||
4541 | * by LOCK/UNLOCK: it is written before the tasklist_lock unlock | ||
4542 | * in cgroup_enable_task_cg_lists() and read here after the tasklist_lock | ||
4543 | * lock on fork. | ||
4544 | */ | ||
4525 | if (use_task_css_set_links) { | 4545 | if (use_task_css_set_links) { |
4526 | write_lock(&css_set_lock); | 4546 | write_lock(&css_set_lock); |
4527 | if (list_empty(&child->cg_list)) { | 4547 | if (list_empty(&child->cg_list)) { |