aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2014-02-25 10:04:03 -0500
committerTejun Heo <tj@kernel.org>2014-02-25 10:04:03 -0500
commiteaf797abc53b0ab3f0a02d4ef873a565fcce6daa (patch)
tree690461cc8a6262e0fd0fc8472d8a93a8a7592750 /kernel/cgroup.c
parent1958d2d53dadbb1c9aaf0b37741f13a60098b243 (diff)
cgroup: update how a newly forked task gets associated with css_set
When a new process is forked, cgroup_fork() associates it with the css_set of its parent but doesn't link it into it. After the new process is linked to tasklist, cgroup_post_fork() does the linking. This is problematic for cgroup_transfer_tasks() as there's no way to tell whether there are tasks which are pointing to a css_set but not linked yet. It is impossible to implement an operation which transfer all tasks of a cgroup to another and the current cgroup_transfer_tasks() can easily be tricked into leaving a newly forked process behind if it gets called between cgroup_fork() and cgroup_post_fork(). Let's make association with a css_set and linking atomic by moving it to cgroup_post_fork(). cgroup_fork() sets child->cgroups to init_css_set as a placeholder and cgroup_post_fork() is updated to perform both the association with the parent's cgroup and linking there. This means that a newly created task will point to init_css_set without holding a ref to it much like what it does on the exit path. Empty cg_list is used to indicate that the task isn't holding a ref to the associated css_set. This fixes an actual bug with cgroup_transfer_tasks(); however, I'm not marking it for -stable. The whole thing is broken in multiple other ways which require invasive updates to fix and I don't think it's worthwhile to bother with backporting this particular one. Fortunately, the only user is cpuset and these bugs don't crash the machine. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Li Zefan <lizefan@huawei.com>
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--kernel/cgroup.c86
1 files changed, 55 insertions, 31 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a93f6f1ebc69..fa0567f4eedd 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1342,8 +1342,12 @@ static void cgroup_enable_task_cg_lists(void)
1342 * racing against cgroup_exit(). 1342 * racing against cgroup_exit().
1343 */ 1343 */
1344 spin_lock_irq(&p->sighand->siglock); 1344 spin_lock_irq(&p->sighand->siglock);
1345 if (!(p->flags & PF_EXITING)) 1345 if (!(p->flags & PF_EXITING)) {
1346 list_add(&p->cg_list, &task_css_set(p)->tasks); 1346 struct css_set *cset = task_css_set(p);
1347
1348 list_add(&p->cg_list, &cset->tasks);
1349 get_css_set(cset);
1350 }
1347 spin_unlock_irq(&p->sighand->siglock); 1351 spin_unlock_irq(&p->sighand->siglock);
1348 1352
1349 task_unlock(p); 1353 task_unlock(p);
@@ -1911,6 +1915,10 @@ static int cgroup_migrate(struct cgroup *cgrp, struct task_struct *leader,
1911 if (task->flags & PF_EXITING) 1915 if (task->flags & PF_EXITING)
1912 goto next; 1916 goto next;
1913 1917
1918 /* leave @task alone if post_fork() hasn't linked it yet */
1919 if (list_empty(&task->cg_list))
1920 goto next;
1921
1914 cset = task_css_set(task); 1922 cset = task_css_set(task);
1915 if (!cset->mg_src_cgrp) 1923 if (!cset->mg_src_cgrp)
1916 goto next; 1924 goto next;
@@ -2815,6 +2823,12 @@ void css_task_iter_end(struct css_task_iter *it)
2815 * cgroup_trasnsfer_tasks - move tasks from one cgroup to another 2823 * cgroup_trasnsfer_tasks - move tasks from one cgroup to another
2816 * @to: cgroup to which the tasks will be moved 2824 * @to: cgroup to which the tasks will be moved
2817 * @from: cgroup in which the tasks currently reside 2825 * @from: cgroup in which the tasks currently reside
2826 *
2827 * Locking rules between cgroup_post_fork() and the migration path
2828 * guarantee that, if a task is forking while being migrated, the new child
2829 * is guaranteed to be either visible in the source cgroup after the
2830 * parent's migration is complete or put into the target cgroup. No task
2831 * can slip out of migration through forking.
2818 */ 2832 */
2819int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) 2833int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
2820{ 2834{
@@ -4243,27 +4257,16 @@ static const struct file_operations proc_cgroupstats_operations = {
4243}; 4257};
4244 4258
4245/** 4259/**
4246 * cgroup_fork - attach newly forked task to its parents cgroup. 4260 * cgroup_fork - initialize cgroup related fields during copy_process()
4247 * @child: pointer to task_struct of forking parent process. 4261 * @child: pointer to task_struct of forking parent process.
4248 * 4262 *
4249 * Description: A task inherits its parent's cgroup at fork(). 4263 * A task is associated with the init_css_set until cgroup_post_fork()
4250 * 4264 * attaches it to the parent's css_set. Empty cg_list indicates that
4251 * A pointer to the shared css_set was automatically copied in 4265 * @child isn't holding reference to its css_set.
4252 * fork.c by dup_task_struct(). However, we ignore that copy, since
4253 * it was not made under the protection of RCU or cgroup_mutex, so
4254 * might no longer be a valid cgroup pointer. cgroup_attach_task() might
4255 * have already changed current->cgroups, allowing the previously
4256 * referenced cgroup group to be removed and freed.
4257 *
4258 * At the point that cgroup_fork() is called, 'current' is the parent
4259 * task, and the passed argument 'child' points to the child task.
4260 */ 4266 */
4261void cgroup_fork(struct task_struct *child) 4267void cgroup_fork(struct task_struct *child)
4262{ 4268{
4263 task_lock(current); 4269 RCU_INIT_POINTER(child->cgroups, &init_css_set);
4264 get_css_set(task_css_set(current));
4265 child->cgroups = current->cgroups;
4266 task_unlock(current);
4267 INIT_LIST_HEAD(&child->cg_list); 4270 INIT_LIST_HEAD(&child->cg_list);
4268} 4271}
4269 4272
@@ -4283,21 +4286,38 @@ void cgroup_post_fork(struct task_struct *child)
4283 int i; 4286 int i;
4284 4287
4285 /* 4288 /*
4286 * use_task_css_set_links is set to 1 before we walk the tasklist 4289 * This may race against cgroup_enable_task_cg_links(). As that
4287 * under the tasklist_lock and we read it here after we added the child 4290 * function sets use_task_css_set_links before grabbing
4288 * to the tasklist under the tasklist_lock as well. If the child wasn't 4291 * tasklist_lock and we just went through tasklist_lock to add
4289 * yet in the tasklist when we walked through it from 4292 * @child, it's guaranteed that either we see the set
4290 * cgroup_enable_task_cg_lists(), then use_task_css_set_links value 4293 * use_task_css_set_links or cgroup_enable_task_cg_lists() sees
4291 * should be visible now due to the paired locking and barriers implied 4294 * @child during its iteration.
4292 * by LOCK/UNLOCK: it is written before the tasklist_lock unlock 4295 *
4293 * in cgroup_enable_task_cg_lists() and read here after the tasklist_lock 4296 * If we won the race, @child is associated with %current's
4294 * lock on fork. 4297 * css_set. Grabbing css_set_rwsem guarantees both that the
4298 * association is stable, and, on completion of the parent's
4299 * migration, @child is visible in the source of migration or
4300 * already in the destination cgroup. This guarantee is necessary
4301 * when implementing operations which need to migrate all tasks of
4302 * a cgroup to another.
4303 *
4304 * Note that if we lose to cgroup_enable_task_cg_links(), @child
4305 * will remain in init_css_set. This is safe because all tasks are
4306 * in the init_css_set before cg_links is enabled and there's no
4307 * operation which transfers all tasks out of init_css_set.
4295 */ 4308 */
4296 if (use_task_css_set_links) { 4309 if (use_task_css_set_links) {
4310 struct css_set *cset;
4311
4297 down_write(&css_set_rwsem); 4312 down_write(&css_set_rwsem);
4313 cset = task_css_set_check(current,
4314 lockdep_is_held(&css_set_rwsem));
4298 task_lock(child); 4315 task_lock(child);
4299 if (list_empty(&child->cg_list)) 4316 if (list_empty(&child->cg_list)) {
4300 list_add(&child->cg_list, &task_css_set(child)->tasks); 4317 rcu_assign_pointer(child->cgroups, cset);
4318 list_add(&child->cg_list, &cset->tasks);
4319 get_css_set(cset);
4320 }
4301 task_unlock(child); 4321 task_unlock(child);
4302 up_write(&css_set_rwsem); 4322 up_write(&css_set_rwsem);
4303 } 4323 }
@@ -4353,6 +4373,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
4353{ 4373{
4354 struct cgroup_subsys *ss; 4374 struct cgroup_subsys *ss;
4355 struct css_set *cset; 4375 struct css_set *cset;
4376 bool put_cset = false;
4356 int i; 4377 int i;
4357 4378
4358 /* 4379 /*
@@ -4361,8 +4382,10 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
4361 */ 4382 */
4362 if (!list_empty(&tsk->cg_list)) { 4383 if (!list_empty(&tsk->cg_list)) {
4363 down_write(&css_set_rwsem); 4384 down_write(&css_set_rwsem);
4364 if (!list_empty(&tsk->cg_list)) 4385 if (!list_empty(&tsk->cg_list)) {
4365 list_del_init(&tsk->cg_list); 4386 list_del_init(&tsk->cg_list);
4387 put_cset = true;
4388 }
4366 up_write(&css_set_rwsem); 4389 up_write(&css_set_rwsem);
4367 } 4390 }
4368 4391
@@ -4384,7 +4407,8 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
4384 } 4407 }
4385 task_unlock(tsk); 4408 task_unlock(tsk);
4386 4409
4387 put_css_set(cset, true); 4410 if (put_cset)
4411 put_css_set(cset, true);
4388} 4412}
4389 4413
4390static void check_for_release(struct cgroup *cgrp) 4414static void check_for_release(struct cgroup *cgrp)