diff options
author | Tejun Heo <tj@kernel.org> | 2014-02-25 10:04:03 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2014-02-25 10:04:03 -0500 |
commit | eaf797abc53b0ab3f0a02d4ef873a565fcce6daa (patch) | |
tree | 690461cc8a6262e0fd0fc8472d8a93a8a7592750 /kernel/cgroup.c | |
parent | 1958d2d53dadbb1c9aaf0b37741f13a60098b243 (diff) |
cgroup: update how a newly forked task gets associated with css_set
When a new process is forked, cgroup_fork() associates it with the
css_set of its parent but doesn't link it into it. After the new
process is linked to tasklist, cgroup_post_fork() does the linking.
This is problematic for cgroup_transfer_tasks() as there's no way to
tell whether there are tasks which are pointing to a css_set but not
linked yet. It is impossible to implement an operation which transfer
all tasks of a cgroup to another and the current
cgroup_transfer_tasks() can easily be tricked into leaving a newly
forked process behind if it gets called between cgroup_fork() and
cgroup_post_fork().
Let's make association with a css_set and linking atomic by moving it
to cgroup_post_fork(). cgroup_fork() sets child->cgroups to
init_css_set as a placeholder and cgroup_post_fork() is updated to
perform both the association with the parent's cgroup and linking
there. This means that a newly created task will point to
init_css_set without holding a ref to it much like what it does on the
exit path. Empty cg_list is used to indicate that the task isn't
holding a ref to the associated css_set.
This fixes an actual bug with cgroup_transfer_tasks(); however, I'm
not marking it for -stable. The whole thing is broken in multiple
other ways which require invasive updates to fix and I don't think
it's worthwhile to bother with backporting this particular one.
Fortunately, the only user is cpuset and these bugs don't crash the
machine.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- | kernel/cgroup.c | 86 |
1 files changed, 55 insertions, 31 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a93f6f1ebc69..fa0567f4eedd 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -1342,8 +1342,12 @@ static void cgroup_enable_task_cg_lists(void) | |||
1342 | * racing against cgroup_exit(). | 1342 | * racing against cgroup_exit(). |
1343 | */ | 1343 | */ |
1344 | spin_lock_irq(&p->sighand->siglock); | 1344 | spin_lock_irq(&p->sighand->siglock); |
1345 | if (!(p->flags & PF_EXITING)) | 1345 | if (!(p->flags & PF_EXITING)) { |
1346 | list_add(&p->cg_list, &task_css_set(p)->tasks); | 1346 | struct css_set *cset = task_css_set(p); |
1347 | |||
1348 | list_add(&p->cg_list, &cset->tasks); | ||
1349 | get_css_set(cset); | ||
1350 | } | ||
1347 | spin_unlock_irq(&p->sighand->siglock); | 1351 | spin_unlock_irq(&p->sighand->siglock); |
1348 | 1352 | ||
1349 | task_unlock(p); | 1353 | task_unlock(p); |
@@ -1911,6 +1915,10 @@ static int cgroup_migrate(struct cgroup *cgrp, struct task_struct *leader, | |||
1911 | if (task->flags & PF_EXITING) | 1915 | if (task->flags & PF_EXITING) |
1912 | goto next; | 1916 | goto next; |
1913 | 1917 | ||
1918 | /* leave @task alone if post_fork() hasn't linked it yet */ | ||
1919 | if (list_empty(&task->cg_list)) | ||
1920 | goto next; | ||
1921 | |||
1914 | cset = task_css_set(task); | 1922 | cset = task_css_set(task); |
1915 | if (!cset->mg_src_cgrp) | 1923 | if (!cset->mg_src_cgrp) |
1916 | goto next; | 1924 | goto next; |
@@ -2815,6 +2823,12 @@ void css_task_iter_end(struct css_task_iter *it) | |||
2815 | * cgroup_trasnsfer_tasks - move tasks from one cgroup to another | 2823 | * cgroup_trasnsfer_tasks - move tasks from one cgroup to another |
2816 | * @to: cgroup to which the tasks will be moved | 2824 | * @to: cgroup to which the tasks will be moved |
2817 | * @from: cgroup in which the tasks currently reside | 2825 | * @from: cgroup in which the tasks currently reside |
2826 | * | ||
2827 | * Locking rules between cgroup_post_fork() and the migration path | ||
2828 | * guarantee that, if a task is forking while being migrated, the new child | ||
2829 | * is guaranteed to be either visible in the source cgroup after the | ||
2830 | * parent's migration is complete or put into the target cgroup. No task | ||
2831 | * can slip out of migration through forking. | ||
2818 | */ | 2832 | */ |
2819 | int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) | 2833 | int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) |
2820 | { | 2834 | { |
@@ -4243,27 +4257,16 @@ static const struct file_operations proc_cgroupstats_operations = { | |||
4243 | }; | 4257 | }; |
4244 | 4258 | ||
4245 | /** | 4259 | /** |
4246 | * cgroup_fork - attach newly forked task to its parents cgroup. | 4260 | * cgroup_fork - initialize cgroup related fields during copy_process() |
4247 | * @child: pointer to task_struct of forking parent process. | 4261 | * @child: pointer to task_struct of forking parent process. |
4248 | * | 4262 | * |
4249 | * Description: A task inherits its parent's cgroup at fork(). | 4263 | * A task is associated with the init_css_set until cgroup_post_fork() |
4250 | * | 4264 | * attaches it to the parent's css_set. Empty cg_list indicates that |
4251 | * A pointer to the shared css_set was automatically copied in | 4265 | * @child isn't holding reference to its css_set. |
4252 | * fork.c by dup_task_struct(). However, we ignore that copy, since | ||
4253 | * it was not made under the protection of RCU or cgroup_mutex, so | ||
4254 | * might no longer be a valid cgroup pointer. cgroup_attach_task() might | ||
4255 | * have already changed current->cgroups, allowing the previously | ||
4256 | * referenced cgroup group to be removed and freed. | ||
4257 | * | ||
4258 | * At the point that cgroup_fork() is called, 'current' is the parent | ||
4259 | * task, and the passed argument 'child' points to the child task. | ||
4260 | */ | 4266 | */ |
4261 | void cgroup_fork(struct task_struct *child) | 4267 | void cgroup_fork(struct task_struct *child) |
4262 | { | 4268 | { |
4263 | task_lock(current); | 4269 | RCU_INIT_POINTER(child->cgroups, &init_css_set); |
4264 | get_css_set(task_css_set(current)); | ||
4265 | child->cgroups = current->cgroups; | ||
4266 | task_unlock(current); | ||
4267 | INIT_LIST_HEAD(&child->cg_list); | 4270 | INIT_LIST_HEAD(&child->cg_list); |
4268 | } | 4271 | } |
4269 | 4272 | ||
@@ -4283,21 +4286,38 @@ void cgroup_post_fork(struct task_struct *child) | |||
4283 | int i; | 4286 | int i; |
4284 | 4287 | ||
4285 | /* | 4288 | /* |
4286 | * use_task_css_set_links is set to 1 before we walk the tasklist | 4289 | * This may race against cgroup_enable_task_cg_links(). As that |
4287 | * under the tasklist_lock and we read it here after we added the child | 4290 | * function sets use_task_css_set_links before grabbing |
4288 | * to the tasklist under the tasklist_lock as well. If the child wasn't | 4291 | * tasklist_lock and we just went through tasklist_lock to add |
4289 | * yet in the tasklist when we walked through it from | 4292 | * @child, it's guaranteed that either we see the set |
4290 | * cgroup_enable_task_cg_lists(), then use_task_css_set_links value | 4293 | * use_task_css_set_links or cgroup_enable_task_cg_lists() sees |
4291 | * should be visible now due to the paired locking and barriers implied | 4294 | * @child during its iteration. |
4292 | * by LOCK/UNLOCK: it is written before the tasklist_lock unlock | 4295 | * |
4293 | * in cgroup_enable_task_cg_lists() and read here after the tasklist_lock | 4296 | * If we won the race, @child is associated with %current's |
4294 | * lock on fork. | 4297 | * css_set. Grabbing css_set_rwsem guarantees both that the |
4298 | * association is stable, and, on completion of the parent's | ||
4299 | * migration, @child is visible in the source of migration or | ||
4300 | * already in the destination cgroup. This guarantee is necessary | ||
4301 | * when implementing operations which need to migrate all tasks of | ||
4302 | * a cgroup to another. | ||
4303 | * | ||
4304 | * Note that if we lose to cgroup_enable_task_cg_links(), @child | ||
4305 | * will remain in init_css_set. This is safe because all tasks are | ||
4306 | * in the init_css_set before cg_links is enabled and there's no | ||
4307 | * operation which transfers all tasks out of init_css_set. | ||
4295 | */ | 4308 | */ |
4296 | if (use_task_css_set_links) { | 4309 | if (use_task_css_set_links) { |
4310 | struct css_set *cset; | ||
4311 | |||
4297 | down_write(&css_set_rwsem); | 4312 | down_write(&css_set_rwsem); |
4313 | cset = task_css_set_check(current, | ||
4314 | lockdep_is_held(&css_set_rwsem)); | ||
4298 | task_lock(child); | 4315 | task_lock(child); |
4299 | if (list_empty(&child->cg_list)) | 4316 | if (list_empty(&child->cg_list)) { |
4300 | list_add(&child->cg_list, &task_css_set(child)->tasks); | 4317 | rcu_assign_pointer(child->cgroups, cset); |
4318 | list_add(&child->cg_list, &cset->tasks); | ||
4319 | get_css_set(cset); | ||
4320 | } | ||
4301 | task_unlock(child); | 4321 | task_unlock(child); |
4302 | up_write(&css_set_rwsem); | 4322 | up_write(&css_set_rwsem); |
4303 | } | 4323 | } |
@@ -4353,6 +4373,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) | |||
4353 | { | 4373 | { |
4354 | struct cgroup_subsys *ss; | 4374 | struct cgroup_subsys *ss; |
4355 | struct css_set *cset; | 4375 | struct css_set *cset; |
4376 | bool put_cset = false; | ||
4356 | int i; | 4377 | int i; |
4357 | 4378 | ||
4358 | /* | 4379 | /* |
@@ -4361,8 +4382,10 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) | |||
4361 | */ | 4382 | */ |
4362 | if (!list_empty(&tsk->cg_list)) { | 4383 | if (!list_empty(&tsk->cg_list)) { |
4363 | down_write(&css_set_rwsem); | 4384 | down_write(&css_set_rwsem); |
4364 | if (!list_empty(&tsk->cg_list)) | 4385 | if (!list_empty(&tsk->cg_list)) { |
4365 | list_del_init(&tsk->cg_list); | 4386 | list_del_init(&tsk->cg_list); |
4387 | put_cset = true; | ||
4388 | } | ||
4366 | up_write(&css_set_rwsem); | 4389 | up_write(&css_set_rwsem); |
4367 | } | 4390 | } |
4368 | 4391 | ||
@@ -4384,7 +4407,8 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) | |||
4384 | } | 4407 | } |
4385 | task_unlock(tsk); | 4408 | task_unlock(tsk); |
4386 | 4409 | ||
4387 | put_css_set(cset, true); | 4410 | if (put_cset) |
4411 | put_css_set(cset, true); | ||
4388 | } | 4412 | } |
4389 | 4413 | ||
4390 | static void check_for_release(struct cgroup *cgrp) | 4414 | static void check_for_release(struct cgroup *cgrp) |