summary | refs | log | tree | commit | diff | stats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2012-10-16 18:03:14 -0400
committer: Tejun Heo <tj@kernel.org> 2012-10-16 18:03:14 -0400
commit: 5edee61edeaaebafe584f8fb7074c1ef4658596b (patch)
tree: 23e6ee3581eb0009b3c2a2686c25fdba538219de /kernel/cgroup.c
parent: ddffeb8c4d0331609ef2581d84de4d763607bd37 (diff)
cgroup: cgroup_subsys->fork() should be called after the task is added to css_set
cgroup core has a bug which violates a basic rule about event notifications - when a new entity needs to be added, you add that to the notification list first and then make the new entity conform to the current state. If done in the reverse order, an event happening in between will be lost. cgroup_subsys->fork() is invoked way before the new task is added to the css_set. Currently, cgroup_freezer is the only user of ->fork() and uses it to make new tasks conform to the current state of the freezer. If FROZEN state is requested while fork is in progress between cgroup_fork_callbacks() and cgroup_post_fork(), the child could escape freezing - the cgroup isn't frozen when ->fork() is called and the freezer couldn't see the new task on the css_set. This patch moves cgroup_subsys->fork() invocation to cgroup_post_fork() after the new task is added to the css_set. cgroup_fork_callbacks() is removed. Because now a task may be migrated during cgroup_subsys->fork(), freezer_fork() is updated so that it adheres to the usual RCU locking and the rather pointless comment on why locking can be different there is removed (if it doesn't make anything simpler, why even bother?). Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Rafael J. Wysocki <rjw@sisk.pl> Cc: stable@vger.kernel.org
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- kernel/cgroup.c 62
1 files changed, 30 insertions, 32 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 13774b3b39aa..b7a0171067ea 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4844,44 +4844,19 @@ void cgroup_fork(struct task_struct *child)
4844} 4844}
4845 4845
4846/** 4846/**
4847 * cgroup_fork_callbacks - run fork callbacks
4848 * @child: the new task
4849 *
4850 * Called on a new task very soon before adding it to the
4851 * tasklist. No need to take any locks since no-one can
4852 * be operating on this task.
4853 */
4854void cgroup_fork_callbacks(struct task_struct *child)
4855{
4856 if (need_forkexit_callback) {
4857 int i;
4858 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4859 struct cgroup_subsys *ss = subsys[i];
4860
4861 /*
4862 * forkexit callbacks are only supported for
4863 * builtin subsystems.
4864 */
4865 if (!ss || ss->module)
4866 continue;
4867
4868 if (ss->fork)
4869 ss->fork(child);
4870 }
4871 }
4872}
4873
4874/**
4875 * cgroup_post_fork - called on a new task after adding it to the task list 4847 * cgroup_post_fork - called on a new task after adding it to the task list
4876 * @child: the task in question 4848 * @child: the task in question
4877 * 4849 *
4878 * Adds the task to the list running through its css_set if necessary. 4850 * Adds the task to the list running through its css_set if necessary and
4879 * Has to be after the task is visible on the task list in case we race 4851 * calls the subsystem fork() callbacks. Has to be after the task is
4880 * with the first call to cgroup_iter_start() - to guarantee that the 4852 * visible on the task list in case we race with the first call to
4881 * new task ends up on its list. 4853 * cgroup_iter_start() - to guarantee that the new task ends up on its
4854 * list.
4882 */ 4855 */
4883void cgroup_post_fork(struct task_struct *child) 4856void cgroup_post_fork(struct task_struct *child)
4884{ 4857{
4858 int i;
4859
4885 /* 4860 /*
4886 * use_task_css_set_links is set to 1 before we walk the tasklist 4861 * use_task_css_set_links is set to 1 before we walk the tasklist
4887 * under the tasklist_lock and we read it here after we added the child 4862 * under the tasklist_lock and we read it here after we added the child
@@ -4910,7 +4885,30 @@ void cgroup_post_fork(struct task_struct *child)
4910 } 4885 }
4911 write_unlock(&css_set_lock); 4886 write_unlock(&css_set_lock);
4912 } 4887 }
4888
4889 /*
4890 * Call ss->fork(). This must happen after @child is linked on
4891 * css_set; otherwise, @child might change state between ->fork()
4892 * and addition to css_set.
4893 */
4894 if (need_forkexit_callback) {
4895 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4896 struct cgroup_subsys *ss = subsys[i];
4897
4898 /*
4899 * fork/exit callbacks are supported only for
4900 * builtin subsystems and we don't need further
4901 * synchronization as they never go away.
4902 */
4903 if (!ss || ss->module)
4904 continue;
4905
4906 if (ss->fork)
4907 ss->fork(child);
4908 }
4909 }
4913} 4910}
4911
4914/** 4912/**
4915 * cgroup_exit - detach cgroup from exiting task 4913 * cgroup_exit - detach cgroup from exiting task
4916 * @tsk: pointer to task_struct of exiting process 4914 * @tsk: pointer to task_struct of exiting process