aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Oleg Nesterov <oleg@redhat.com>, 2015-11-27 13:57:19 -0500
committer: Tejun Heo <tj@kernel.org>, 2015-11-30 09:48:18 -0500
commit: c9e75f0492b248aeaa7af8991a6fc9a21506bc96 (patch)
tree: 65b6a47aabaa91a19d1f0c01a613d73ab3cecee3
parent: 53254f900bd9ff1e3cc5628e76126bb403d9d160 (diff)
cgroup: pids: fix race between cgroup_post_fork() and cgroup_migrate()
If the new child migrates to another cgroup before cgroup_post_fork() calls subsys->fork(), then both pids_can_attach() and pids_fork() will do the same pids_uncharge(old_pids) + pids_charge(pids) sequence twice.

Change copy_process() to call threadgroup_change_begin/threadgroup_change_end unconditionally. percpu_down_read() is cheap and this allows other cleanups, see the next changes.

Also, this way we can unify cgroup_threadgroup_rwsem and dup_mmap_sem.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r-- kernel/cgroup_pids.c 21
-rw-r--r-- kernel/fork.c 9
2 files changed, 5 insertions, 25 deletions
diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c
index cdd8df4e991c..15ef2e46c396 100644
--- a/kernel/cgroup_pids.c
+++ b/kernel/cgroup_pids.c
@@ -243,27 +243,10 @@ static void pids_cancel_fork(struct task_struct *task, void *priv)
243 243
244static void pids_fork(struct task_struct *task, void *priv) 244static void pids_fork(struct task_struct *task, void *priv)
245{ 245{
246 struct cgroup_subsys_state *css; 246 struct cgroup_subsys_state *css = priv;
247 struct cgroup_subsys_state *old_css = priv;
248 struct pids_cgroup *pids;
249 struct pids_cgroup *old_pids = css_pids(old_css);
250
251 css = task_get_css(task, pids_cgrp_id);
252 pids = css_pids(css);
253
254 /*
255 * If the association has changed, we have to revert and reapply the
256 * charge/uncharge on the wrong hierarchy to the current one. Since
257 * the association can only change due to an organisation event, its
258 * okay for us to ignore the limit in this case.
259 */
260 if (pids != old_pids) {
261 pids_uncharge(old_pids, 1);
262 pids_charge(pids, 1);
263 }
264 247
248 WARN_ON(task_css_check(task, pids_cgrp_id, true) != css);
265 css_put(css); 249 css_put(css);
266 css_put(old_css);
267} 250}
268 251
269static void pids_free(struct task_struct *task) 252static void pids_free(struct task_struct *task)
diff --git a/kernel/fork.c b/kernel/fork.c
index f97f2c449f5c..fce002ee3ddf 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1368,8 +1368,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1368 p->real_start_time = ktime_get_boot_ns(); 1368 p->real_start_time = ktime_get_boot_ns();
1369 p->io_context = NULL; 1369 p->io_context = NULL;
1370 p->audit_context = NULL; 1370 p->audit_context = NULL;
1371 if (clone_flags & CLONE_THREAD) 1371 threadgroup_change_begin(current);
1372 threadgroup_change_begin(current);
1373 cgroup_fork(p); 1372 cgroup_fork(p);
1374#ifdef CONFIG_NUMA 1373#ifdef CONFIG_NUMA
1375 p->mempolicy = mpol_dup(p->mempolicy); 1374 p->mempolicy = mpol_dup(p->mempolicy);
@@ -1610,8 +1609,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1610 1609
1611 proc_fork_connector(p); 1610 proc_fork_connector(p);
1612 cgroup_post_fork(p, cgrp_ss_priv); 1611 cgroup_post_fork(p, cgrp_ss_priv);
1613 if (clone_flags & CLONE_THREAD) 1612 threadgroup_change_end(current);
1614 threadgroup_change_end(current);
1615 perf_event_fork(p); 1613 perf_event_fork(p);
1616 1614
1617 trace_task_newtask(p, clone_flags); 1615 trace_task_newtask(p, clone_flags);
@@ -1652,8 +1650,7 @@ bad_fork_cleanup_policy:
1652 mpol_put(p->mempolicy); 1650 mpol_put(p->mempolicy);
1653bad_fork_cleanup_threadgroup_lock: 1651bad_fork_cleanup_threadgroup_lock:
1654#endif 1652#endif
1655 if (clone_flags & CLONE_THREAD) 1653 threadgroup_change_end(current);
1656 threadgroup_change_end(current);
1657 delayacct_tsk_free(p); 1654 delayacct_tsk_free(p);
1658bad_fork_cleanup_count: 1655bad_fork_cleanup_count:
1659 atomic_dec(&p->cred->user->processes); 1656 atomic_dec(&p->cred->user->processes);