author    Tejun Heo <tj@kernel.org>  2011-12-12 21:12:21 -0500
committer Tejun Heo <tj@kernel.org>  2011-12-12 21:12:21 -0500
commit    cd3d095275374220921fcf0d4e0c16584b26ddbc (patch)
tree      eb867534c5cdfa111f88b255f76242e93fddb8b6
parent    77e4ef99d1c596a31747668e5fd837f77b6349b6 (diff)
cgroup: always lock threadgroup during migration
Update cgroup to take advantage of the fact that threadgroup_lock()
guarantees a stable threadgroup.

* Lock the threadgroup even if the target is a single task.  This
  guarantees that the target tasks stay stable during migration
  regardless of the target type.

* Remove the PF_EXITING early exit optimization from
  attach_task_by_pid() and check it in cgroup_attach_task() instead.
  The optimization was for a rather cold path to begin with, and the
  PF_EXITING state can be trusted throughout migration by checking it
  after locking the threadgroup.

* Don't add PF_EXITING tasks to the target task array in
  cgroup_attach_proc().  This ensures that task migration is performed
  only for live tasks.

* Remove the -ESRCH failure path from cgroup_task_migrate().  With the
  above changes, it's guaranteed to be called only for live tasks.

After the changes, only live tasks are migrated and they're guaranteed
to stay alive until migration is complete.  This removes the problems
caused by exec and exit racing against cgroup migration, including
asymmetry among cgroup attach methods and different cgroup methods
racing each other.

v2: Oleg pointed out that one more PF_EXITING check can be removed
from cgroup_attach_proc().  Removed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Menage <paul@paulmenage.org>
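Condensed, both attach paths now share a single locking pattern.  The
sketch below is distilled from the attach_task_by_pid() hunk at the end
of the diff; the helper name is hypothetical, and pid lookup,
refcounting, and cgroup_mutex handling are elided:

/* Sketch distilled from the attach_task_by_pid() hunk below; the
 * helper name is hypothetical.  Assumes cgroup_mutex is held and a
 * reference on @tsk is taken, as in the real function. */
static int attach_with_threadgroup_locked(struct cgroup *cgrp,
                                          struct task_struct *tsk,
                                          bool threadgroup)
{
        int ret;

        /* Now taken for both target types: neither @tsk nor, in the
         * threadgroup case, any of its threads can exec or exit
         * until migration completes. */
        threadgroup_lock(tsk);

        if (threadgroup)
                ret = cgroup_attach_proc(cgrp, tsk);
        else
                ret = cgroup_attach_task(cgrp, tsk);

        threadgroup_unlock(tsk);
        return ret;
}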
-rw-r--r--  kernel/cgroup.c | 62
1 file changed, 27 insertions(+), 35 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b409df3b2e9d..d71e012e81be 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1762,7 +1762,7 @@ EXPORT_SYMBOL_GPL(cgroup_path);
  *
  * 'guarantee' is set if the caller promises that a new css_set for the task
  * will already exist. If not set, this function might sleep, and can fail with
- * -ENOMEM. Otherwise, it can only fail with -ESRCH.
+ * -ENOMEM. Must be called with cgroup_mutex and threadgroup locked.
  */
 static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
                                struct task_struct *tsk, bool guarantee)
@@ -1800,13 +1800,9 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
        }
        put_css_set(oldcg);
 
-       /* if PF_EXITING is set, the tsk->cgroups pointer is no longer safe. */
+       /* @tsk can't exit as its threadgroup is locked */
        task_lock(tsk);
-       if (tsk->flags & PF_EXITING) {
-               task_unlock(tsk);
-               put_css_set(newcg);
-               return -ESRCH;
-       }
+       WARN_ON_ONCE(tsk->flags & PF_EXITING);
        rcu_assign_pointer(tsk->cgroups, newcg);
        task_unlock(tsk);
 
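The -ESRCH branch can be dropped because the parent commit (77e4ef99)
extended threadgroup_lock() to cover the exit path, so PF_EXITING
cannot become set while the lock is held.  A minimal sketch of the
invariant the callers now rely on; the helper name is hypothetical:

/* Hypothetical helper; caller must hold threadgroup_lock(tsk), per
 * the new cgroup_task_migrate() contract. */
static bool task_stays_live(struct task_struct *tsk)
{
        /* The exit path sets PF_EXITING only while the threadgroup
         * lock is available, so this test cannot flip under us: a
         * task that is live here stays live until
         * threadgroup_unlock(). */
        return !(tsk->flags & PF_EXITING);
}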
@@ -1832,8 +1828,8 @@ static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
  * @cgrp: the cgroup the task is attaching to
  * @tsk: the task to be attached
  *
- * Call holding cgroup_mutex. May take task_lock of
- * the task 'tsk' during call.
+ * Call with cgroup_mutex and threadgroup locked. May take task_lock of
+ * @tsk during call.
  */
 int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
@@ -1842,6 +1838,10 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
        struct cgroup *oldcgrp;
        struct cgroupfs_root *root = cgrp->root;
 
+       /* @tsk either already exited or can't exit until the end */
+       if (tsk->flags & PF_EXITING)
+               return -ESRCH;
+
        /* Nothing to do if the task is already in that cgroup */
        oldcgrp = task_cgroup_from_root(tsk, root);
        if (cgrp == oldcgrp)
@@ -2062,6 +2062,10 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
        tsk = leader;
        i = 0;
        do {
+               /* @tsk either already exited or can't exit until the end */
+               if (tsk->flags & PF_EXITING)
+                       continue;
+
                /* as per above, nr_threads may decrease, but not increase. */
                BUG_ON(i >= group_size);
                get_task_struct(tsk);
@@ -2116,11 +2120,6 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
                        continue;
                /* get old css_set pointer */
                task_lock(tsk);
-               if (tsk->flags & PF_EXITING) {
-                       /* ignore this task if it's going away */
-                       task_unlock(tsk);
-                       continue;
-               }
                oldcg = tsk->cgroups;
                get_css_set(oldcg);
                task_unlock(tsk);
@@ -2153,16 +2152,12 @@ int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
                oldcgrp = task_cgroup_from_root(tsk, root);
                if (cgrp == oldcgrp)
                        continue;
-               /* if the thread is PF_EXITING, it can just get skipped. */
                retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true);
-               if (retval == 0) {
-                       /* attach each task to each subsystem */
-                       for_each_subsys(root, ss) {
-                               if (ss->attach_task)
-                                       ss->attach_task(cgrp, tsk);
-                       }
-               } else {
-                       BUG_ON(retval != -ESRCH);
-               }
+               BUG_ON(retval);
+               /* attach each task to each subsystem */
+               for_each_subsys(root, ss) {
+                       if (ss->attach_task)
+                               ss->attach_task(cgrp, tsk);
+               }
        }
        /* nothing is sensitive to fork() after this point. */
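Given the two changes above, the migrate call in this loop can no
longer fail, hence the plain BUG_ON(retval).  Restated as a comment
against the new code:

/* Why BUG_ON(retval) is safe for
 *
 *         retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true);
 *
 * - no -ENOMEM: with guarantee == true, the new css_set was already
 *   allocated by the caller (see the cgroup_task_migrate() comment);
 * - no -ESRCH: only non-PF_EXITING threads were added to the array,
 *   and threadgroup_lock() keeps them alive until migration is done.
 */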
@@ -2215,8 +2210,8 @@ out_free_group_list:
 
 /*
  * Find the task_struct of the task to attach by vpid and pass it along to the
- * function to attach either it or all tasks in its threadgroup. Will take
- * cgroup_mutex; may take task_lock of task.
+ * function to attach either it or all tasks in its threadgroup. Will lock
+ * cgroup_mutex and threadgroup; may take task_lock of task.
  */
 static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
 {
@@ -2243,11 +2238,6 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
                 * detect it later.
                 */
                tsk = tsk->group_leader;
-       } else if (tsk->flags & PF_EXITING) {
-               /* optimization for the single-task-only case */
-               rcu_read_unlock();
-               cgroup_unlock();
-               return -ESRCH;
        }
        /*
         * even if we're attaching all tasks in the thread group, we
@@ -2271,13 +2261,15 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
                get_task_struct(tsk);
        }
 
-       if (threadgroup) {
-               threadgroup_lock(tsk);
+       threadgroup_lock(tsk);
+
+       if (threadgroup)
                ret = cgroup_attach_proc(cgrp, tsk);
-               threadgroup_unlock(tsk);
-       } else {
+       else
                ret = cgroup_attach_task(cgrp, tsk);
-       }
+
+       threadgroup_unlock(tsk);
+
        put_task_struct(tsk);
        cgroup_unlock();
        return ret;
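For reference, the lock nesting that results from this patch,
summarized from the hunks above (cgroup_lock() is taken earlier in
attach_task_by_pid(), before the part shown):

/* Lock nesting in the attach paths after this patch (summary, not
 * new code):
 *
 *   cgroup_lock()                        // cgroup_mutex
 *     threadgroup_lock(tsk)              // blocks fork/exec/exit
 *       cgroup_attach_proc(cgrp, tsk)    // or cgroup_attach_task()
 *         task_lock(tsk)                 // around tsk->cgroups update
 *     threadgroup_unlock(tsk)
 *   cgroup_unlock()
 */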