aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2016-03-15 20:43:04 -0400
committerTejun Heo <tj@kernel.org>2016-03-16 16:31:46 -0400
commit2b021cbf3cb6208f0d40fd2f1869f237934340ed (patch)
treed5da3706354863607f2522cd0416e1de5993732f
parent6c83e6cb0ce897818878a7d3b1b25d5dc8f611a2 (diff)
cgroup: ignore css_sets associated with dead cgroups during migration
Before 2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups"), all dead tasks were associated with init_css_set. If a zombie task is requested for migration, while migration prep operations would still be performed on init_css_set, the actual migration would ignore zombie tasks. As init_css_set is always valid, this worked fine. However, after 2e91fa7f6d45, a zombie task stays with the css_set it was associated with at the time of death. Let's say a task T is associated with cgroup A on hierarchy H-1 and cgroup B on hierarchy H-2. After T becomes a zombie, it would still remain associated with A and B. If A only contains zombie tasks, it can be removed. On removal, A gets marked offline but stays pinned until all zombies are drained. At this point, if migration is initiated on T to a cgroup C on hierarchy H-2, the migration path would try to prepare T's css_set for migration and trigger the following. WARNING: CPU: 0 PID: 1576 at kernel/cgroup.c:474 cgroup_get+0x121/0x160() CPU: 0 PID: 1576 Comm: bash Not tainted 4.4.0-work+ #289 ... 
Call Trace: [<ffffffff8127e63c>] dump_stack+0x4e/0x82 [<ffffffff810445e8>] warn_slowpath_common+0x78/0xb0 [<ffffffff810446d5>] warn_slowpath_null+0x15/0x20 [<ffffffff810c33e1>] cgroup_get+0x121/0x160 [<ffffffff810c349b>] link_css_set+0x7b/0x90 [<ffffffff810c4fbc>] find_css_set+0x3bc/0x5e0 [<ffffffff810c5269>] cgroup_migrate_prepare_dst+0x89/0x1f0 [<ffffffff810c7547>] cgroup_attach_task+0x157/0x230 [<ffffffff810c7a17>] __cgroup_procs_write+0x2b7/0x470 [<ffffffff810c7bdc>] cgroup_tasks_write+0xc/0x10 [<ffffffff810c4790>] cgroup_file_write+0x30/0x1b0 [<ffffffff811c68fc>] kernfs_fop_write+0x13c/0x180 [<ffffffff81151673>] __vfs_write+0x23/0xe0 [<ffffffff81152494>] vfs_write+0xa4/0x1a0 [<ffffffff811532d4>] SyS_write+0x44/0xa0 [<ffffffff814af2d7>] entry_SYSCALL_64_fastpath+0x12/0x6f It doesn't make sense to prepare migration for css_sets pointing to dead cgroups as they are guaranteed to contain only zombies which are ignored later during migration. This patch makes cgroup destruction path mark all affected css_sets as dead and updates the migration path to ignore them during preparation. Signed-off-by: Tejun Heo <tj@kernel.org> Fixes: 2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups") Cc: stable@vger.kernel.org # v4.4+
-rw-r--r--include/linux/cgroup-defs.h3
-rw-r--r--kernel/cgroup.c20
2 files changed, 21 insertions, 2 deletions
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 34b42f03fcd8..3e39ae5bc799 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -212,6 +212,9 @@ struct css_set {
212 /* all css_task_iters currently walking this cset */ 212 /* all css_task_iters currently walking this cset */
213 struct list_head task_iters; 213 struct list_head task_iters;
214 214
215 /* dead and being drained, ignore for migration */
216 bool dead;
217
215 /* For RCU-protected deletion */ 218 /* For RCU-protected deletion */
216 struct rcu_head rcu_head; 219 struct rcu_head rcu_head;
217}; 220};
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e22df5d81e59..d57318950076 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2516,6 +2516,14 @@ static void cgroup_migrate_add_src(struct css_set *src_cset,
2516 lockdep_assert_held(&cgroup_mutex); 2516 lockdep_assert_held(&cgroup_mutex);
2517 lockdep_assert_held(&css_set_lock); 2517 lockdep_assert_held(&css_set_lock);
2518 2518
2519 /*
2520 * If ->dead, @src_cset is associated with one or more dead cgroups
2521 * and doesn't contain any migratable tasks. Ignore it early so
2522 * that the rest of migration path doesn't get confused by it.
2523 */
2524 if (src_cset->dead)
2525 return;
2526
2519 src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); 2527 src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
2520 2528
2521 if (!list_empty(&src_cset->mg_preload_node)) 2529 if (!list_empty(&src_cset->mg_preload_node))
@@ -5258,6 +5266,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
5258 __releases(&cgroup_mutex) __acquires(&cgroup_mutex) 5266 __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
5259{ 5267{
5260 struct cgroup_subsys_state *css; 5268 struct cgroup_subsys_state *css;
5269 struct cgrp_cset_link *link;
5261 int ssid; 5270 int ssid;
5262 5271
5263 lockdep_assert_held(&cgroup_mutex); 5272 lockdep_assert_held(&cgroup_mutex);
@@ -5278,11 +5287,18 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
5278 return -EBUSY; 5287 return -EBUSY;
5279 5288
5280 /* 5289 /*
5281 * Mark @cgrp dead. This prevents further task migration and child 5290 * Mark @cgrp and the associated csets dead. The former prevents
5282 * creation by disabling cgroup_lock_live_group(). 5291 * further task migration and child creation by disabling
5292 * cgroup_lock_live_group(). The latter makes the csets ignored by
5293 * the migration path.
5283 */ 5294 */
5284 cgrp->self.flags &= ~CSS_ONLINE; 5295 cgrp->self.flags &= ~CSS_ONLINE;
5285 5296
5297 spin_lock_bh(&css_set_lock);
5298 list_for_each_entry(link, &cgrp->cset_links, cset_link)
5299 link->cset->dead = true;
5300 spin_unlock_bh(&css_set_lock);
5301
5286 /* initiate massacre of all css's */ 5302 /* initiate massacre of all css's */
5287 for_each_css(css, ssid, cgrp) 5303 for_each_css(css, ssid, cgrp)
5288 kill_css(css); 5304 kill_css(css);