diff options
author | Tejun Heo <tj@kernel.org> | 2016-03-15 20:43:04 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2016-03-16 16:31:46 -0400 |
commit | 2b021cbf3cb6208f0d40fd2f1869f237934340ed (patch) | |
tree | d5da3706354863607f2522cd0416e1de5993732f | |
parent | 6c83e6cb0ce897818878a7d3b1b25d5dc8f611a2 (diff) |
cgroup: ignore css_sets associated with dead cgroups during migration
Before 2e91fa7f6d45 ("cgroup: keep zombies associated with their
original cgroups"), all dead tasks were associated with init_css_set.
If a zombie task is requested for migration, while migration prep
operations would still be performed on init_css_set, the actual
migration would ignore zombie tasks. As init_css_set is always valid,
this worked fine.
However, after 2e91fa7f6d45, a zombie task stays with the css_set it was
associated with at the time of death. Let's say a task T is associated
with cgroup A on hierarchy H-1 and cgroup B on hierarchy H-2. After T
becomes a zombie, it would still remain associated with A and B. If A
only contains zombie tasks, it can be removed. On removal, A gets
marked offline but stays pinned until all zombies are drained. At
this point, if migration is initiated on T to a cgroup C on hierarchy
H-2, the migration path would try to prepare T's css_set for migration
and trigger the following warning:
WARNING: CPU: 0 PID: 1576 at kernel/cgroup.c:474 cgroup_get+0x121/0x160()
CPU: 0 PID: 1576 Comm: bash Not tainted 4.4.0-work+ #289
...
Call Trace:
[<ffffffff8127e63c>] dump_stack+0x4e/0x82
[<ffffffff810445e8>] warn_slowpath_common+0x78/0xb0
[<ffffffff810446d5>] warn_slowpath_null+0x15/0x20
[<ffffffff810c33e1>] cgroup_get+0x121/0x160
[<ffffffff810c349b>] link_css_set+0x7b/0x90
[<ffffffff810c4fbc>] find_css_set+0x3bc/0x5e0
[<ffffffff810c5269>] cgroup_migrate_prepare_dst+0x89/0x1f0
[<ffffffff810c7547>] cgroup_attach_task+0x157/0x230
[<ffffffff810c7a17>] __cgroup_procs_write+0x2b7/0x470
[<ffffffff810c7bdc>] cgroup_tasks_write+0xc/0x10
[<ffffffff810c4790>] cgroup_file_write+0x30/0x1b0
[<ffffffff811c68fc>] kernfs_fop_write+0x13c/0x180
[<ffffffff81151673>] __vfs_write+0x23/0xe0
[<ffffffff81152494>] vfs_write+0xa4/0x1a0
[<ffffffff811532d4>] SyS_write+0x44/0xa0
[<ffffffff814af2d7>] entry_SYSCALL_64_fastpath+0x12/0x6f
It doesn't make sense to prepare migration for css_sets pointing to
dead cgroups as they are guaranteed to contain only zombies which are
ignored later during migration. This patch makes the cgroup destruction
path mark all affected css_sets as dead and updates the migration path
to ignore them during preparation.
Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: 2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups")
Cc: stable@vger.kernel.org # v4.4+
-rw-r--r-- | include/linux/cgroup-defs.h | 3 | ||||
-rw-r--r-- | kernel/cgroup.c | 20 |
2 files changed, 21 insertions, 2 deletions
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 34b42f03fcd8..3e39ae5bc799 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h | |||
@@ -212,6 +212,9 @@ struct css_set { | |||
212 | /* all css_task_iters currently walking this cset */ | 212 | /* all css_task_iters currently walking this cset */ |
213 | struct list_head task_iters; | 213 | struct list_head task_iters; |
214 | 214 | ||
215 | /* dead and being drained, ignore for migration */ | ||
216 | bool dead; | ||
217 | |||
215 | /* For RCU-protected deletion */ | 218 | /* For RCU-protected deletion */ |
216 | struct rcu_head rcu_head; | 219 | struct rcu_head rcu_head; |
217 | }; | 220 | }; |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e22df5d81e59..d57318950076 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -2516,6 +2516,14 @@ static void cgroup_migrate_add_src(struct css_set *src_cset, | |||
2516 | lockdep_assert_held(&cgroup_mutex); | 2516 | lockdep_assert_held(&cgroup_mutex); |
2517 | lockdep_assert_held(&css_set_lock); | 2517 | lockdep_assert_held(&css_set_lock); |
2518 | 2518 | ||
2519 | /* | ||
2520 | * If ->dead, @src_cset is associated with one or more dead cgroups | ||
2521 | * and doesn't contain any migratable tasks. Ignore it early so | ||
2522 | * that the rest of migration path doesn't get confused by it. | ||
2523 | */ | ||
2524 | if (src_cset->dead) | ||
2525 | return; | ||
2526 | |||
2519 | src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); | 2527 | src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); |
2520 | 2528 | ||
2521 | if (!list_empty(&src_cset->mg_preload_node)) | 2529 | if (!list_empty(&src_cset->mg_preload_node)) |
@@ -5258,6 +5266,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
5258 | __releases(&cgroup_mutex) __acquires(&cgroup_mutex) | 5266 | __releases(&cgroup_mutex) __acquires(&cgroup_mutex) |
5259 | { | 5267 | { |
5260 | struct cgroup_subsys_state *css; | 5268 | struct cgroup_subsys_state *css; |
5269 | struct cgrp_cset_link *link; | ||
5261 | int ssid; | 5270 | int ssid; |
5262 | 5271 | ||
5263 | lockdep_assert_held(&cgroup_mutex); | 5272 | lockdep_assert_held(&cgroup_mutex); |
@@ -5278,11 +5287,18 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
5278 | return -EBUSY; | 5287 | return -EBUSY; |
5279 | 5288 | ||
5280 | /* | 5289 | /* |
5281 | * Mark @cgrp dead. This prevents further task migration and child | 5290 | * Mark @cgrp and the associated csets dead. The former prevents |
5282 | * creation by disabling cgroup_lock_live_group(). | 5291 | * further task migration and child creation by disabling |
5292 | * cgroup_lock_live_group(). The latter makes the csets ignored by | ||
5293 | * the migration path. | ||
5283 | */ | 5294 | */ |
5284 | cgrp->self.flags &= ~CSS_ONLINE; | 5295 | cgrp->self.flags &= ~CSS_ONLINE; |
5285 | 5296 | ||
5297 | spin_lock_bh(&css_set_lock); | ||
5298 | list_for_each_entry(link, &cgrp->cset_links, cset_link) | ||
5299 | link->cset->dead = true; | ||
5300 | spin_unlock_bh(&css_set_lock); | ||
5301 | |||
5286 | /* initiate massacre of all css's */ | 5302 | /* initiate massacre of all css's */ |
5287 | for_each_css(css, ssid, cgrp) | 5303 | for_each_css(css, ssid, cgrp) |
5288 | kill_css(css); | 5304 | kill_css(css); |