path: root/include/linux/cgroup.h
author    Tejun Heo <tj@kernel.org>  2015-12-03 10:18:21 -0500
committer Tejun Heo <tj@kernel.org>  2015-12-03 10:18:21 -0500
commit    1f7dd3e5a6e4f093017fff12232572ee1aa4639b (patch)
tree      2820e6f3fefd3c92ef2f7e58f688a8e2f2211aff /include/linux/cgroup.h
parent    599c963a0f19b14132065788322207eaa58bc7f8 (diff)
cgroup: fix handling of multi-destination migration from subtree_control enabling
Consider the following v2 hierarchy.

  P0 (+memory) --- P1 (-memory) --- A
                                 \- B

P0 has memory enabled in its subtree_control while P1 doesn't.  If
both A and B contain processes, they would belong to the memory css of
P1.  Now if memory is enabled on P1's subtree_control, memory csses
should be created on both A and B and A's processes should be moved to
the former and B's processes to the latter.  IOW, enabling controllers
can cause atomic migrations into different csses.

The core cgroup migration logic has been updated accordingly but the
controller migration methods haven't and still assume that all tasks
migrate to a single target css; furthermore, the methods were fed the
css in which subtree_control was updated, which is the parent of the
target csses.  The pids controller depends on the migration methods to
move charges and this made the controller attribute charges to the
wrong csses, often triggering the following warning by driving a
counter negative.

 WARNING: CPU: 1 PID: 1 at kernel/cgroup_pids.c:97 pids_cancel.constprop.6+0x31/0x40()
 Modules linked in:
 CPU: 1 PID: 1 Comm: systemd Not tainted 4.4.0-rc1+ #29
 ...
  ffffffff81f65382 ffff88007c043b90 ffffffff81551ffc 0000000000000000
  ffff88007c043bc8 ffffffff810de202 ffff88007a752000 ffff88007a29ab00
  ffff88007c043c80 ffff88007a1d8400 0000000000000001 ffff88007c043bd8
 Call Trace:
  [<ffffffff81551ffc>] dump_stack+0x4e/0x82
  [<ffffffff810de202>] warn_slowpath_common+0x82/0xc0
  [<ffffffff810de2fa>] warn_slowpath_null+0x1a/0x20
  [<ffffffff8118e031>] pids_cancel.constprop.6+0x31/0x40
  [<ffffffff8118e0fd>] pids_can_attach+0x6d/0xf0
  [<ffffffff81188a4c>] cgroup_taskset_migrate+0x6c/0x330
  [<ffffffff81188e05>] cgroup_migrate+0xf5/0x190
  [<ffffffff81189016>] cgroup_attach_task+0x176/0x200
  [<ffffffff8118949d>] __cgroup_procs_write+0x2ad/0x460
  [<ffffffff81189684>] cgroup_procs_write+0x14/0x20
  [<ffffffff811854e5>] cgroup_file_write+0x35/0x1c0
  [<ffffffff812e26f1>] kernfs_fop_write+0x141/0x190
  [<ffffffff81265f88>] __vfs_write+0x28/0xe0
  [<ffffffff812666fc>] vfs_write+0xac/0x1a0
  [<ffffffff81267019>] SyS_write+0x49/0xb0
  [<ffffffff81bcef32>] entry_SYSCALL_64_fastpath+0x12/0x76

This patch fixes the bug by removing the @css parameter from the three
migration methods, ->can_attach(), ->cancel_attach() and ->attach(),
and updating the cgroup_taskset iteration helpers to also return the
destination css in addition to the task being migrated.  All
controllers are updated accordingly.

* Controllers which don't care whether there are one or multiple
  target csses can be converted trivially.  cpu, io, freezer, perf,
  netclassid and netprio fall in this category.

* cpuset's current implementation assumes that there's a single source
  and destination and thus doesn't support the v2 hierarchy already.
  The only change made by this patchset is how that single destination
  css is obtained.

* The memory migration path already doesn't do anything on v2.  How
  the single destination css is obtained is updated and the prep stage
  of mem_cgroup_can_attach() is reordered to accommodate the change.

* pids is the only controller which was affected by this bug.  It now
  correctly handles multi-destination migrations and no longer causes
  counter underflow from incorrect accounting.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-and-tested-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Cc: Aleksa Sarai <cyphar@cyphar.com>
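For illustration, here is a minimal sketch of what the conversion
looks like for a controller callback under the new API.  Everything
named example_* is a hypothetical stand-in, not code from the patch;
only the cgroup_taskset_for_each() shape comes from this commit.

	#include <linux/cgroup.h>
	#include <linux/sched.h>

	/* Illustrative stand-in for a controller's per-css accounting. */
	static void example_charge(struct cgroup_subsys_state *dst_css, int n)
	{
		/* e.g. bump an atomic counter hanging off @dst_css */
	}

	/*
	 * Hypothetical ->can_attach() under the converted API: the @css
	 * parameter is gone and each iteration yields the task together
	 * with its own destination css, so charges land on the correct
	 * css even when one migration targets multiple csses.
	 */
	static int example_can_attach(struct cgroup_taskset *tset)
	{
		struct task_struct *task;
		struct cgroup_subsys_state *dst_css;

		cgroup_taskset_for_each(task, dst_css, tset)
			example_charge(dst_css, 1);

		return 0;
	}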
Diffstat (limited to 'include/linux/cgroup.h')
-rw-r--r--  include/linux/cgroup.h | 33 ++++++++++++++++++++++-----------
1 file changed, 22 insertions(+), 11 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f64083030ad5..cb91b44f5f78 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -120,8 +120,10 @@ struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state
 struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos,
                                                      struct cgroup_subsys_state *css);
 
-struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
-struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
+struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
+                                         struct cgroup_subsys_state **dst_cssp);
+struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
+                                        struct cgroup_subsys_state **dst_cssp);
 
 void css_task_iter_start(struct cgroup_subsys_state *css,
                          struct css_task_iter *it);
@@ -236,30 +238,39 @@ void css_task_iter_end(struct css_task_iter *it);
 /**
  * cgroup_taskset_for_each - iterate cgroup_taskset
  * @task: the loop cursor
+ * @dst_css: the destination css
  * @tset: taskset to iterate
  *
  * @tset may contain multiple tasks and they may belong to multiple
- * processes.  When there are multiple tasks in @tset, if a task of a
- * process is in @tset, all tasks of the process are in @tset.  Also, all
- * are guaranteed to share the same source and destination csses.
+ * processes.
+ *
+ * On the v2 hierarchy, there may be tasks from multiple processes and they
+ * may not share the source or destination csses.
+ *
+ * On traditional hierarchies, when there are multiple tasks in @tset, if a
+ * task of a process is in @tset, all tasks of the process are in @tset.
+ * Also, all are guaranteed to share the same source and destination csses.
  *
  * Iteration is not in any specific order.
  */
-#define cgroup_taskset_for_each(task, tset)				\
-	for ((task) = cgroup_taskset_first((tset)); (task);		\
-	     (task) = cgroup_taskset_next((tset)))
+#define cgroup_taskset_for_each(task, dst_css, tset)			\
+	for ((task) = cgroup_taskset_first((tset), &(dst_css));	\
+	     (task);							\
+	     (task) = cgroup_taskset_next((tset), &(dst_css)))
 
 /**
  * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset
  * @leader: the loop cursor
+ * @dst_css: the destination css
  * @tset: takset to iterate
  *
  * Iterate threadgroup leaders of @tset.  For single-task migrations, @tset
  * may not contain any.
  */
-#define cgroup_taskset_for_each_leader(leader, tset)			\
-	for ((leader) = cgroup_taskset_first((tset)); (leader);	\
-	     (leader) = cgroup_taskset_next((tset)))			\
+#define cgroup_taskset_for_each_leader(leader, dst_css, tset)		\
+	for ((leader) = cgroup_taskset_first((tset), &(dst_css));	\
+	     (leader);							\
+	     (leader) = cgroup_taskset_next((tset), &(dst_css)))	\
 		if ((leader) != (leader)->group_leader)			\
 			;						\
 		else
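As a usage sketch of the leader variant, here is a hypothetical
controller callback built against this header; example_attach() and
its printout are illustrative only and do not appear in the patch.

	#include <linux/cgroup.h>
	#include <linux/sched.h>
	#include <linux/printk.h>

	/* Hypothetical ->attach() doing per-process work once per threadgroup. */
	static void example_attach(struct cgroup_taskset *tset)
	{
		struct task_struct *leader;
		struct cgroup_subsys_state *dst_css;

		cgroup_taskset_for_each_leader(leader, dst_css, tset) {
			/* Body runs only for threadgroup leaders. */
			pr_debug("attaching threadgroup of pid %d to css %p\n",
				 task_pid_nr(leader), dst_css);
		}
	}

The trailing "if ((leader) != (leader)->group_leader) ; else" in the
macro is what makes this work: the block supplied by the caller binds
to the else branch, so it executes only when the cursor is a
threadgroup leader, while non-leader threads fall through the empty
statement.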