diff options
author | Tejun Heo <tj@kernel.org> | 2015-09-11 15:00:19 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2015-09-22 12:46:53 -0400 |
commit | 4530eddb59494b89650d6bcd980fc7f7717ad80c (patch) | |
tree | 0f6212afa2e7e0e4724c5f12644508f1a0baf138 | |
parent | 3df9ca0a2b8b50db5a079ae9d97c5b55435e9a6c (diff) |
cgroup, memcg, cpuset: implement cgroup_taskset_for_each_leader()
It wasn't explicitly documented but, when a process is being migrated,
cpuset and memcg depend on cgroup_taskset_first() returning the
threadgroup leader; however, this approach is somewhat ghetto and
would no longer work for the planned multi-process migration.
This patch introduces explicit cgroup_taskset_for_each_leader() which
iterates over only the threadgroup leaders and replaces
cgroup_taskset_first() usages for accessing the leader with it.
This prepares both memcg and cpuset for multi-process migration. This
patch also updates the documentation for cgroup_taskset_for_each() to
clarify the iteration rules and removes comments mentioning task
ordering in tasksets.
v2: A previous patch which added threadgroup leader test was dropped.
Patch updated accordingly.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Zefan Li <lizefan@huawei.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
-rw-r--r-- | include/linux/cgroup.h | 22 | ||||
-rw-r--r-- | kernel/cgroup.c | 11 | ||||
-rw-r--r-- | kernel/cpuset.c | 9 | ||||
-rw-r--r-- | mm/memcontrol.c | 17 |
4 files changed, 41 insertions, 18 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index fb717f2cba5b..e9c3eac074e2 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -232,11 +232,33 @@ void css_task_iter_end(struct css_task_iter *it); | |||
232 | * cgroup_taskset_for_each - iterate cgroup_taskset | 232 | * cgroup_taskset_for_each - iterate cgroup_taskset |
233 | * @task: the loop cursor | 233 | * @task: the loop cursor |
234 | * @tset: taskset to iterate | 234 | * @tset: taskset to iterate |
235 | * | ||
236 | * @tset may contain multiple tasks and they may belong to multiple | ||
237 | * processes. When there are multiple tasks in @tset, if a task of a | ||
238 | * process is in @tset, all tasks of the process are in @tset. Also, all | ||
239 | * are guaranteed to share the same source and destination csses. | ||
240 | * | ||
241 | * Iteration is not in any specific order. | ||
235 | */ | 242 | */ |
236 | #define cgroup_taskset_for_each(task, tset) \ | 243 | #define cgroup_taskset_for_each(task, tset) \ |
237 | for ((task) = cgroup_taskset_first((tset)); (task); \ | 244 | for ((task) = cgroup_taskset_first((tset)); (task); \ |
238 | (task) = cgroup_taskset_next((tset))) | 245 | (task) = cgroup_taskset_next((tset))) |
239 | 246 | ||
247 | /** | ||
248 | * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset | ||
249 | * @leader: the loop cursor | ||
250 | * @tset: taskset to iterate | ||
251 | * | ||
252 | * Iterate threadgroup leaders of @tset. For single-task migrations, @tset | ||
253 | * may not contain any. | ||
254 | */ | ||
255 | #define cgroup_taskset_for_each_leader(leader, tset) \ | ||
256 | for ((leader) = cgroup_taskset_first((tset)); (leader); \ | ||
257 | (leader) = cgroup_taskset_next((tset))) \ | ||
258 | if ((leader) != (leader)->group_leader) \ | ||
259 | ; \ | ||
260 | else | ||
261 | |||
240 | /* | 262 | /* |
241 | * Inline functions. | 263 | * Inline functions. |
242 | */ | 264 | */ |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0be276ffe08a..7f4b85af03dc 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -2217,13 +2217,6 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp, | |||
2217 | 2217 | ||
2218 | get_css_set(new_cset); | 2218 | get_css_set(new_cset); |
2219 | rcu_assign_pointer(tsk->cgroups, new_cset); | 2219 | rcu_assign_pointer(tsk->cgroups, new_cset); |
2220 | |||
2221 | /* | ||
2222 | * Use move_tail so that cgroup_taskset_first() still returns the | ||
2223 | * leader after migration. This works because cgroup_migrate() | ||
2224 | * ensures that the dst_cset of the leader is the first on the | ||
2225 | * tset's dst_csets list. | ||
2226 | */ | ||
2227 | list_move_tail(&tsk->cg_list, &new_cset->mg_tasks); | 2220 | list_move_tail(&tsk->cg_list, &new_cset->mg_tasks); |
2228 | 2221 | ||
2229 | /* | 2222 | /* |
@@ -2419,10 +2412,6 @@ static int cgroup_migrate(struct cgroup *cgrp, struct task_struct *leader, | |||
2419 | if (!cset->mg_src_cgrp) | 2412 | if (!cset->mg_src_cgrp) |
2420 | goto next; | 2413 | goto next; |
2421 | 2414 | ||
2422 | /* | ||
2423 | * cgroup_taskset_first() must always return the leader. | ||
2424 | * Take care to avoid disturbing the ordering. | ||
2425 | */ | ||
2426 | list_move_tail(&task->cg_list, &cset->mg_tasks); | 2415 | list_move_tail(&task->cg_list, &cset->mg_tasks); |
2427 | if (list_empty(&cset->mg_node)) | 2416 | if (list_empty(&cset->mg_node)) |
2428 | list_add_tail(&cset->mg_node, &tset.src_csets); | 2417 | list_add_tail(&cset->mg_node, &tset.src_csets); |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 0b361a0b58f6..e4d999929903 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -1488,7 +1488,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css, | |||
1488 | /* static buf protected by cpuset_mutex */ | 1488 | /* static buf protected by cpuset_mutex */ |
1489 | static nodemask_t cpuset_attach_nodemask_to; | 1489 | static nodemask_t cpuset_attach_nodemask_to; |
1490 | struct task_struct *task; | 1490 | struct task_struct *task; |
1491 | struct task_struct *leader = cgroup_taskset_first(tset); | 1491 | struct task_struct *leader; |
1492 | struct cpuset *cs = css_cs(css); | 1492 | struct cpuset *cs = css_cs(css); |
1493 | struct cpuset *oldcs = cpuset_attach_old_cs; | 1493 | struct cpuset *oldcs = cpuset_attach_old_cs; |
1494 | 1494 | ||
@@ -1514,12 +1514,11 @@ static void cpuset_attach(struct cgroup_subsys_state *css, | |||
1514 | } | 1514 | } |
1515 | 1515 | ||
1516 | /* | 1516 | /* |
1517 | * Change mm, possibly for multiple threads in a threadgroup. This | 1517 | * Change mm for all threadgroup leaders. This is expensive and may |
1518 | * is expensive and may sleep and should be moved outside migration | 1518 | * sleep and should be moved outside migration path proper. |
1519 | * path proper. | ||
1520 | */ | 1519 | */ |
1521 | cpuset_attach_nodemask_to = cs->effective_mems; | 1520 | cpuset_attach_nodemask_to = cs->effective_mems; |
1522 | if (thread_group_leader(leader)) { | 1521 | cgroup_taskset_for_each_leader(leader, tset) { |
1523 | struct mm_struct *mm = get_task_mm(leader); | 1522 | struct mm_struct *mm = get_task_mm(leader); |
1524 | 1523 | ||
1525 | if (mm) { | 1524 | if (mm) { |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9f331402e502..33c8dad6830f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -4828,7 +4828,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css, | |||
4828 | { | 4828 | { |
4829 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 4829 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
4830 | struct mem_cgroup *from; | 4830 | struct mem_cgroup *from; |
4831 | struct task_struct *p; | 4831 | struct task_struct *leader, *p; |
4832 | struct mm_struct *mm; | 4832 | struct mm_struct *mm; |
4833 | unsigned long move_flags; | 4833 | unsigned long move_flags; |
4834 | int ret = 0; | 4834 | int ret = 0; |
@@ -4842,7 +4842,20 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css, | |||
4842 | if (!move_flags) | 4842 | if (!move_flags) |
4843 | return 0; | 4843 | return 0; |
4844 | 4844 | ||
4845 | p = cgroup_taskset_first(tset); | 4845 | /* |
4846 | * Multi-process migrations only happen on the default hierarchy | ||
4847 | * where charge immigration is not used. Perform charge | ||
4848 | * immigration if @tset contains a leader and whine if there are | ||
4849 | * multiple. | ||
4850 | */ | ||
4851 | p = NULL; | ||
4852 | cgroup_taskset_for_each_leader(leader, tset) { | ||
4853 | WARN_ON_ONCE(p); | ||
4854 | p = leader; | ||
4855 | } | ||
4856 | if (!p) | ||
4857 | return 0; | ||
4858 | |||
4846 | from = mem_cgroup_from_task(p); | 4859 | from = mem_cgroup_from_task(p); |
4847 | 4860 | ||
4848 | VM_BUG_ON(from == memcg); | 4861 | VM_BUG_ON(from == memcg); |