aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tejun Heo <tj@kernel.org> 2015-09-11 15:00:19 -0400
committer Tejun Heo <tj@kernel.org> 2015-09-22 12:46:53 -0400
commit 4530eddb59494b89650d6bcd980fc7f7717ad80c (patch)
tree 0f6212afa2e7e0e4724c5f12644508f1a0baf138
parent 3df9ca0a2b8b50db5a079ae9d97c5b55435e9a6c (diff)
cgroup, memcg, cpuset: implement cgroup_taskset_for_each_leader()
It wasn't explicitly documented but, when a process is being migrated, cpuset and memcg depend on cgroup_taskset_first() returning the threadgroup leader; however, this approach is somewhat ghetto and would no longer work for the planned multi-process migration.

This patch introduces explicit cgroup_taskset_for_each_leader() which iterates over only the threadgroup leaders and replaces cgroup_taskset_first() usages for accessing the leader with it.

This prepares both memcg and cpuset for multi-process migration. This patch also updates the documentation for cgroup_taskset_for_each() to clarify the iteration rules and removes comments mentioning task ordering in tasksets.

v2: A previous patch which added threadgroup leader test was dropped. Patch updated accordingly.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Zefan Li <lizefan@huawei.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
-rw-r--r-- include/linux/cgroup.h 22
-rw-r--r-- kernel/cgroup.c 11
-rw-r--r-- kernel/cpuset.c 9
-rw-r--r-- mm/memcontrol.c 17
4 files changed, 41 insertions, 18 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index fb717f2cba5b..e9c3eac074e2 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -232,11 +232,33 @@ void css_task_iter_end(struct css_task_iter *it);
232 * cgroup_taskset_for_each - iterate cgroup_taskset 232 * cgroup_taskset_for_each - iterate cgroup_taskset
233 * @task: the loop cursor 233 * @task: the loop cursor
234 * @tset: taskset to iterate 234 * @tset: taskset to iterate
235 *
236 * @tset may contain multiple tasks and they may belong to multiple
237 * processes. When there are multiple tasks in @tset, if a task of a
238 * process is in @tset, all tasks of the process are in @tset. Also, all
239 * are guaranteed to share the same source and destination csses.
240 *
241 * Iteration is not in any specific order.
235 */ 242 */
236#define cgroup_taskset_for_each(task, tset) \ 243#define cgroup_taskset_for_each(task, tset) \
237 for ((task) = cgroup_taskset_first((tset)); (task); \ 244 for ((task) = cgroup_taskset_first((tset)); (task); \
238 (task) = cgroup_taskset_next((tset))) 245 (task) = cgroup_taskset_next((tset)))
239 246
247/**
248 * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset
249 * @leader: the loop cursor
249 * @tset: taskset to iterate
251 *
252 * Iterate threadgroup leaders of @tset. For single-task migrations, @tset
253 * may not contain any.
254 */
255#define cgroup_taskset_for_each_leader(leader, tset) \
256 for ((leader) = cgroup_taskset_first((tset)); (leader); \
257 (leader) = cgroup_taskset_next((tset))) \
258 if ((leader) != (leader)->group_leader) \
259 ; \
260 else
261
240/* 262/*
241 * Inline functions. 263 * Inline functions.
242 */ 264 */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0be276ffe08a..7f4b85af03dc 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2217,13 +2217,6 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp,
2217 2217
2218 get_css_set(new_cset); 2218 get_css_set(new_cset);
2219 rcu_assign_pointer(tsk->cgroups, new_cset); 2219 rcu_assign_pointer(tsk->cgroups, new_cset);
2220
2221 /*
2222 * Use move_tail so that cgroup_taskset_first() still returns the
2223 * leader after migration. This works because cgroup_migrate()
2224 * ensures that the dst_cset of the leader is the first on the
2225 * tset's dst_csets list.
2226 */
2227 list_move_tail(&tsk->cg_list, &new_cset->mg_tasks); 2220 list_move_tail(&tsk->cg_list, &new_cset->mg_tasks);
2228 2221
2229 /* 2222 /*
@@ -2419,10 +2412,6 @@ static int cgroup_migrate(struct cgroup *cgrp, struct task_struct *leader,
2419 if (!cset->mg_src_cgrp) 2412 if (!cset->mg_src_cgrp)
2420 goto next; 2413 goto next;
2421 2414
2422 /*
2423 * cgroup_taskset_first() must always return the leader.
2424 * Take care to avoid disturbing the ordering.
2425 */
2426 list_move_tail(&task->cg_list, &cset->mg_tasks); 2415 list_move_tail(&task->cg_list, &cset->mg_tasks);
2427 if (list_empty(&cset->mg_node)) 2416 if (list_empty(&cset->mg_node))
2428 list_add_tail(&cset->mg_node, &tset.src_csets); 2417 list_add_tail(&cset->mg_node, &tset.src_csets);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 0b361a0b58f6..e4d999929903 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1488,7 +1488,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
1488 /* static buf protected by cpuset_mutex */ 1488 /* static buf protected by cpuset_mutex */
1489 static nodemask_t cpuset_attach_nodemask_to; 1489 static nodemask_t cpuset_attach_nodemask_to;
1490 struct task_struct *task; 1490 struct task_struct *task;
1491 struct task_struct *leader = cgroup_taskset_first(tset); 1491 struct task_struct *leader;
1492 struct cpuset *cs = css_cs(css); 1492 struct cpuset *cs = css_cs(css);
1493 struct cpuset *oldcs = cpuset_attach_old_cs; 1493 struct cpuset *oldcs = cpuset_attach_old_cs;
1494 1494
@@ -1514,12 +1514,11 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
1514 } 1514 }
1515 1515
1516 /* 1516 /*
1517 * Change mm, possibly for multiple threads in a threadgroup. This 1517 * Change mm for all threadgroup leaders. This is expensive and may
1518 * is expensive and may sleep and should be moved outside migration 1518 * sleep and should be moved outside migration path proper.
1519 * path proper.
1520 */ 1519 */
1521 cpuset_attach_nodemask_to = cs->effective_mems; 1520 cpuset_attach_nodemask_to = cs->effective_mems;
1522 if (thread_group_leader(leader)) { 1521 cgroup_taskset_for_each_leader(leader, tset) {
1523 struct mm_struct *mm = get_task_mm(leader); 1522 struct mm_struct *mm = get_task_mm(leader);
1524 1523
1525 if (mm) { 1524 if (mm) {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9f331402e502..33c8dad6830f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4828,7 +4828,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
4828{ 4828{
4829 struct mem_cgroup *memcg = mem_cgroup_from_css(css); 4829 struct mem_cgroup *memcg = mem_cgroup_from_css(css);
4830 struct mem_cgroup *from; 4830 struct mem_cgroup *from;
4831 struct task_struct *p; 4831 struct task_struct *leader, *p;
4832 struct mm_struct *mm; 4832 struct mm_struct *mm;
4833 unsigned long move_flags; 4833 unsigned long move_flags;
4834 int ret = 0; 4834 int ret = 0;
@@ -4842,7 +4842,20 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
4842 if (!move_flags) 4842 if (!move_flags)
4843 return 0; 4843 return 0;
4844 4844
4845 p = cgroup_taskset_first(tset); 4845 /*
4846 * Multi-process migrations only happen on the default hierarchy
4847 * where charge immigration is not used. Perform charge
4848 * immigration if @tset contains a leader and whine if there are
4849 * multiple.
4850 */
4851 p = NULL;
4852 cgroup_taskset_for_each_leader(leader, tset) {
4853 WARN_ON_ONCE(p);
4854 p = leader;
4855 }
4856 if (!p)
4857 return 0;
4858
4846 from = mem_cgroup_from_task(p); 4859 from = mem_cgroup_from_task(p);
4847 4860
4848 VM_BUG_ON(from == memcg); 4861 VM_BUG_ON(from == memcg);