cgroups: let ss->can_attach and ss->attach do whole threadgroups at a time

Alter the ss->can_attach and ss->attach functions to be able to deal with a whole threadgroup at a time, for use in cgroup_attach_proc. (This is a pre-patch to cgroup-procs-writable.patch.) Currently, the new mode of the attach function can only tell the subsystem about the old cgroup of the threadgroup leader. No subsystem currently needs that information for each thread that's being moved, but if one were to be added (for example, one that counts tasks within a group), this part would need to be reworked to tell the subsystem the right information. [hidave.darkstar@gmail.com: fix build] Signed-off-by: Ben Blum <bblum@google.com> Signed-off-by: Paul Menage <menage@google.com> Acked-by: Li Zefan <lizf@cn.fujitsu.com> Reviewed-by: Matt Helsley <matthltc@us.ibm.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Ingo Molnar <mingo@elte.hu> Cc: Dave Young <hidave.darkstar@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Ben Blum <bblum@google.com> 2009-09-23 18:56:31 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-09-24 10:20:58 -0400
commit: be367d09927023d081f9199665c8500f69f14d22 (patch)
tree: f0c5b9da037506da3c5890cf11b51b39a7d3c427 /kernel
parent: c378369d8b4fa516ff2b1e79c3eded4e0e955ebb (diff)
5 files changed, 114 insertions, 22 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index bf8dd1a9f2d1..7ccba4bc5e3b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1552,7 +1552,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
        for_each_subsys(root, ss) {
                if (ss->can_attach) {
-                        retval = ss->can_attach(ss, cgrp, tsk);
+                        retval = ss->can_attach(ss, cgrp, tsk, false);
                        if (retval)
                                return retval;
                }
@@ -1590,7 +1590,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
        for_each_subsys(root, ss) {
                if (ss->attach)
-                        ss->attach(ss, cgrp, oldcgrp, tsk);
+                        ss->attach(ss, cgrp, oldcgrp, tsk, false);
        }
        set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
        synchronize_rcu();
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index fb249e2bcada..59e9ef6aab40 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -159,7 +159,7 @@ static bool is_task_frozen_enough(struct task_struct *task)
 */
 static int freezer_can_attach(struct cgroup_subsys *ss,
                              struct cgroup *new_cgroup,
-                              struct task_struct *task)
+                              struct task_struct *task, bool threadgroup)
 {
        struct freezer *freezer;
@@ -177,6 +177,19 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
        if (freezer->state == CGROUP_FROZEN)
                return -EBUSY;
+        if (threadgroup) {
+                struct task_struct *c;
+                rcu_read_lock();
+                list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
+                        if (is_task_frozen_enough(c)) {
+                                rcu_read_unlock();
+                                return -EBUSY;
+                        }
+                }
+                rcu_read_unlock();
+        }
        return 0;
 }
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 7e75a41bd508..b5cb469d2545 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1324,9 +1324,10 @@ static int fmeter_getrate(struct fmeter *fmp)
 static cpumask_var_t cpus_attach;
 /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
-static int cpuset_can_attach(struct cgroup_subsys *ss,
+static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
-                             struct cgroup *cont, struct task_struct *tsk)
+                             struct task_struct *tsk, bool threadgroup)
 {
+        int ret;
        struct cpuset *cs = cgroup_cs(cont);
        if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
@@ -1343,18 +1344,51 @@ static int cpuset_can_attach(struct cgroup_subsys *ss,
        if (tsk->flags & PF_THREAD_BOUND)
                return -EINVAL;
-        return security_task_setscheduler(tsk, 0, NULL);
+        ret = security_task_setscheduler(tsk, 0, NULL);
+        if (ret)
+                return ret;
+        if (threadgroup) {
+                struct task_struct *c;
+                rcu_read_lock();
+                list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+                        ret = security_task_setscheduler(c, 0, NULL);
+                        if (ret) {
+                                rcu_read_unlock();
+                                return ret;
+                        }
+                }
+                rcu_read_unlock();
+        }
+        return 0;
+}
+static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
+                               struct cpuset *cs)
+{
+        int err;
+        /*
+         * can_attach beforehand should guarantee that this doesn't fail.
+         * TODO: have a better way to handle failure here
+         */
+        err = set_cpus_allowed_ptr(tsk, cpus_attach);
+        WARN_ON_ONCE(err);
+        task_lock(tsk);
+        cpuset_change_task_nodemask(tsk, to);
+        task_unlock(tsk);
+        cpuset_update_task_spread_flag(cs, tsk);
 }
-static void cpuset_attach(struct cgroup_subsys *ss,
+static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
-                          struct cgroup *cont, struct cgroup *oldcont,
+                          struct cgroup *oldcont, struct task_struct *tsk,
-                          struct task_struct *tsk)
+                          bool threadgroup)
 {
        nodemask_t from, to;
        struct mm_struct *mm;
        struct cpuset *cs = cgroup_cs(cont);
        struct cpuset *oldcs = cgroup_cs(oldcont);
-        int err;
        if (cs == &top_cpuset) {
                cpumask_copy(cpus_attach, cpu_possible_mask);
@@ -1363,15 +1397,19 @@ static void cpuset_attach(struct cgroup_subsys *ss,
                guarantee_online_cpus(cs, cpus_attach);
                guarantee_online_mems(cs, &to);
        }
-        err = set_cpus_allowed_ptr(tsk, cpus_attach);
-        if (err)
-                return;
-        task_lock(tsk);
+        /* do per-task migration stuff possibly for each in the threadgroup */
-        cpuset_change_task_nodemask(tsk, &to);
+        cpuset_attach_task(tsk, &to, cs);
-        task_unlock(tsk);
+        if (threadgroup) {
-        cpuset_update_task_spread_flag(cs, tsk);
+                struct task_struct *c;
+                rcu_read_lock();
+                list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+                        cpuset_attach_task(c, &to, cs);
+                }
+                rcu_read_unlock();
+        }
+        /* change mm; only needs to be done once even if threadgroup */
        from = oldcs->mems_allowed;
        to = cs->mems_allowed;
        mm = get_task_mm(tsk);
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c
index 5aa854f9e5ae..2a5dfec8efe0 100644
--- a/kernel/ns_cgroup.c
+++ b/kernel/ns_cgroup.c
@@ -42,8 +42,8 @@ int ns_cgroup_clone(struct task_struct *task, struct pid *pid)
 *       (hence either you are in the same cgroup as task, or in an
 *        ancestor cgroup thereof)
 */
-static int ns_can_attach(struct cgroup_subsys *ss,
+static int ns_can_attach(struct cgroup_subsys *ss, struct cgroup *new_cgroup,
-                struct cgroup *new_cgroup, struct task_struct *task)
+                         struct task_struct *task, bool threadgroup)
 {
        if (current != task) {
                if (!capable(CAP_SYS_ADMIN))
@@ -56,6 +56,18 @@ static int ns_can_attach(struct cgroup_subsys *ss,
        if (!cgroup_is_descendant(new_cgroup, task))
                return -EPERM;
+        if (threadgroup) {
+                struct task_struct *c;
+                rcu_read_lock();
+                list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
+                        if (!cgroup_is_descendant(new_cgroup, c)) {
+                                rcu_read_unlock();
+                                return -EPERM;
+                        }
+                }
+                rcu_read_unlock();
+        }
        return 0;
 }
diff --git a/kernel/sched.c b/kernel/sched.c
index 2f76e06bea58..0d0361b9dbb3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -10377,8 +10377,7 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 }
 static int
-cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
-                      struct task_struct *tsk)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
        if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
@@ -10388,15 +10387,45 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
        if (tsk->sched_class != &fair_sched_class)
                return -EINVAL;
 #endif
+        return 0;
+}
+static int
+cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+                      struct task_struct *tsk, bool threadgroup)
+{
+        int retval = cpu_cgroup_can_attach_task(cgrp, tsk);
+        if (retval)
+                return retval;
+        if (threadgroup) {
+                struct task_struct *c;
+                rcu_read_lock();
+                list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+                        retval = cpu_cgroup_can_attach_task(cgrp, c);
+                        if (retval) {
+                                rcu_read_unlock();
+                                return retval;
+                        }
+                }
+                rcu_read_unlock();
+        }
        return 0;
 }
 static void
 cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                        struct cgroup *old_cont, struct task_struct *tsk)
+                  struct cgroup *old_cont, struct task_struct *tsk,
+                  bool threadgroup)
 {
        sched_move_task(tsk);
+        if (threadgroup) {
+                struct task_struct *c;
+                rcu_read_lock();
+                list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+                        sched_move_task(c);
+                }
+                rcu_read_unlock();
+        }
 }
 #ifdef CONFIG_FAIR_GROUP_SCHED
author	Ben Blum <bblum@google.com>	2009-09-23 18:56:31 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-09-24 10:20:58 -0400
commit	be367d09927023d081f9199665c8500f69f14d22 (patch)
tree	f0c5b9da037506da3c5890cf11b51b39a7d3c427 /kernel
parent	c378369d8b4fa516ff2b1e79c3eded4e0e955ebb (diff)