author     Ben Blum <bblum@andrew.cmu.edu>                  2011-05-26 19:25:19 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2011-05-26 20:12:34 -0400
commit     f780bdb7c1c73009cb57adcf99ef50027d80bf3c
tree       d15668ffcc40a2aaa31723b87cfda0b166f84d57 /kernel
parent     4714d1d32d97239fb5ae3e10521d3f133a899b66
cgroups: add per-thread subsystem callbacks
Add cgroup subsystem callbacks for per-thread attachment in atomic contexts

Add can_attach_task(), pre_attach(), and attach_task() as new callbacks in the
cgroup subsystem interface. Unlike can_attach and attach, these are per-thread
operations, to be called potentially many times when attaching an entire
threadgroup.

Also, the old "bool threadgroup" interface is removed, since these callbacks
replace it. All subsystems are modified for the new interface; of note is
cpuset, which requires the from/to nodemasks used during attach to be globally
scoped (though per-cpuset would work too) so that they persist from pre_attach
through attach_task and attach.

This is a pre-patch for cgroup-procs-writable.patch.
Signed-off-by: Ben Blum <bblum@andrew.cmu.edu>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Reviewed-by: Paul Menage <menage@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
 kernel/cgroup.c         |  17
 kernel/cgroup_freezer.c |  26
 kernel/cpuset.c         |  96
 kernel/sched.c          |  38
 4 files changed, 73 insertions(+), 104 deletions(-)
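
For orientation, the three new hooks sit alongside the existing group-wide callbacks in struct cgroup_subsys. The header change itself lives in include/linux/cgroup.h, which falls outside this 'kernel'-limited diffstat, so the sketch below is inferred only from the call sites in kernel/cgroup.c; the member order and the surrounding fields are assumptions.

struct cgroup_subsys {
        /* ... existing fields elided ... */
        int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
                          struct task_struct *tsk);
        int (*can_attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
        void (*pre_attach)(struct cgroup *cgrp);
        void (*attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
        void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
                       struct cgroup *old_cgrp, struct task_struct *tsk);
        void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
                              struct task_struct *tsk);
        /* ... */
};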
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 909a35510af5..38fb0ad1cb46 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1759,7 +1759,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)

         for_each_subsys(root, ss) {
                 if (ss->can_attach) {
-                        retval = ss->can_attach(ss, cgrp, tsk, false);
+                        retval = ss->can_attach(ss, cgrp, tsk);
                         if (retval) {
                                 /*
                                  * Remember on which subsystem the can_attach()
@@ -1771,6 +1771,13 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
                                 goto out;
                         }
                 }
+                if (ss->can_attach_task) {
+                        retval = ss->can_attach_task(cgrp, tsk);
+                        if (retval) {
+                                failed_ss = ss;
+                                goto out;
+                        }
+                }
         }

         task_lock(tsk);
@@ -1805,8 +1812,12 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
         write_unlock(&css_set_lock);

         for_each_subsys(root, ss) {
+                if (ss->pre_attach)
+                        ss->pre_attach(cgrp);
+                if (ss->attach_task)
+                        ss->attach_task(cgrp, tsk);
                 if (ss->attach)
-                        ss->attach(ss, cgrp, oldcgrp, tsk, false);
+                        ss->attach(ss, cgrp, oldcgrp, tsk);
         }
         set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
         synchronize_rcu();
@@ -1829,7 +1840,7 @@ out:
                                  */
                                 break;
                         if (ss->cancel_attach)
-                                ss->cancel_attach(ss, cgrp, tsk, false);
+                                ss->cancel_attach(ss, cgrp, tsk);
                 }
         }
         return retval;
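
cgroup_attach_task() above still moves a single task, so each hook fires once. The point of the per-thread variants, per the commit message, is that a later threadgroup-wide attach path (cgroup-procs-writable.patch, not part of this commit) can invoke them once per thread while walking the thread group under rcu_read_lock(), which is why they must be callable from atomic context and must not sleep. A rough, hypothetical sketch of such a caller follows; the function name and structure are assumptions, not code from this patch.

static int example_can_attach_threadgroup(struct cgroup_subsys *ss,
                                          struct cgroup *cgrp,
                                          struct task_struct *leader)
{
        struct task_struct *t = leader;
        int retval = 0;

        /* group-wide check: runs once per group and may sleep */
        if (ss->can_attach) {
                retval = ss->can_attach(ss, cgrp, leader);
                if (retval)
                        return retval;
        }
        if (!ss->can_attach_task)
                return 0;

        /* per-thread check: runs under rcu_read_lock(), so it must not sleep */
        rcu_read_lock();
        do {
                retval = ss->can_attach_task(cgrp, t);
                if (retval)
                        break;
        } while_each_thread(leader, t);
        rcu_read_unlock();

        return retval;
}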
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e7bebb7c6c38..e691818d7e45 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -160,7 +160,7 @@ static void freezer_destroy(struct cgroup_subsys *ss,
  */
 static int freezer_can_attach(struct cgroup_subsys *ss,
                               struct cgroup *new_cgroup,
-                              struct task_struct *task, bool threadgroup)
+                              struct task_struct *task)
 {
         struct freezer *freezer;

@@ -172,26 +172,17 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
         if (freezer->state != CGROUP_THAWED)
                 return -EBUSY;

+        return 0;
+}
+
+static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
+{
         rcu_read_lock();
-        if (__cgroup_freezing_or_frozen(task)) {
+        if (__cgroup_freezing_or_frozen(tsk)) {
                 rcu_read_unlock();
                 return -EBUSY;
         }
         rcu_read_unlock();
-
-        if (threadgroup) {
-                struct task_struct *c;
-
-                rcu_read_lock();
-                list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
-                        if (__cgroup_freezing_or_frozen(c)) {
-                                rcu_read_unlock();
-                                return -EBUSY;
-                        }
-                }
-                rcu_read_unlock();
-        }
-
         return 0;
 }

@@ -390,6 +381,9 @@ struct cgroup_subsys freezer_subsys = {
         .populate = freezer_populate,
         .subsys_id = freezer_subsys_id,
         .can_attach = freezer_can_attach,
+        .can_attach_task = freezer_can_attach_task,
+        .pre_attach = NULL,
+        .attach_task = NULL,
         .attach = NULL,
         .fork = freezer_fork,
         .exit = NULL,
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2bb8c2e98fff..55b297d78adc 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1367,14 +1367,10 @@ static int fmeter_getrate(struct fmeter *fmp)
         return val;
 }

-/* Protected by cgroup_lock */
-static cpumask_var_t cpus_attach;
-
 /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
 static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
-                             struct task_struct *tsk, bool threadgroup)
+                             struct task_struct *tsk)
 {
-        int ret;
         struct cpuset *cs = cgroup_cs(cont);

         if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
@@ -1391,29 +1387,42 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
         if (tsk->flags & PF_THREAD_BOUND)
                 return -EINVAL;

-        ret = security_task_setscheduler(tsk);
-        if (ret)
-                return ret;
-        if (threadgroup) {
-                struct task_struct *c;
-
-                rcu_read_lock();
-                list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                        ret = security_task_setscheduler(c);
-                        if (ret) {
-                                rcu_read_unlock();
-                                return ret;
-                        }
-                }
-                rcu_read_unlock();
-        }
         return 0;
 }

-static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
-                               struct cpuset *cs)
+static int cpuset_can_attach_task(struct cgroup *cgrp, struct task_struct *task)
+{
+        return security_task_setscheduler(task);
+}
+
+/*
+ * Protected by cgroup_lock. The nodemasks must be stored globally because
+ * dynamically allocating them is not allowed in pre_attach, and they must
+ * persist among pre_attach, attach_task, and attach.
+ */
+static cpumask_var_t cpus_attach;
+static nodemask_t cpuset_attach_nodemask_from;
+static nodemask_t cpuset_attach_nodemask_to;
+
+/* Set-up work for before attaching each task. */
+static void cpuset_pre_attach(struct cgroup *cont)
+{
+        struct cpuset *cs = cgroup_cs(cont);
+
+        if (cs == &top_cpuset)
+                cpumask_copy(cpus_attach, cpu_possible_mask);
+        else
+                guarantee_online_cpus(cs, cpus_attach);
+
+        guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+}
+
+/* Per-thread attachment work. */
+static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk)
 {
         int err;
+        struct cpuset *cs = cgroup_cs(cont);
+
         /*
          * can_attach beforehand should guarantee that this doesn't fail.
          * TODO: have a better way to handle failure here
@@ -1421,45 +1430,29 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
         err = set_cpus_allowed_ptr(tsk, cpus_attach);
         WARN_ON_ONCE(err);

-        cpuset_change_task_nodemask(tsk, to);
+        cpuset_change_task_nodemask(tsk, &cpuset_attach_nodemask_to);
         cpuset_update_task_spread_flag(cs, tsk);
-
 }

 static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
-                          struct cgroup *oldcont, struct task_struct *tsk,
-                          bool threadgroup)
+                          struct cgroup *oldcont, struct task_struct *tsk)
 {
         struct mm_struct *mm;
         struct cpuset *cs = cgroup_cs(cont);
         struct cpuset *oldcs = cgroup_cs(oldcont);
-        static nodemask_t to;           /* protected by cgroup_mutex */

-        if (cs == &top_cpuset) {
-                cpumask_copy(cpus_attach, cpu_possible_mask);
-        } else {
-                guarantee_online_cpus(cs, cpus_attach);
-        }
-        guarantee_online_mems(cs, &to);
-
-        /* do per-task migration stuff possibly for each in the threadgroup */
-        cpuset_attach_task(tsk, &to, cs);
-        if (threadgroup) {
-                struct task_struct *c;
-                rcu_read_lock();
-                list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                        cpuset_attach_task(c, &to, cs);
-                }
-                rcu_read_unlock();
-        }
-
-        /* change mm; only needs to be done once even if threadgroup */
-        to = cs->mems_allowed;
+        /*
+         * Change mm, possibly for multiple threads in a threadgroup. This is
+         * expensive and may sleep.
+         */
+        cpuset_attach_nodemask_from = oldcs->mems_allowed;
+        cpuset_attach_nodemask_to = cs->mems_allowed;
         mm = get_task_mm(tsk);
         if (mm) {
-                mpol_rebind_mm(mm, &to);
+                mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
                 if (is_memory_migrate(cs))
-                        cpuset_migrate_mm(mm, &oldcs->mems_allowed, &to);
+                        cpuset_migrate_mm(mm, &cpuset_attach_nodemask_from,
+                                          &cpuset_attach_nodemask_to);
                 mmput(mm);
         }
 }
@@ -1911,6 +1904,9 @@ struct cgroup_subsys cpuset_subsys = {
         .create = cpuset_create,
         .destroy = cpuset_destroy,
         .can_attach = cpuset_can_attach,
+        .can_attach_task = cpuset_can_attach_task,
+        .pre_attach = cpuset_pre_attach,
+        .attach_task = cpuset_attach_task,
         .attach = cpuset_attach,
         .populate = cpuset_populate,
         .post_clone = cpuset_post_clone,
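
The cpuset conversion is the one the commit message calls out: pre_attach() may not allocate memory and attach_task() can run for many threads, so cpus_attach and the from/to nodemasks become file-scope statics that are filled in once and then reused, with cgroup_mutex serializing the whole sequence. The hypothetical helper below only illustrates the order in which the core drives these callbacks for one attach; it is not code from this patch.

static void example_cpuset_attach_sequence(struct cgroup *cont,
                                           struct cgroup *oldcont,
                                           struct task_struct *tsk)
{
        /* caller holds cgroup_mutex for the whole sequence */
        if (cpuset_can_attach(&cpuset_subsys, cont, tsk))
                return;                 /* group-wide checks: empty masks, PF_THREAD_BOUND */
        if (cpuset_can_attach_task(cont, tsk))
                return;                 /* per-thread LSM check, repeated for each thread */
        cpuset_pre_attach(cont);        /* fill cpus_attach and ..._nodemask_to once */
        cpuset_attach_task(cont, tsk);  /* per-thread: rebind cpus, mems, spread flags */
        cpuset_attach(&cpuset_subsys, cont, oldcont, tsk);
                                        /* once per group: rebind and migrate the mm */
}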
diff --git a/kernel/sched.c b/kernel/sched.c
index 2d12893b8b0f..5e43e9dc65d1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -8764,42 +8764,10 @@ cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
         return 0;
 }

-static int
-cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                      struct task_struct *tsk, bool threadgroup)
-{
-        int retval = cpu_cgroup_can_attach_task(cgrp, tsk);
-        if (retval)
-                return retval;
-        if (threadgroup) {
-                struct task_struct *c;
-                rcu_read_lock();
-                list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                        retval = cpu_cgroup_can_attach_task(cgrp, c);
-                        if (retval) {
-                                rcu_read_unlock();
-                                return retval;
-                        }
-                }
-                rcu_read_unlock();
-        }
-        return 0;
-}
-
 static void
-cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                  struct cgroup *old_cont, struct task_struct *tsk,
-                  bool threadgroup)
+cpu_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
         sched_move_task(tsk);
-        if (threadgroup) {
-                struct task_struct *c;
-                rcu_read_lock();
-                list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                        sched_move_task(c);
-                }
-                rcu_read_unlock();
-        }
 }

 static void
@@ -8887,8 +8855,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
         .name = "cpu",
         .create = cpu_cgroup_create,
         .destroy = cpu_cgroup_destroy,
-        .can_attach = cpu_cgroup_can_attach,
-        .attach = cpu_cgroup_attach,
+        .can_attach_task = cpu_cgroup_can_attach_task,
+        .attach_task = cpu_cgroup_attach_task,
         .exit = cpu_cgroup_exit,
         .populate = cpu_cgroup_populate,
         .subsys_id = cpu_cgroup_subsys_id,
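
With its hand-rolled threadgroup loops gone, the cpu controller now registers only the per-thread hooks. A controller whose attach work is entirely per-task can look roughly like the sketch below; this is hypothetical code, not from the tree, and a real subsystem would still need create/destroy callbacks and an entry in cgroup_subsys.h for its subsys_id.

static int example_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
        /* called once per thread, possibly under rcu_read_lock(): no sleeping */
        return 0;                       /* this sketch accepts every task */
}

static void example_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
        /* per-thread state update goes here; must also be non-blocking */
}

struct cgroup_subsys example_subsys = {
        .name = "example",
        .can_attach_task = example_can_attach_task,
        .attach_task = example_attach_task,
        .subsys_id = example_subsys_id, /* hypothetical id from cgroup_subsys.h */
};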