aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorBen Blum <bblum@andrew.cmu.edu>2011-05-26 19:25:19 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-05-26 20:12:34 -0400
commitf780bdb7c1c73009cb57adcf99ef50027d80bf3c (patch)
treed15668ffcc40a2aaa31723b87cfda0b166f84d57 /kernel
parent4714d1d32d97239fb5ae3e10521d3f133a899b66 (diff)
cgroups: add per-thread subsystem callbacks
Add cgroup subsystem callbacks for per-thread attachment in atomic contexts.

Add can_attach_task(), pre_attach(), and attach_task() as new callbacks for the cgroup subsystem interface. Unlike can_attach and attach, these are for per-thread operations, to be called potentially many times when attaching an entire threadgroup.

Also, the old "bool threadgroup" interface is removed, as it is replaced by these callbacks. All subsystems are modified for the new interface - of note is cpuset, which requires from/to nodemasks for attach to be globally scoped (though per-cpuset would work too) to persist from its pre_attach to attach_task and attach.

This is a pre-patch for cgroup-procs-writable.patch.

Signed-off-by: Ben Blum <bblum@andrew.cmu.edu>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Reviewed-by: Paul Menage <menage@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c17
-rw-r--r--kernel/cgroup_freezer.c26
-rw-r--r--kernel/cpuset.c96
-rw-r--r--kernel/sched.c38
4 files changed, 73 insertions, 104 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 909a35510af5..38fb0ad1cb46 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1759,7 +1759,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1759 1759
1760 for_each_subsys(root, ss) { 1760 for_each_subsys(root, ss) {
1761 if (ss->can_attach) { 1761 if (ss->can_attach) {
1762 retval = ss->can_attach(ss, cgrp, tsk, false); 1762 retval = ss->can_attach(ss, cgrp, tsk);
1763 if (retval) { 1763 if (retval) {
1764 /* 1764 /*
1765 * Remember on which subsystem the can_attach() 1765 * Remember on which subsystem the can_attach()
@@ -1771,6 +1771,13 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1771 goto out; 1771 goto out;
1772 } 1772 }
1773 } 1773 }
1774 if (ss->can_attach_task) {
1775 retval = ss->can_attach_task(cgrp, tsk);
1776 if (retval) {
1777 failed_ss = ss;
1778 goto out;
1779 }
1780 }
1774 } 1781 }
1775 1782
1776 task_lock(tsk); 1783 task_lock(tsk);
@@ -1805,8 +1812,12 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1805 write_unlock(&css_set_lock); 1812 write_unlock(&css_set_lock);
1806 1813
1807 for_each_subsys(root, ss) { 1814 for_each_subsys(root, ss) {
1815 if (ss->pre_attach)
1816 ss->pre_attach(cgrp);
1817 if (ss->attach_task)
1818 ss->attach_task(cgrp, tsk);
1808 if (ss->attach) 1819 if (ss->attach)
1809 ss->attach(ss, cgrp, oldcgrp, tsk, false); 1820 ss->attach(ss, cgrp, oldcgrp, tsk);
1810 } 1821 }
1811 set_bit(CGRP_RELEASABLE, &oldcgrp->flags); 1822 set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
1812 synchronize_rcu(); 1823 synchronize_rcu();
@@ -1829,7 +1840,7 @@ out:
1829 */ 1840 */
1830 break; 1841 break;
1831 if (ss->cancel_attach) 1842 if (ss->cancel_attach)
1832 ss->cancel_attach(ss, cgrp, tsk, false); 1843 ss->cancel_attach(ss, cgrp, tsk);
1833 } 1844 }
1834 } 1845 }
1835 return retval; 1846 return retval;
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e7bebb7c6c38..e691818d7e45 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -160,7 +160,7 @@ static void freezer_destroy(struct cgroup_subsys *ss,
160 */ 160 */
161static int freezer_can_attach(struct cgroup_subsys *ss, 161static int freezer_can_attach(struct cgroup_subsys *ss,
162 struct cgroup *new_cgroup, 162 struct cgroup *new_cgroup,
163 struct task_struct *task, bool threadgroup) 163 struct task_struct *task)
164{ 164{
165 struct freezer *freezer; 165 struct freezer *freezer;
166 166
@@ -172,26 +172,17 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
172 if (freezer->state != CGROUP_THAWED) 172 if (freezer->state != CGROUP_THAWED)
173 return -EBUSY; 173 return -EBUSY;
174 174
175 return 0;
176}
177
178static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
179{
175 rcu_read_lock(); 180 rcu_read_lock();
176 if (__cgroup_freezing_or_frozen(task)) { 181 if (__cgroup_freezing_or_frozen(tsk)) {
177 rcu_read_unlock(); 182 rcu_read_unlock();
178 return -EBUSY; 183 return -EBUSY;
179 } 184 }
180 rcu_read_unlock(); 185 rcu_read_unlock();
181
182 if (threadgroup) {
183 struct task_struct *c;
184
185 rcu_read_lock();
186 list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
187 if (__cgroup_freezing_or_frozen(c)) {
188 rcu_read_unlock();
189 return -EBUSY;
190 }
191 }
192 rcu_read_unlock();
193 }
194
195 return 0; 186 return 0;
196} 187}
197 188
@@ -390,6 +381,9 @@ struct cgroup_subsys freezer_subsys = {
390 .populate = freezer_populate, 381 .populate = freezer_populate,
391 .subsys_id = freezer_subsys_id, 382 .subsys_id = freezer_subsys_id,
392 .can_attach = freezer_can_attach, 383 .can_attach = freezer_can_attach,
384 .can_attach_task = freezer_can_attach_task,
385 .pre_attach = NULL,
386 .attach_task = NULL,
393 .attach = NULL, 387 .attach = NULL,
394 .fork = freezer_fork, 388 .fork = freezer_fork,
395 .exit = NULL, 389 .exit = NULL,
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2bb8c2e98fff..55b297d78adc 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1367,14 +1367,10 @@ static int fmeter_getrate(struct fmeter *fmp)
1367 return val; 1367 return val;
1368} 1368}
1369 1369
1370/* Protected by cgroup_lock */
1371static cpumask_var_t cpus_attach;
1372
1373/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ 1370/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
1374static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, 1371static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
1375 struct task_struct *tsk, bool threadgroup) 1372 struct task_struct *tsk)
1376{ 1373{
1377 int ret;
1378 struct cpuset *cs = cgroup_cs(cont); 1374 struct cpuset *cs = cgroup_cs(cont);
1379 1375
1380 if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) 1376 if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
@@ -1391,29 +1387,42 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
1391 if (tsk->flags & PF_THREAD_BOUND) 1387 if (tsk->flags & PF_THREAD_BOUND)
1392 return -EINVAL; 1388 return -EINVAL;
1393 1389
1394 ret = security_task_setscheduler(tsk);
1395 if (ret)
1396 return ret;
1397 if (threadgroup) {
1398 struct task_struct *c;
1399
1400 rcu_read_lock();
1401 list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
1402 ret = security_task_setscheduler(c);
1403 if (ret) {
1404 rcu_read_unlock();
1405 return ret;
1406 }
1407 }
1408 rcu_read_unlock();
1409 }
1410 return 0; 1390 return 0;
1411} 1391}
1412 1392
1413static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to, 1393static int cpuset_can_attach_task(struct cgroup *cgrp, struct task_struct *task)
1414 struct cpuset *cs) 1394{
1395 return security_task_setscheduler(task);
1396}
1397
1398/*
1399 * Protected by cgroup_lock. The nodemasks must be stored globally because
1400 * dynamically allocating them is not allowed in pre_attach, and they must
1401 * persist among pre_attach, attach_task, and attach.
1402 */
1403static cpumask_var_t cpus_attach;
1404static nodemask_t cpuset_attach_nodemask_from;
1405static nodemask_t cpuset_attach_nodemask_to;
1406
1407/* Set-up work for before attaching each task. */
1408static void cpuset_pre_attach(struct cgroup *cont)
1409{
1410 struct cpuset *cs = cgroup_cs(cont);
1411
1412 if (cs == &top_cpuset)
1413 cpumask_copy(cpus_attach, cpu_possible_mask);
1414 else
1415 guarantee_online_cpus(cs, cpus_attach);
1416
1417 guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
1418}
1419
1420/* Per-thread attachment work. */
1421static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk)
1415{ 1422{
1416 int err; 1423 int err;
1424 struct cpuset *cs = cgroup_cs(cont);
1425
1417 /* 1426 /*
1418 * can_attach beforehand should guarantee that this doesn't fail. 1427 * can_attach beforehand should guarantee that this doesn't fail.
1419 * TODO: have a better way to handle failure here 1428 * TODO: have a better way to handle failure here
@@ -1421,45 +1430,29 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
1421 err = set_cpus_allowed_ptr(tsk, cpus_attach); 1430 err = set_cpus_allowed_ptr(tsk, cpus_attach);
1422 WARN_ON_ONCE(err); 1431 WARN_ON_ONCE(err);
1423 1432
1424 cpuset_change_task_nodemask(tsk, to); 1433 cpuset_change_task_nodemask(tsk, &cpuset_attach_nodemask_to);
1425 cpuset_update_task_spread_flag(cs, tsk); 1434 cpuset_update_task_spread_flag(cs, tsk);
1426
1427} 1435}
1428 1436
1429static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, 1437static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
1430 struct cgroup *oldcont, struct task_struct *tsk, 1438 struct cgroup *oldcont, struct task_struct *tsk)
1431 bool threadgroup)
1432{ 1439{
1433 struct mm_struct *mm; 1440 struct mm_struct *mm;
1434 struct cpuset *cs = cgroup_cs(cont); 1441 struct cpuset *cs = cgroup_cs(cont);
1435 struct cpuset *oldcs = cgroup_cs(oldcont); 1442 struct cpuset *oldcs = cgroup_cs(oldcont);
1436 static nodemask_t to; /* protected by cgroup_mutex */
1437 1443
1438 if (cs == &top_cpuset) { 1444 /*
1439 cpumask_copy(cpus_attach, cpu_possible_mask); 1445 * Change mm, possibly for multiple threads in a threadgroup. This is
1440 } else { 1446 * expensive and may sleep.
1441 guarantee_online_cpus(cs, cpus_attach); 1447 */
1442 } 1448 cpuset_attach_nodemask_from = oldcs->mems_allowed;
1443 guarantee_online_mems(cs, &to); 1449 cpuset_attach_nodemask_to = cs->mems_allowed;
1444
1445 /* do per-task migration stuff possibly for each in the threadgroup */
1446 cpuset_attach_task(tsk, &to, cs);
1447 if (threadgroup) {
1448 struct task_struct *c;
1449 rcu_read_lock();
1450 list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
1451 cpuset_attach_task(c, &to, cs);
1452 }
1453 rcu_read_unlock();
1454 }
1455
1456 /* change mm; only needs to be done once even if threadgroup */
1457 to = cs->mems_allowed;
1458 mm = get_task_mm(tsk); 1450 mm = get_task_mm(tsk);
1459 if (mm) { 1451 if (mm) {
1460 mpol_rebind_mm(mm, &to); 1452 mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
1461 if (is_memory_migrate(cs)) 1453 if (is_memory_migrate(cs))
1462 cpuset_migrate_mm(mm, &oldcs->mems_allowed, &to); 1454 cpuset_migrate_mm(mm, &cpuset_attach_nodemask_from,
1455 &cpuset_attach_nodemask_to);
1463 mmput(mm); 1456 mmput(mm);
1464 } 1457 }
1465} 1458}
@@ -1911,6 +1904,9 @@ struct cgroup_subsys cpuset_subsys = {
1911 .create = cpuset_create, 1904 .create = cpuset_create,
1912 .destroy = cpuset_destroy, 1905 .destroy = cpuset_destroy,
1913 .can_attach = cpuset_can_attach, 1906 .can_attach = cpuset_can_attach,
1907 .can_attach_task = cpuset_can_attach_task,
1908 .pre_attach = cpuset_pre_attach,
1909 .attach_task = cpuset_attach_task,
1914 .attach = cpuset_attach, 1910 .attach = cpuset_attach,
1915 .populate = cpuset_populate, 1911 .populate = cpuset_populate,
1916 .post_clone = cpuset_post_clone, 1912 .post_clone = cpuset_post_clone,
diff --git a/kernel/sched.c b/kernel/sched.c
index 2d12893b8b0f..5e43e9dc65d1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -8764,42 +8764,10 @@ cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
8764 return 0; 8764 return 0;
8765} 8765}
8766 8766
8767static int
8768cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
8769 struct task_struct *tsk, bool threadgroup)
8770{
8771 int retval = cpu_cgroup_can_attach_task(cgrp, tsk);
8772 if (retval)
8773 return retval;
8774 if (threadgroup) {
8775 struct task_struct *c;
8776 rcu_read_lock();
8777 list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
8778 retval = cpu_cgroup_can_attach_task(cgrp, c);
8779 if (retval) {
8780 rcu_read_unlock();
8781 return retval;
8782 }
8783 }
8784 rcu_read_unlock();
8785 }
8786 return 0;
8787}
8788
8789static void 8767static void
8790cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, 8768cpu_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
8791 struct cgroup *old_cont, struct task_struct *tsk,
8792 bool threadgroup)
8793{ 8769{
8794 sched_move_task(tsk); 8770 sched_move_task(tsk);
8795 if (threadgroup) {
8796 struct task_struct *c;
8797 rcu_read_lock();
8798 list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
8799 sched_move_task(c);
8800 }
8801 rcu_read_unlock();
8802 }
8803} 8771}
8804 8772
8805static void 8773static void
@@ -8887,8 +8855,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
8887 .name = "cpu", 8855 .name = "cpu",
8888 .create = cpu_cgroup_create, 8856 .create = cpu_cgroup_create,
8889 .destroy = cpu_cgroup_destroy, 8857 .destroy = cpu_cgroup_destroy,
8890 .can_attach = cpu_cgroup_can_attach, 8858 .can_attach_task = cpu_cgroup_can_attach_task,
8891 .attach = cpu_cgroup_attach, 8859 .attach_task = cpu_cgroup_attach_task,
8892 .exit = cpu_cgroup_exit, 8860 .exit = cpu_cgroup_exit,
8893 .populate = cpu_cgroup_populate, 8861 .populate = cpu_cgroup_populate,
8894 .subsys_id = cpu_cgroup_subsys_id, 8862 .subsys_id = cpu_cgroup_subsys_id,