author     Ben Blum <bblum@andrew.cmu.edu>                  2011-05-26 19:25:19 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2011-05-26 20:12:34 -0400
commit     f780bdb7c1c73009cb57adcf99ef50027d80bf3c
tree       d15668ffcc40a2aaa31723b87cfda0b166f84d57 /kernel
parent     4714d1d32d97239fb5ae3e10521d3f133a899b66
cgroups: add per-thread subsystem callbacks
Add cgroup subsystem callbacks for per-thread attachment in atomic contexts

Add can_attach_task(), pre_attach(), and attach_task() as new callbacks in the
cgroup subsystem interface. Unlike can_attach and attach, these are per-thread
operations, to be called potentially many times when attaching an entire
threadgroup.

Also, the old "bool threadgroup" interface is removed, since these callbacks
replace it. All subsystems are modified for the new interface; of note is
cpuset, which requires the from/to nodemasks used during attach to be globally
scoped (though per-cpuset would work too) so that they persist from pre_attach
through attach_task and attach.

This is a pre-patch for cgroup-procs-writable.patch.
Signed-off-by: Ben Blum <bblum@andrew.cmu.edu>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Reviewed-by: Paul Menage <menage@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
 kernel/cgroup.c         |  17
 kernel/cgroup_freezer.c |  26
 kernel/cpuset.c         |  96
 kernel/sched.c          |  38
 4 files changed, 73 insertions(+), 104 deletions(-)
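
For orientation, the three new hooks sit alongside the existing group-wide callbacks in struct cgroup_subsys. The header change itself lives in include/linux/cgroup.h, which falls outside this 'kernel'-limited diffstat, so the sketch below is inferred only from the call sites in kernel/cgroup.c; the member order and the surrounding fields are assumptions.

struct cgroup_subsys {
        /* ... existing fields elided ... */
        int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
                          struct task_struct *tsk);
        int (*can_attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
        void (*pre_attach)(struct cgroup *cgrp);
        void (*attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
        void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
                       struct cgroup *old_cgrp, struct task_struct *tsk);
        void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
                              struct task_struct *tsk);
        /* ... */
};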
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 909a35510af5..38fb0ad1cb46 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1759,7 +1759,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)

         for_each_subsys(root, ss) {
                 if (ss->can_attach) {
-                        retval = ss->can_attach(ss, cgrp, tsk, false);
+                        retval = ss->can_attach(ss, cgrp, tsk);
                         if (retval) {
                                 /*
                                  * Remember on which subsystem the can_attach()
@@ -1771,6 +1771,13 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
                                 goto out;
                         }
                 }
+                if (ss->can_attach_task) {
+                        retval = ss->can_attach_task(cgrp, tsk);
+                        if (retval) {
+                                failed_ss = ss;
+                                goto out;
+                        }
+                }
         }

         task_lock(tsk);
@@ -1805,8 +1812,12 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
         write_unlock(&css_set_lock);

         for_each_subsys(root, ss) {
+                if (ss->pre_attach)
+                        ss->pre_attach(cgrp);
+                if (ss->attach_task)
+                        ss->attach_task(cgrp, tsk);
                 if (ss->attach)
-                        ss->attach(ss, cgrp, oldcgrp, tsk, false);
+                        ss->attach(ss, cgrp, oldcgrp, tsk);
         }
         set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
         synchronize_rcu();
@@ -1829,7 +1840,7 @@ out:
                                  */
                                 break;
                         if (ss->cancel_attach)
-                                ss->cancel_attach(ss, cgrp, tsk, false);
+                                ss->cancel_attach(ss, cgrp, tsk);
                 }
         }
         return retval;
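
cgroup_attach_task() above still moves a single task, so each hook fires once. The point of the per-thread variants, per the commit message, is that a later threadgroup-wide attach path (cgroup-procs-writable.patch, not part of this commit) can invoke them once per thread while walking the thread group under rcu_read_lock(), which is why they must be callable from atomic context and must not sleep. A rough, hypothetical sketch of such a caller follows; the function name and structure are assumptions, not code from this patch.

static int example_can_attach_threadgroup(struct cgroup_subsys *ss,
                                          struct cgroup *cgrp,
                                          struct task_struct *leader)
{
        struct task_struct *t = leader;
        int retval = 0;

        /* group-wide check: runs once per group and may sleep */
        if (ss->can_attach) {
                retval = ss->can_attach(ss, cgrp, leader);
                if (retval)
                        return retval;
        }
        if (!ss->can_attach_task)
                return 0;

        /* per-thread check: runs under rcu_read_lock(), so it must not sleep */
        rcu_read_lock();
        do {
                retval = ss->can_attach_task(cgrp, t);
                if (retval)
                        break;
        } while_each_thread(leader, t);
        rcu_read_unlock();

        return retval;
}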
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e7bebb7c6c38..e691818d7e45 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -160,7 +160,7 @@ static void freezer_destroy(struct cgroup_subsys *ss,
  */
 static int freezer_can_attach(struct cgroup_subsys *ss,
                               struct cgroup *new_cgroup,
-                              struct task_struct *task, bool threadgroup)
+                              struct task_struct *task)
 {
         struct freezer *freezer;

@@ -172,26 +172,17 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
         if (freezer->state != CGROUP_THAWED)
                 return -EBUSY;

+        return 0;
+}
+
+static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
+{
         rcu_read_lock();
-        if (__cgroup_freezing_or_frozen(task)) {
+        if (__cgroup_freezing_or_frozen(tsk)) {
                 rcu_read_unlock();
                 return -EBUSY;
         }
         rcu_read_unlock();
-
-        if (threadgroup) {
-                struct task_struct *c;
-
-                rcu_read_lock();
-                list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
-                        if (__cgroup_freezing_or_frozen(c)) {
-                                rcu_read_unlock();
-                                return -EBUSY;
-                        }
-                }
-                rcu_read_unlock();
-        }
-
         return 0;
 }

@@ -390,6 +381,9 @@ struct cgroup_subsys freezer_subsys = {
         .populate = freezer_populate,
         .subsys_id = freezer_subsys_id,
         .can_attach = freezer_can_attach,
+        .can_attach_task = freezer_can_attach_task,
+        .pre_attach = NULL,
+        .attach_task = NULL,
         .attach = NULL,
         .fork = freezer_fork,
         .exit = NULL,
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2bb8c2e98fff..55b297d78adc 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1367,14 +1367,10 @@ static int fmeter_getrate(struct fmeter *fmp)
         return val;
 }

-/* Protected by cgroup_lock */
-static cpumask_var_t cpus_attach;
-
 /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
 static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
-                             struct task_struct *tsk, bool threadgroup)
+                             struct task_struct *tsk)
 {
-        int ret;
         struct cpuset *cs = cgroup_cs(cont);

         if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
@@ -1391,29 +1387,42 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
         if (tsk->flags & PF_THREAD_BOUND)
                 return -EINVAL;

-        ret = security_task_setscheduler(tsk);
-        if (ret)
-                return ret;
-        if (threadgroup) {
-                struct task_struct *c;
-
-                rcu_read_lock();
-                list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                        ret = security_task_setscheduler(c);
-                        if (ret) {
-                                rcu_read_unlock();
-                                return ret;
-                        }
-                }
-                rcu_read_unlock();
-        }
         return 0;
 }

-static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
-                               struct cpuset *cs)
+static int cpuset_can_attach_task(struct cgroup *cgrp, struct task_struct *task)
+{
+        return security_task_setscheduler(task);
+}
+
+/*
+ * Protected by cgroup_lock. The nodemasks must be stored globally because
+ * dynamically allocating them is not allowed in pre_attach, and they must
+ * persist among pre_attach, attach_task, and attach.
+ */
+static cpumask_var_t cpus_attach;
+static nodemask_t cpuset_attach_nodemask_from;
+static nodemask_t cpuset_attach_nodemask_to;
+
+/* Set-up work for before attaching each task. */
+static void cpuset_pre_attach(struct cgroup *cont)
+{
+        struct cpuset *cs = cgroup_cs(cont);
+
+        if (cs == &top_cpuset)
+                cpumask_copy(cpus_attach, cpu_possible_mask);
+        else
+                guarantee_online_cpus(cs, cpus_attach);
+
+        guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+}
+
+/* Per-thread attachment work. */
+static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk)
 {
         int err;
+        struct cpuset *cs = cgroup_cs(cont);
+
         /*
          * can_attach beforehand should guarantee that this doesn't fail.
          * TODO: have a better way to handle failure here
@@ -1421,45 +1430,29 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
         err = set_cpus_allowed_ptr(tsk, cpus_attach);
         WARN_ON_ONCE(err);

-        cpuset_change_task_nodemask(tsk, to);
+        cpuset_change_task_nodemask(tsk, &cpuset_attach_nodemask_to);
         cpuset_update_task_spread_flag(cs, tsk);
-
 }

 static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
-                          struct cgroup *oldcont, struct task_struct *tsk,
-                          bool threadgroup)
+                          struct cgroup *oldcont, struct task_struct *tsk)
 {
         struct mm_struct *mm;
         struct cpuset *cs = cgroup_cs(cont);
         struct cpuset *oldcs = cgroup_cs(oldcont);
-        static nodemask_t to;           /* protected by cgroup_mutex */

-        if (cs == &top_cpuset) {
-                cpumask_copy(cpus_attach, cpu_possible_mask);
-        } else {
-                guarantee_online_cpus(cs, cpus_attach);
-        }
-        guarantee_online_mems(cs, &to);
-
-        /* do per-task migration stuff possibly for each in the threadgroup */
-        cpuset_attach_task(tsk, &to, cs);
-        if (threadgroup) {
-                struct task_struct *c;
-                rcu_read_lock();
-                list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                        cpuset_attach_task(c, &to, cs);
-                }
-                rcu_read_unlock();
-        }
-
-        /* change mm; only needs to be done once even if threadgroup */
-        to = cs->mems_allowed;
+        /*
+         * Change mm, possibly for multiple threads in a threadgroup. This is
+         * expensive and may sleep.
+         */
+        cpuset_attach_nodemask_from = oldcs->mems_allowed;
+        cpuset_attach_nodemask_to = cs->mems_allowed;
         mm = get_task_mm(tsk);
         if (mm) {
-                mpol_rebind_mm(mm, &to);
+                mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
                 if (is_memory_migrate(cs))
-                        cpuset_migrate_mm(mm, &oldcs->mems_allowed, &to);
+                        cpuset_migrate_mm(mm, &cpuset_attach_nodemask_from,
+                                          &cpuset_attach_nodemask_to);
                 mmput(mm);
         }
 }
@@ -1911,6 +1904,9 @@ struct cgroup_subsys cpuset_subsys = {
         .create = cpuset_create,
         .destroy = cpuset_destroy,
         .can_attach = cpuset_can_attach,
+        .can_attach_task = cpuset_can_attach_task,
+        .pre_attach = cpuset_pre_attach,
+        .attach_task = cpuset_attach_task,
         .attach = cpuset_attach,
         .populate = cpuset_populate,
         .post_clone = cpuset_post_clone,
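
The cpuset conversion is the one the commit message calls out: pre_attach() may not allocate memory and attach_task() can run for many threads, so cpus_attach and the from/to nodemasks become file-scope statics that are filled in once and then reused, with cgroup_mutex serializing the whole sequence. The hypothetical helper below only illustrates the order in which the core drives these callbacks for one attach; it is not code from this patch.

static void example_cpuset_attach_sequence(struct cgroup *cont,
                                           struct cgroup *oldcont,
                                           struct task_struct *tsk)
{
        /* caller holds cgroup_mutex for the whole sequence */
        if (cpuset_can_attach(&cpuset_subsys, cont, tsk))
                return;                 /* group-wide checks: empty masks, PF_THREAD_BOUND */
        if (cpuset_can_attach_task(cont, tsk))
                return;                 /* per-thread LSM check, repeated for each thread */
        cpuset_pre_attach(cont);        /* fill cpus_attach and ..._nodemask_to once */
        cpuset_attach_task(cont, tsk);  /* per-thread: rebind cpus, mems, spread flags */
        cpuset_attach(&cpuset_subsys, cont, oldcont, tsk);
                                        /* once per group: rebind and migrate the mm */
}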
diff --git a/kernel/sched.c b/kernel/sched.c
index 2d12893b8b0f..5e43e9dc65d1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -8764,42 +8764,10 @@ cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
         return 0;
 }

-static int
-cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                      struct task_struct *tsk, bool threadgroup)
-{
-        int retval = cpu_cgroup_can_attach_task(cgrp, tsk);
-        if (retval)
-                return retval;
-        if (threadgroup) {
-                struct task_struct *c;
-                rcu_read_lock();
-                list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                        retval = cpu_cgroup_can_attach_task(cgrp, c);
-                        if (retval) {
-                                rcu_read_unlock();
-                                return retval;
-                        }
-                }
-                rcu_read_unlock();
-        }
-        return 0;
-}
-
 static void
-cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                  struct cgroup *old_cont, struct task_struct *tsk,
-                  bool threadgroup)
+cpu_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
         sched_move_task(tsk);
-        if (threadgroup) {
-                struct task_struct *c;
-                rcu_read_lock();
-                list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                        sched_move_task(c);
-                }
-                rcu_read_unlock();
-        }
 }

 static void
@@ -8887,8 +8855,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
         .name = "cpu",
         .create = cpu_cgroup_create,
         .destroy = cpu_cgroup_destroy,
-        .can_attach = cpu_cgroup_can_attach,
-        .attach = cpu_cgroup_attach,
+        .can_attach_task = cpu_cgroup_can_attach_task,
+        .attach_task = cpu_cgroup_attach_task,
         .exit = cpu_cgroup_exit,
         .populate = cpu_cgroup_populate,
         .subsys_id = cpu_cgroup_subsys_id,
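
With its hand-rolled threadgroup loops gone, the cpu controller now registers only the per-thread hooks. A controller whose attach work is entirely per-task can look roughly like the sketch below; this is hypothetical code, not from the tree, and a real subsystem would still need create/destroy callbacks and an entry in cgroup_subsys.h for its subsys_id.

static int example_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
        /* called once per thread, possibly under rcu_read_lock(): no sleeping */
        return 0;                       /* this sketch accepts every task */
}

static void example_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
        /* per-thread state update goes here; must also be non-blocking */
}

struct cgroup_subsys example_subsys = {
        .name = "example",
        .can_attach_task = example_can_attach_task,
        .attach_task = example_attach_task,
        .subsys_id = example_subsys_id, /* hypothetical id from cgroup_subsys.h */
};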