diff options
author | Ben Blum <bblum@andrew.cmu.edu> | 2011-05-26 19:25:19 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-26 20:12:34 -0400 |
commit | f780bdb7c1c73009cb57adcf99ef50027d80bf3c (patch) | |
tree | d15668ffcc40a2aaa31723b87cfda0b166f84d57 /kernel/cpuset.c | |
parent | 4714d1d32d97239fb5ae3e10521d3f133a899b66 (diff) |
cgroups: add per-thread subsystem callbacks
Add cgroup subsystem callbacks for per-thread attachment in atomic contexts
Add can_attach_task(), pre_attach(), and attach_task() as new callbacks
for the cgroups subsystem interface. Unlike can_attach and attach, these
are for per-thread operations, to be called potentially many times when
attaching an entire threadgroup.
Also, the old "bool threadgroup" interface is removed, as it is replaced by
this. All subsystems are modified for the new interface - of note is
cpuset, which requires from/to nodemasks for attach to be globally scoped
(though per-cpuset would work too) to persist from its pre_attach to
attach_task and attach.
This is a pre-patch for cgroup-procs-writable.patch.
Signed-off-by: Ben Blum <bblum@andrew.cmu.edu>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Reviewed-by: Paul Menage <menage@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 96 |
1 files changed, 46 insertions, 50 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 2bb8c2e98fff..55b297d78adc 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -1367,14 +1367,10 @@ static int fmeter_getrate(struct fmeter *fmp) | |||
1367 | return val; | 1367 | return val; |
1368 | } | 1368 | } |
1369 | 1369 | ||
1370 | /* Protected by cgroup_lock */ | ||
1371 | static cpumask_var_t cpus_attach; | ||
1372 | |||
1373 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ | 1370 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ |
1374 | static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, | 1371 | static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, |
1375 | struct task_struct *tsk, bool threadgroup) | 1372 | struct task_struct *tsk) |
1376 | { | 1373 | { |
1377 | int ret; | ||
1378 | struct cpuset *cs = cgroup_cs(cont); | 1374 | struct cpuset *cs = cgroup_cs(cont); |
1379 | 1375 | ||
1380 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) | 1376 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) |
@@ -1391,29 +1387,42 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont, | |||
1391 | if (tsk->flags & PF_THREAD_BOUND) | 1387 | if (tsk->flags & PF_THREAD_BOUND) |
1392 | return -EINVAL; | 1388 | return -EINVAL; |
1393 | 1389 | ||
1394 | ret = security_task_setscheduler(tsk); | ||
1395 | if (ret) | ||
1396 | return ret; | ||
1397 | if (threadgroup) { | ||
1398 | struct task_struct *c; | ||
1399 | |||
1400 | rcu_read_lock(); | ||
1401 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { | ||
1402 | ret = security_task_setscheduler(c); | ||
1403 | if (ret) { | ||
1404 | rcu_read_unlock(); | ||
1405 | return ret; | ||
1406 | } | ||
1407 | } | ||
1408 | rcu_read_unlock(); | ||
1409 | } | ||
1410 | return 0; | 1390 | return 0; |
1411 | } | 1391 | } |
1412 | 1392 | ||
1413 | static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to, | 1393 | static int cpuset_can_attach_task(struct cgroup *cgrp, struct task_struct *task) |
1414 | struct cpuset *cs) | 1394 | { |
1395 | return security_task_setscheduler(task); | ||
1396 | } | ||
1397 | |||
1398 | /* | ||
1399 | * Protected by cgroup_lock. The nodemasks must be stored globally because | ||
1400 | * dynamically allocating them is not allowed in pre_attach, and they must | ||
1401 | * persist among pre_attach, attach_task, and attach. | ||
1402 | */ | ||
1403 | static cpumask_var_t cpus_attach; | ||
1404 | static nodemask_t cpuset_attach_nodemask_from; | ||
1405 | static nodemask_t cpuset_attach_nodemask_to; | ||
1406 | |||
1407 | /* Set-up work for before attaching each task. */ | ||
1408 | static void cpuset_pre_attach(struct cgroup *cont) | ||
1409 | { | ||
1410 | struct cpuset *cs = cgroup_cs(cont); | ||
1411 | |||
1412 | if (cs == &top_cpuset) | ||
1413 | cpumask_copy(cpus_attach, cpu_possible_mask); | ||
1414 | else | ||
1415 | guarantee_online_cpus(cs, cpus_attach); | ||
1416 | |||
1417 | guarantee_online_mems(cs, &cpuset_attach_nodemask_to); | ||
1418 | } | ||
1419 | |||
1420 | /* Per-thread attachment work. */ | ||
1421 | static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk) | ||
1415 | { | 1422 | { |
1416 | int err; | 1423 | int err; |
1424 | struct cpuset *cs = cgroup_cs(cont); | ||
1425 | |||
1417 | /* | 1426 | /* |
1418 | * can_attach beforehand should guarantee that this doesn't fail. | 1427 | * can_attach beforehand should guarantee that this doesn't fail. |
1419 | * TODO: have a better way to handle failure here | 1428 | * TODO: have a better way to handle failure here |
@@ -1421,45 +1430,29 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to, | |||
1421 | err = set_cpus_allowed_ptr(tsk, cpus_attach); | 1430 | err = set_cpus_allowed_ptr(tsk, cpus_attach); |
1422 | WARN_ON_ONCE(err); | 1431 | WARN_ON_ONCE(err); |
1423 | 1432 | ||
1424 | cpuset_change_task_nodemask(tsk, to); | 1433 | cpuset_change_task_nodemask(tsk, &cpuset_attach_nodemask_to); |
1425 | cpuset_update_task_spread_flag(cs, tsk); | 1434 | cpuset_update_task_spread_flag(cs, tsk); |
1426 | |||
1427 | } | 1435 | } |
1428 | 1436 | ||
1429 | static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, | 1437 | static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, |
1430 | struct cgroup *oldcont, struct task_struct *tsk, | 1438 | struct cgroup *oldcont, struct task_struct *tsk) |
1431 | bool threadgroup) | ||
1432 | { | 1439 | { |
1433 | struct mm_struct *mm; | 1440 | struct mm_struct *mm; |
1434 | struct cpuset *cs = cgroup_cs(cont); | 1441 | struct cpuset *cs = cgroup_cs(cont); |
1435 | struct cpuset *oldcs = cgroup_cs(oldcont); | 1442 | struct cpuset *oldcs = cgroup_cs(oldcont); |
1436 | static nodemask_t to; /* protected by cgroup_mutex */ | ||
1437 | 1443 | ||
1438 | if (cs == &top_cpuset) { | 1444 | /* |
1439 | cpumask_copy(cpus_attach, cpu_possible_mask); | 1445 | * Change mm, possibly for multiple threads in a threadgroup. This is |
1440 | } else { | 1446 | * expensive and may sleep. |
1441 | guarantee_online_cpus(cs, cpus_attach); | 1447 | */ |
1442 | } | 1448 | cpuset_attach_nodemask_from = oldcs->mems_allowed; |
1443 | guarantee_online_mems(cs, &to); | 1449 | cpuset_attach_nodemask_to = cs->mems_allowed; |
1444 | |||
1445 | /* do per-task migration stuff possibly for each in the threadgroup */ | ||
1446 | cpuset_attach_task(tsk, &to, cs); | ||
1447 | if (threadgroup) { | ||
1448 | struct task_struct *c; | ||
1449 | rcu_read_lock(); | ||
1450 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { | ||
1451 | cpuset_attach_task(c, &to, cs); | ||
1452 | } | ||
1453 | rcu_read_unlock(); | ||
1454 | } | ||
1455 | |||
1456 | /* change mm; only needs to be done once even if threadgroup */ | ||
1457 | to = cs->mems_allowed; | ||
1458 | mm = get_task_mm(tsk); | 1450 | mm = get_task_mm(tsk); |
1459 | if (mm) { | 1451 | if (mm) { |
1460 | mpol_rebind_mm(mm, &to); | 1452 | mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); |
1461 | if (is_memory_migrate(cs)) | 1453 | if (is_memory_migrate(cs)) |
1462 | cpuset_migrate_mm(mm, &oldcs->mems_allowed, &to); | 1454 | cpuset_migrate_mm(mm, &cpuset_attach_nodemask_from, |
1455 | &cpuset_attach_nodemask_to); | ||
1463 | mmput(mm); | 1456 | mmput(mm); |
1464 | } | 1457 | } |
1465 | } | 1458 | } |
@@ -1911,6 +1904,9 @@ struct cgroup_subsys cpuset_subsys = { | |||
1911 | .create = cpuset_create, | 1904 | .create = cpuset_create, |
1912 | .destroy = cpuset_destroy, | 1905 | .destroy = cpuset_destroy, |
1913 | .can_attach = cpuset_can_attach, | 1906 | .can_attach = cpuset_can_attach, |
1907 | .can_attach_task = cpuset_can_attach_task, | ||
1908 | .pre_attach = cpuset_pre_attach, | ||
1909 | .attach_task = cpuset_attach_task, | ||
1914 | .attach = cpuset_attach, | 1910 | .attach = cpuset_attach, |
1915 | .populate = cpuset_populate, | 1911 | .populate = cpuset_populate, |
1916 | .post_clone = cpuset_post_clone, | 1912 | .post_clone = cpuset_post_clone, |