author     Cliff Wickman <cpw@sgi.com>                           2008-02-07 03:14:43 -0500
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>  2008-02-07 11:42:22 -0500
commit     956db3ca0606e78456786ef19fd4dc7a5151a6e1 (patch)
tree       0bef3d107df1115ecf76e342f30ecee67a7f3705 /kernel/cgroup.c
parent     31a7df01fd0cd786f60873a921aecafac148c290 (diff)
hotplug cpu: move tasks in empty cpusets to parent
This patch corrects a situation that occurs when one disables all the cpus in
a cpuset.
Currently, the disabled (cpu-less) cpuset inherits the cpus of its parent,
which is incorrect because it may then overlap its cpu-exclusive sibling.
Tasks of an empty cpuset should instead be moved to the parent of their
current cpuset, or, if that parent also has no cpus, to the next ancestor
up, and so on.
And the empty cpuset should be released (if it is flagged notify_on_release).
This depends on the cgroup_scan_tasks() function (proposed by David
Rientjes) to iterate through all tasks in the cpu-less cpuset; we
deliberately avoid a walk of the whole tasklist.
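For concreteness, the fallback walk reads roughly as below. This is a
minimal sketch only: the real implementation lands in kernel/cpuset.c,
outside this page's diffstat, so the helper names
remove_tasks_in_empty_cpuset() and move_member_tasks_to_cpuset() are
illustrative assumptions, the latter imagined as a wrapper around
cgroup_scan_tasks() and cgroup_attach_task().

    /*
     * Sketch: move the tasks of a now cpu-less cpuset up to the nearest
     * ancestor that still has cpus.  The top cpuset always has online
     * cpus, so the walk terminates.
     */
    static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
    {
            struct cpuset *parent = cs->parent;

            while (cpus_empty(parent->cpus_allowed))
                    parent = parent->parent;

            /* hypothetical helper: scan cs's tasks, attach each to parent */
            move_member_tasks_to_cpuset(cs, parent);
    }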
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--  kernel/cgroup.c  22
1 file changed, 11 insertions, 11 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index bcc7a6e8e3c..2c5cccbe12e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -489,7 +489,7 @@ static struct css_set *find_css_set(
  * Any task can increment and decrement the count field without lock.
  * So in general, code holding cgroup_mutex can't rely on the count
  * field not changing. However, if the count goes to zero, then only
- * attach_task() can increment it again. Because a count of zero
+ * cgroup_attach_task() can increment it again. Because a count of zero
  * means that no tasks are currently attached, therefore there is no
  * way a task attached to that cgroup can fork (the other way to
  * increment the count). So code holding cgroup_mutex can safely
@@ -520,17 +520,17 @@ static struct css_set *find_css_set(
  * The task_lock() exception
  *
  * The need for this exception arises from the action of
- * attach_task(), which overwrites one task's cgroup pointer with
+ * cgroup_attach_task(), which overwrites one task's cgroup pointer with
  * another. It does so using cgroup_mutex, however there are
  * several performance critical places that need to reference
  * task->cgroup without the expense of grabbing a system global
  * mutex. Therefore except as noted below, when dereferencing or, as
- * in attach_task(), modifying a task's cgroup pointer we use
+ * in cgroup_attach_task(), modifying a task's cgroup pointer we use
  * task_lock(), which acts on a spinlock (task->alloc_lock) already in
  * the task_struct routinely used for such matters.
  *
  * P.S. One more locking exception. RCU is used to guard the
- * update of a task's cgroup pointer by attach_task()
+ * update of a task's cgroup pointer by cgroup_attach_task()
  */

 /**
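The rule this comment states can be shown with a short, purely
illustrative reader (not part of this diff): a fast path pins
tsk->cgroups with task_lock() instead of cgroup_mutex, which is safe
precisely because cgroup_attach_task() takes the same task_lock() while
rewriting the pointer.

    /* Illustrative only: dereference tsk->cgroups without cgroup_mutex. */
    static void example_read_cgroups(struct task_struct *tsk)
    {
            struct css_set *cg;

            task_lock(tsk);
            cg = tsk->cgroups;  /* stable until task_unlock() */
            /* ... inspect cg ... */
            task_unlock(tsk);
    }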
@@ -1194,7 +1194,7 @@ static void get_first_subsys(const struct cgroup *cgrp,
  * Call holding cgroup_mutex. May take task_lock of
  * the task 'pid' during call.
  */
-static int attach_task(struct cgroup *cgrp, struct task_struct *tsk)
+int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
 	int retval = 0;
 	struct cgroup_subsys *ss;
@@ -1287,7 +1287,7 @@ static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
 		get_task_struct(tsk);
 	}

-	ret = attach_task(cgrp, tsk);
+	ret = cgroup_attach_task(cgrp, tsk);
 	put_task_struct(tsk);
 	return ret;
 }
@@ -2514,7 +2514,7 @@ out:
  *  - Used for /proc/<pid>/cgroup.
  *  - No need to task_lock(tsk) on this tsk->cgroup reference, as it
  *    doesn't really matter if tsk->cgroup changes after we read it,
- *    and we take cgroup_mutex, keeping attach_task() from changing it
+ *    and we take cgroup_mutex, keeping cgroup_attach_task() from changing it
  *    anyway. No need to check that tsk->cgroup != NULL, thanks to
  *    the_top_cgroup_hack in cgroup_exit(), which sets an exiting task's
  *    cgroup to top_cgroup.
@@ -2625,7 +2625,7 @@ static struct file_operations proc_cgroupstats_operations = {
  * A pointer to the shared css_set was automatically copied in
  * fork.c by dup_task_struct(). However, we ignore that copy, since
  * it was not made under the protection of RCU or cgroup_mutex, so
- * might no longer be a valid cgroup pointer. attach_task() might
+ * might no longer be a valid cgroup pointer. cgroup_attach_task() might
  * have already changed current->cgroups, allowing the previously
  * referenced cgroup group to be removed and freed.
  *
@@ -2704,8 +2704,8 @@ void cgroup_post_fork(struct task_struct *child)
  * attach us to a different cgroup, decrementing the count on
  * the first cgroup that we never incremented. But in this case,
  * top_cgroup isn't going away, and either task has PF_EXITING set,
- * which wards off any attach_task() attempts, or task is a failed
- * fork, never visible to attach_task.
+ * which wards off any cgroup_attach_task() attempts, or task is a failed
+ * fork, never visible to cgroup_attach_task.
  *
  */
 void cgroup_exit(struct task_struct *tsk, int run_callbacks)
@@ -2845,7 +2845,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
 	}

 	/* All seems fine. Finish by moving the task into the new cgroup */
-	ret = attach_task(child, tsk);
+	ret = cgroup_attach_task(child, tsk);
 	mutex_unlock(&cgroup_mutex);

 out_release:
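The point of the rename is visible in the hunk at line 1197:
attach_task() loses its static qualifier and becomes
cgroup_attach_task(), so code outside kernel/cgroup.c (here, the cpuset
hotplug path) can move tasks between cgroups. A hypothetical external
caller might look like this, assuming cgroup_mutex is held as the
function's comment requires:

    /* Illustrative only: move a task out of an empty cgroup to its parent. */
    static int move_task_to_parent(struct cgroup *empty_cgrp,
                                   struct task_struct *tsk)
    {
            /* caller holds cgroup_mutex, per cgroup_attach_task()'s contract */
            return cgroup_attach_task(empty_cgrp->parent, tsk);
    }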