author	Cliff Wickman <cpw@sgi.com>	2008-02-07 03:14:43 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2008-02-07 11:42:22 -0500
commit	956db3ca0606e78456786ef19fd4dc7a5151a6e1
tree	0bef3d107df1115ecf76e342f30ecee67a7f3705 /kernel/cgroup.c
parent	31a7df01fd0cd786f60873a921aecafac148c290
hotplug cpu: move tasks in empty cpusets to parent
This patch corrects a situation that occurs when one disables all the cpus in
a cpuset.

Currently, the disabled (cpu-less) cpuset inherits the cpus of its parent,
which is incorrect because it may then overlap its cpu-exclusive sibling.

Tasks of an empty cpuset should be moved to the cpuset which is the parent of
their current cpuset.  Or if the parent cpuset has no cpus, to its parent,
etc.  And the empty cpuset should be released (if it is flagged
notify_on_release).

Depends on the cgroup_scan_tasks() function (proposed by David Rientjes) to
iterate through all tasks in the cpu-less cpuset.  We are deliberately
avoiding a walk of the tasklist.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Cliff Wickman <cpw@sgi.com>
Cc: Paul Menage <menage@google.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
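The parent walk described in the message is easy to picture in code. The
sketch below is illustrative only and not taken from this patch: the helper
name is assumed, though cpusets of this era do carry a parent pointer and a
cpus_allowed mask, and the top cpuset always holds all online cpus, so the
loop terminates.

/*
 * Illustrative sketch (not from this patch): find the nearest ancestor
 * of an emptied cpuset that still has cpus.  The tasks of 'cs' would
 * then be handed to that ancestor via cgroup_scan_tasks() and the
 * cgroup_attach_task() function renamed below.
 */
static struct cpuset *nearest_ancestor_with_cpus(struct cpuset *cs)
{
	struct cpuset *parent = cs->parent;

	/* The top cpuset holds all online cpus, so this terminates. */
	while (cpus_empty(parent->cpus_allowed))
		parent = parent->parent;
	return parent;
}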
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--	kernel/cgroup.c	22
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index bcc7a6e8e3c0..2c5cccbe12e2 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -489,7 +489,7 @@ static struct css_set *find_css_set(
  * Any task can increment and decrement the count field without lock.
  * So in general, code holding cgroup_mutex can't rely on the count
  * field not changing. However, if the count goes to zero, then only
- * attach_task() can increment it again. Because a count of zero
+ * cgroup_attach_task() can increment it again. Because a count of zero
  * means that no tasks are currently attached, therefore there is no
  * way a task attached to that cgroup can fork (the other way to
  * increment the count). So code holding cgroup_mutex can safely
@@ -520,17 +520,17 @@ static struct css_set *find_css_set(
  * The task_lock() exception
  *
  * The need for this exception arises from the action of
- * attach_task(), which overwrites one tasks cgroup pointer with
+ * cgroup_attach_task(), which overwrites one tasks cgroup pointer with
  * another. It does so using cgroup_mutexe, however there are
  * several performance critical places that need to reference
  * task->cgroup without the expense of grabbing a system global
  * mutex. Therefore except as noted below, when dereferencing or, as
- * in attach_task(), modifying a task'ss cgroup pointer we use
+ * in cgroup_attach_task(), modifying a task'ss cgroup pointer we use
  * task_lock(), which acts on a spinlock (task->alloc_lock) already in
  * the task_struct routinely used for such matters.
  *
  * P.S. One more locking exception. RCU is used to guard the
- * update of a tasks cgroup pointer by attach_task()
+ * update of a tasks cgroup pointer by cgroup_attach_task()
  */
 
 /**
@@ -1194,7 +1194,7 @@ static void get_first_subsys(const struct cgroup *cgrp,
  * Call holding cgroup_mutex. May take task_lock of
  * the task 'pid' during call.
  */
-static int attach_task(struct cgroup *cgrp, struct task_struct *tsk)
+int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
 	int retval = 0;
 	struct cgroup_subsys *ss;
@@ -1287,7 +1287,7 @@ static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
 		get_task_struct(tsk);
 	}
 
-	ret = attach_task(cgrp, tsk);
+	ret = cgroup_attach_task(cgrp, tsk);
 	put_task_struct(tsk);
 	return ret;
 }
@@ -2514,7 +2514,7 @@ out:
  *  - Used for /proc/<pid>/cgroup.
  *  - No need to task_lock(tsk) on this tsk->cgroup reference, as it
  *    doesn't really matter if tsk->cgroup changes after we read it,
- *    and we take cgroup_mutex, keeping attach_task() from changing it
+ *    and we take cgroup_mutex, keeping cgroup_attach_task() from changing it
  *    anyway. No need to check that tsk->cgroup != NULL, thanks to
  *    the_top_cgroup_hack in cgroup_exit(), which sets an exiting tasks
  *    cgroup to top_cgroup.
@@ -2625,7 +2625,7 @@ static struct file_operations proc_cgroupstats_operations = {
  * A pointer to the shared css_set was automatically copied in
  * fork.c by dup_task_struct(). However, we ignore that copy, since
  * it was not made under the protection of RCU or cgroup_mutex, so
- * might no longer be a valid cgroup pointer. attach_task() might
+ * might no longer be a valid cgroup pointer. cgroup_attach_task() might
  * have already changed current->cgroups, allowing the previously
 * referenced cgroup group to be removed and freed.
 *
@@ -2704,8 +2704,8 @@ void cgroup_post_fork(struct task_struct *child)
 *    attach us to a different cgroup, decrementing the count on
 *    the first cgroup that we never incremented. But in this case,
 *    top_cgroup isn't going away, and either task has PF_EXITING set,
- *    which wards off any attach_task() attempts, or task is a failed
- *    fork, never visible to attach_task.
+ *    which wards off any cgroup_attach_task() attempts, or task is a failed
+ *    fork, never visible to cgroup_attach_task.
 *
 */
 void cgroup_exit(struct task_struct *tsk, int run_callbacks)
@@ -2845,7 +2845,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
 	}
 
 	/* All seems fine. Finish by moving the task into the new cgroup */
-	ret = attach_task(child, tsk);
+	ret = cgroup_attach_task(child, tsk);
 	mutex_unlock(&cgroup_mutex);
 
 out_release:
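Because the rename in the hunk at line 1194 also drops the static qualifier,
cgroup_attach_task() becomes callable from outside kernel/cgroup.c, which is
what lets the cpuset hotplug path migrate tasks.  A hedged sketch of such a
caller follows: the wrapper name move_one_task() is assumed, and it leans on
the cgroup_lock()/cgroup_unlock() helpers around cgroup_mutex, since the
comment above cgroup_attach_task() requires cgroup_mutex to be held.

/*
 * Sketch of an external caller; everything except cgroup_attach_task()
 * and the cgroup_lock()/cgroup_unlock() helpers is assumed.
 */
static int move_one_task(struct cgroup *dest, struct task_struct *tsk)
{
	int ret;

	cgroup_lock();		/* cgroup_attach_task() needs cgroup_mutex */
	ret = cgroup_attach_task(dest, tsk);
	cgroup_unlock();
	return ret;
}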