diff options
author | Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com> | 2007-11-09 16:39:39 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2007-11-09 16:39:39 -0500 |
commit | 3c90e6e99b08f01d5684a3a07cceae6a543e4fa8 (patch) | |
tree | 2432814cfe4891e4c99945fbe09e6b59d6df49f3 | |
parent | 502d26b524d8980f3ed80d9aec398e85671a8160 (diff) |
sched: fix copy_namespaces() <-> sched_fork() dependency in do_fork
Sukadev Bhattiprolu reported a kernel crash with control groups.
There are a couple of problems discovered by Suka's test:
- The test requires the cgroup filesystem to be mounted with
at least the cpu and ns options (i.e. both namespace and cpu
controllers are active in the same hierarchy).
# mkdir /dev/cpuctl
# mount -t cgroup -ocpu,ns none cpuctl
(or simply)
# mount -t cgroup none cpuctl -> Will activate all controllers
in same hierarchy.
- The test invokes clone() with CLONE_NEWNS set. This causes a new child
to be created, also a new group (do_fork->copy_namespaces->ns_cgroup_clone->
cgroup_clone) and the child is attached to the new group (cgroup_clone->
attach_task->sched_move_task). At this point in time, the child's scheduler
related fields are uninitialized (including its on_rq field, which it has
inherited from parent). As a result, sched_move_task thinks it's on
the runqueue when it isn't.
As a solution to this problem, I moved sched_fork() call, which
initializes scheduler related fields on a new task, before
copy_namespaces(). I am not sure though whether moving up will
cause other side-effects. Do you see any issue?
- The second problem exposed by this test is that task_new_fair()
assumes that parent and child will be part of the same group (which
needn't be as this test shows). As a result, cfs_rq->curr can be NULL
for the child.
The solution is to test for curr pointer being NULL in
task_new_fair().
With the patch below, I could run ns_exec() fine w/o a crash.
Reported-by: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | kernel/fork.c | 6 | ||||
-rw-r--r-- | kernel/sched_fair.c | 3 |
2 files changed, 5 insertions, 4 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 28a740151988..8ca1a14cdc8c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1123,6 +1123,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1123 | p->blocked_on = NULL; /* not blocked yet */ | 1123 | p->blocked_on = NULL; /* not blocked yet */ |
1124 | #endif | 1124 | #endif |
1125 | 1125 | ||
1126 | /* Perform scheduler related setup. Assign this task to a CPU. */ | ||
1127 | sched_fork(p, clone_flags); | ||
1128 | |||
1126 | if ((retval = security_task_alloc(p))) | 1129 | if ((retval = security_task_alloc(p))) |
1127 | goto bad_fork_cleanup_policy; | 1130 | goto bad_fork_cleanup_policy; |
1128 | if ((retval = audit_alloc(p))) | 1131 | if ((retval = audit_alloc(p))) |
@@ -1212,9 +1215,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1212 | INIT_LIST_HEAD(&p->ptrace_children); | 1215 | INIT_LIST_HEAD(&p->ptrace_children); |
1213 | INIT_LIST_HEAD(&p->ptrace_list); | 1216 | INIT_LIST_HEAD(&p->ptrace_list); |
1214 | 1217 | ||
1215 | /* Perform scheduler related setup. Assign this task to a CPU. */ | ||
1216 | sched_fork(p, clone_flags); | ||
1217 | |||
1218 | /* Now that the task is set up, run cgroup callbacks if | 1218 | /* Now that the task is set up, run cgroup callbacks if |
1219 | * necessary. We need to run them before the task is visible | 1219 | * necessary. We need to run them before the task is visible |
1220 | * on the tasklist. */ | 1220 | * on the tasklist. */ |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 6c361472cc74..d3c03070872d 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -1067,8 +1067,9 @@ static void task_new_fair(struct rq *rq, struct task_struct *p) | |||
1067 | update_curr(cfs_rq); | 1067 | update_curr(cfs_rq); |
1068 | place_entity(cfs_rq, se, 1); | 1068 | place_entity(cfs_rq, se, 1); |
1069 | 1069 | ||
1070 | /* 'curr' will be NULL if the child belongs to a different group */ | ||
1070 | if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) && | 1071 | if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) && |
1071 | curr->vruntime < se->vruntime) { | 1072 | curr && curr->vruntime < se->vruntime) { |
1072 | /* | 1073 | /* |
1073 | * Upon rescheduling, sched_class::put_prev_task() will place | 1074 | * Upon rescheduling, sched_class::put_prev_task() will place |
1074 | * 'current' within the tree based on its new key value. | 1075 | * 'current' within the tree based on its new key value. |